From 3350fab0a8faa180953a28e6bf5615ee796db359 Mon Sep 17 00:00:00 2001
From: Bill Murdock
Date: Thu, 7 May 2026 08:55:23 -0400
Subject: [PATCH 1/2] feat: remove learn and extract-skills commands

Removes the learn and extract-skills commands, the learners/ module,
learning_service.py, and the continuous-learning CI workflow.

The feature's premise is flawed: scores measure file presence rather
than quality, metrics are fabricated, the extractor is hardcoded to 5
of 25 assessors, and the CI workflow was producing unenriched skill
proposals on every release with LLM enrichment silently disabled.

Also removes orphaned test files left over from the prior removal of
the eval_harness and benchmark commands.

Closes #395

Co-Authored-By: Claude Sonnet 4.6
---
 .github/workflows/continuous-learning.yml | 135 ----
 pyproject.toml | 2 +-
 src/agentready/cli/extract_skills.py | 246 -------
 src/agentready/cli/learn.py | 254 -------
 src/agentready/cli/main.py | 12 +-
 src/agentready/learners/__init__.py | 16 -
 src/agentready/learners/code_sampler.py | 188 -----
 src/agentready/learners/llm_enricher.py | 277 --------
 src/agentready/learners/pattern_extractor.py | 222 ------
 src/agentready/learners/prompt_templates.py | 100 ---
 src/agentready/learners/skill_generator.py | 202 ------
 src/agentready/services/learning_service.py | 364 ----------
 tests/integration/test_eval_harness_e2e.py | 178 -----
 tests/unit/learners/test_llm_enricher.py | 456 ------------
 tests/unit/learners/test_pattern_extractor.py | 650 ------------------
 tests/unit/learners/test_skill_generator.py | 355 ----------
 tests/unit/test_cli_extract_skills.py | 376 ----------
 tests/unit/test_cli_learn.py | 372 ----------
 tests/unit/test_code_sampler.py | 541 ---------------
 tests/unit/test_eval_harness_models.py | 391 -----------
 tests/unit/test_learning_service.py | 496 -------------
 21 files changed, 5 insertions(+), 5828 deletions(-)
 delete mode 100644 .github/workflows/continuous-learning.yml
 delete mode 100644 src/agentready/cli/extract_skills.py
 delete mode 100644 src/agentready/cli/learn.py
 delete mode 100644 src/agentready/learners/__init__.py
 delete mode 100644 src/agentready/learners/code_sampler.py
 delete mode 100644 src/agentready/learners/llm_enricher.py
 delete mode 100644 src/agentready/learners/pattern_extractor.py
 delete mode 100644 src/agentready/learners/prompt_templates.py
 delete mode 100644 src/agentready/learners/skill_generator.py
 delete mode 100644 src/agentready/services/learning_service.py
 delete mode 100644 tests/integration/test_eval_harness_e2e.py
 delete mode 100644 tests/unit/learners/test_llm_enricher.py
 delete mode 100644 tests/unit/learners/test_pattern_extractor.py
 delete mode 100644 tests/unit/learners/test_skill_generator.py
 delete mode 100644 tests/unit/test_cli_extract_skills.py
 delete mode 100644 tests/unit/test_cli_learn.py
 delete mode 100644 tests/unit/test_code_sampler.py
 delete mode 100644 tests/unit/test_eval_harness_models.py
 delete mode 100644 tests/unit/test_learning_service.py

diff --git a/.github/workflows/continuous-learning.yml b/.github/workflows/continuous-learning.yml
deleted file mode 100644
index 5e817275..00000000
--- a/.github/workflows/continuous-learning.yml
+++ /dev/null
@@ -1,135 +0,0 @@
-name: Continuous Learning - Extract Skills
-
-on:
-  # Manual trigger
-  workflow_dispatch:
-    inputs:
-      output_format:
-        description: 'Output format for skills'
-        required: true
-        default: 'github-issues'
-        type: choice
-        options:
-          - github-issues
-          - skill-files
-          - both
-
-  # Automatic on new releases
-  release:
-    types: [published]
-
-  # Weekly analysis on Sundays at midnight UTC
-  schedule:
-    - cron: '0 0 * * 0'
-
-jobs:
-  extract-skills:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      issues: write
-      pull-requests: write
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Setup Python
-        uses: actions/setup-python@v6
-        with:
-          python-version: '3.12'
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v7
-
-      - name: Install AgentReady
-        run: |
-          uv venv
-          uv pip install -e .
-
-      - name: Run self-assessment
-        run: |
-          uv run agentready assess . --output-dir .agentready
-
-      - name: Extract learnings
-        id: learn
-        run: |
-          uv run agentready learn . --output-format json > .skills-proposals/discovered-skills.json
-          echo "skill_count=$(jq '.skill_count' .skills-proposals/discovered-skills.json)" >> "$GITHUB_OUTPUT"
-
-      - name: Generate skill proposals
-        if: steps.learn.outputs.skill_count > 0
-        run: |
-          uv run agentready learn . --output-format all --output-dir .skills-proposals
-
-      - name: Create GitHub issues for each skill
-        if: (inputs.output_format == 'github-issues' || inputs.output_format == 'both') && steps.learn.outputs.skill_count > 0
-        env:
-          GH_TOKEN: ${{ github.token }}
-        run: |
-          for skill_file in .skills-proposals/skill-*.md; do
-            if [ -f "$skill_file" ]; then
-              # Extract skill name from filename
-              skill_name=$(basename "$skill_file" .md | sed 's/^skill-//' | sed 's/-/ /g')
-
-              # Create issue with skill proposal
-              gh issue create \
-                --title "Skill Proposal: ${skill_name}" \
-                --label "skill-proposal,enhancement,ai-agent" \
-                --body-file "$skill_file"
-
-              echo "Created issue for: $skill_name"
-            fi
-          done
-
-      - name: Create PR with skill files
-        if: (inputs.output_format == 'skill-files' || inputs.output_format == 'both') && steps.learn.outputs.skill_count > 0
-        run: |
-          # Configure git
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-
-          # Create new branch
-          BRANCH_NAME="skills/auto-$(date +%Y%m%d-%H%M%S)"
-          git checkout -b "$BRANCH_NAME"
-
-          # Copy SKILL.md files to .claude/skills
-          mkdir -p .claude/skills
-          for skill_dir in .skills-proposals/*/; do
-            if [ -d "$skill_dir" ] && [ -f "${skill_dir}SKILL.md" ]; then
-              skill_id=$(basename "$skill_dir")
-              mkdir -p ".claude/skills/$skill_id"
-              cp "${skill_dir}SKILL.md" ".claude/skills/$skill_id/"
-              echo "Copied skill: $skill_id"
-            fi
-          done
-
-          # Commit and push
-          git add .claude/skills
-          git commit -m "feat: add discovered skills from continuous learning" \
-            -m "" \
-            -m "Automatically extracted skills from latest assessment." \
-            -m "" \
-            -m "šŸ¤– Generated with Claude Code" \
-            -m "Co-Authored-By: Claude "
-
-          git push origin "$BRANCH_NAME"
-
-          # Create PR
-          gh pr create \
-            --title "Add discovered skills from continuous learning" \
-            --body "Automatically discovered new Claude Code skills from AgentReady assessment. Review and merge to make available."
- - - name: Upload skill proposals as artifacts - if: steps.learn.outputs.skill_count > 0 - uses: actions/upload-artifact@v5 - with: - name: skill-proposals-${{ github.run_number }} - path: .skills-proposals/ - retention-days: 90 - - - name: Summary - if: steps.learn.outputs.skill_count > 0 - run: | - echo "āœ… Discovered ${{ steps.learn.outputs.skill_count }} skills with confidence ≄70%" - echo "šŸ“ Artifacts uploaded for review" diff --git a/pyproject.toml b/pyproject.toml index 1073b532..18297863 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "agentready" version = "2.34.0" -description = "Assess and bootstrap git repositories for AI-assisted development with automated remediation and continuous learning" +description = "Assess and bootstrap git repositories for AI-assisted development with automated remediation" authors = [{name = "Jeremy Eder", email = "jeder@redhat.com"}] readme = "README.md" license = {text = "MIT"} diff --git a/src/agentready/cli/extract_skills.py b/src/agentready/cli/extract_skills.py deleted file mode 100644 index 695b4bf1..00000000 --- a/src/agentready/cli/extract_skills.py +++ /dev/null @@ -1,246 +0,0 @@ -"""Extract-skills command for extracting patterns and generating skills.""" - -import os -import sys -from pathlib import Path - -import click - -from ..services.learning_service import LearningService - - -@click.command("extract-skills") -@click.argument("repository", type=click.Path(exists=True), default=".") -@click.option( - "--output-format", - type=click.Choice( - ["json", "skill_md", "github_issues", "markdown", "all"], case_sensitive=False - ), - default="json", - help="Output format for discovered skills (default: json)", -) -@click.option( - "--output-dir", - type=click.Path(), - default=".skills-proposals", - help="Directory for generated skill files (default: .skills-proposals)", -) -@click.option( - "--attribute", - multiple=True, - help="Specific attribute(s) to extract (can be specified multiple times)", -) -@click.option( - "--min-confidence", - type=int, - default=70, - help="Minimum confidence score to include skills (default: 70)", -) -@click.option( - "--verbose", - "-v", - is_flag=True, - help="Enable verbose output with detailed skill information", -) -@click.option( - "--enable-llm", - is_flag=True, - help="Enable LLM-powered skill enrichment (requires ANTHROPIC_API_KEY)", -) -@click.option( - "--llm-budget", - type=click.IntRange(min=1), - default=5, - help="Maximum number of skills to enrich with LLM (default: 5)", -) -@click.option( - "--llm-no-cache", - is_flag=True, - help="Bypass LLM response cache (always call API)", -) -def extract_skills( - repository, - output_format, - output_dir, - attribute, - min_confidence, - verbose, - enable_llm, - llm_budget, - llm_no_cache, -): - """Extract reusable patterns and generate Claude Code skills. - - Analyzes assessment results to identify successful patterns that could - be extracted as reusable Claude Code skills for other repositories. - - This command looks for the most recent assessment in .agentready/ and - extracts skills from high-scoring attributes (default: ≄70% confidence). - - REPOSITORY: Path to repository (default: current directory) - - Examples: - - \b - # Discover skills from current repository - agentready extract-skills . - - \b - # Generate SKILL.md files - agentready extract-skills . --output-format skill_md - - \b - # Create GitHub issue templates - agentready extract-skills . 
--output-format github_issues - - \b - # Extract specific attributes only - agentready extract-skills . --attribute claude_md_file --attribute type_annotations - - \b - # Generate all formats with higher confidence threshold - agentready extract-skills . --output-format all --min-confidence 85 - """ - repo_path = Path(repository).resolve() - - # Validate repository exists - if not repo_path.exists(): - click.echo(f"Error: Repository not found: {repo_path}", err=True) - sys.exit(1) - - # Find latest assessment file - agentready_dir = repo_path / ".agentready" - if not agentready_dir.exists(): - click.echo( - "Error: No assessment found in .agentready/\n" - "Run 'agentready assess .' first to generate an assessment.", - err=True, - ) - sys.exit(1) - - # Look for assessment files - assessment_files = sorted(agentready_dir.glob("assessment-*.json")) - if not assessment_files: - click.echo( - "Error: No assessment files found in .agentready/\n" - "Run 'agentready assess .' first to generate an assessment.", - err=True, - ) - sys.exit(1) - - # Use most recent assessment - assessment_file = assessment_files[-1] - - # Display header - click.echo("🧠 AgentReady Skill Extraction") - click.echo("=" * 50) - click.echo(f"\nRepository: {repo_path}") - click.echo(f"Assessment: {assessment_file.name}") - click.echo(f"Output format: {output_format}") - click.echo(f"Min confidence: {min_confidence}%") - if attribute: - click.echo(f"Filtering attributes: {', '.join(attribute)}") - - # Display LLM status - if enable_llm: - api_key = os.environ.get("ANTHROPIC_API_KEY") - if api_key: - click.echo(f"LLM enrichment: ENABLED (budget: {llm_budget} skills)") - if llm_no_cache: - click.echo("LLM cache: DISABLED") - else: - click.echo("āš ļø LLM enrichment: DISABLED (ANTHROPIC_API_KEY not set)") - enable_llm = False - click.echo() - - # Resolve output directory relative to repository path if it's a relative path - output_dir_path = Path(output_dir) - if not output_dir_path.is_absolute(): - output_dir_path = repo_path / output_dir - - # Create learning service - learning_service = LearningService( - min_confidence=min_confidence, - output_dir=output_dir_path, - ) - - # Run learning workflow - try: - results = learning_service.run_full_workflow( - assessment_file=assessment_file, - output_format=output_format, - attribute_ids=list(attribute) if attribute else None, - enable_llm=enable_llm, - llm_budget=llm_budget, - ) - except Exception as e: - click.echo(f"\nError during skill extraction: {str(e)}", err=True) - if verbose: - import traceback - - traceback.print_exc() - sys.exit(1) - - # Display results - skills_count = results["skills_discovered"] - generated_files = results["generated_files"] - - click.echo("=" * 50) - click.echo( - f"\nāœ… Discovered {skills_count} skill(s) with confidence ≄{min_confidence}%\n" - ) - - # Show LLM info if used - if enable_llm and skills_count > 0: - enriched_count = min(llm_budget, skills_count) - click.echo(f"šŸ¤– LLM-enriched {enriched_count} skill(s)\n") - - if skills_count == 0: - click.echo("No skills met the confidence threshold.") - click.echo( - f"Try lowering --min-confidence (current: {min_confidence}) " - "or run assessment on a higher-scoring repository." 
- ) - return - - # Display discovered skills - if verbose: - click.echo("Discovered Skills:") - click.echo("-" * 50) - for skill in results["skills"]: - click.echo(f"\nšŸ“š {skill.name}") - click.echo(f" ID: {skill.skill_id}") - click.echo(f" Confidence: {skill.confidence}%") - click.echo(f" Impact: +{skill.impact_score} pts") - click.echo(f" Reusability: {skill.reusability_score}%") - click.echo(f" Source: {skill.source_attribute_id}") - click.echo(f"\n {skill.pattern_summary}") - click.echo() - - # Display generated files - click.echo("\nGenerated Files:") - click.echo("-" * 50) - for file_path in generated_files: - click.echo(f" āœ“ {file_path}") - - # Next steps - click.echo("\n" + "=" * 50) - click.echo("\nšŸ“– Next Steps:\n") - - if output_format in ["skill_md", "all"]: - click.echo(" 1. Review generated SKILL.md files in " + output_dir) - click.echo(" 2. Test skills on 3-5 repositories") - click.echo(" 3. Refine instructions based on testing") - click.echo(" 4. Copy to ~/.claude/skills/ or .claude/skills/") - - if output_format in ["github_issues", "all"]: - click.echo(f" 1. Review issue templates in {output_dir}") - click.echo(" 2. Create GitHub issues:") - click.echo(" gh issue create --body-file .skills-proposals/skill-*.md") - - if output_format == "json": - click.echo(f" 1. Review discovered-skills.json in {output_dir}") - click.echo(" 2. Generate other formats:") - click.echo(" agentready extract-skills . --output-format all") - - click.echo() diff --git a/src/agentready/cli/learn.py b/src/agentready/cli/learn.py deleted file mode 100644 index 0909c4c6..00000000 --- a/src/agentready/cli/learn.py +++ /dev/null @@ -1,254 +0,0 @@ -"""Learn command for extracting patterns and generating skills.""" - -import os -import sys -from pathlib import Path - -import click - -from ..services.learning_service import LearningService - - -@click.command() -@click.argument("repository", type=click.Path(exists=True), default=".") -@click.option( - "--output-format", - type=click.Choice( - ["json", "skill_md", "github_issues", "markdown", "all"], case_sensitive=False - ), - default="json", - help="Output format for discovered skills (default: json)", -) -@click.option( - "--output-dir", - type=click.Path(), - default=".skills-proposals", - help="Directory for generated skill files (default: .skills-proposals)", -) -@click.option( - "--attribute", - multiple=True, - help="Specific attribute(s) to extract (can be specified multiple times)", -) -@click.option( - "--min-confidence", - type=int, - default=70, - help="Minimum confidence score to include skills (default: 70)", -) -@click.option( - "--verbose", - "-v", - is_flag=True, - help="Enable verbose output with detailed skill information", -) -@click.option( - "--enable-llm", - is_flag=True, - help="Enable LLM-powered skill enrichment (requires ANTHROPIC_API_KEY)", -) -@click.option( - "--llm-budget", - type=click.IntRange(min=1), - default=5, - help="Maximum number of skills to enrich with LLM (default: 5)", -) -@click.option( - "--llm-no-cache", - is_flag=True, - help="Bypass LLM response cache (always call API)", -) -@click.option( - "--llm-max-retries", - type=click.IntRange(min=0, max=10), - default=3, - help="Maximum retry attempts for LLM rate limits (default: 3)", -) -def learn( - repository, - output_format, - output_dir, - attribute, - min_confidence, - verbose, - enable_llm, - llm_budget, - llm_no_cache, - llm_max_retries, -): - """Extract reusable patterns and generate Claude Code skills. 
- - Analyzes assessment results to identify successful patterns that could - be extracted as reusable Claude Code skills for other repositories. - - This command looks for the most recent assessment in .agentready/ and - extracts skills from high-scoring attributes (default: ≄70% confidence). - - REPOSITORY: Path to repository (default: current directory) - - Examples: - - \b - # Discover skills from current repository - agentready learn . - - \b - # Generate SKILL.md files - agentready learn . --output-format skill_md - - \b - # Create GitHub issue templates - agentready learn . --output-format github_issues - - \b - # Extract specific attributes only - agentready learn . --attribute claude_md_file --attribute type_annotations - - \b - # Generate all formats with higher confidence threshold - agentready learn . --output-format all --min-confidence 85 - """ - repo_path = Path(repository).resolve() - - # Validate repository exists - if not repo_path.exists(): - click.echo(f"Error: Repository not found: {repo_path}", err=True) - sys.exit(1) - - # Find latest assessment file - agentready_dir = repo_path / ".agentready" - if not agentready_dir.exists(): - click.echo( - "Error: No assessment found in .agentready/\n" - "Run 'agentready assess .' first to generate an assessment.", - err=True, - ) - sys.exit(1) - - # Look for assessment files - assessment_files = sorted(agentready_dir.glob("assessment-*.json")) - if not assessment_files: - click.echo( - "Error: No assessment files found in .agentready/\n" - "Run 'agentready assess .' first to generate an assessment.", - err=True, - ) - sys.exit(1) - - # Use most recent assessment - assessment_file = assessment_files[-1] - - # Display header - click.echo("🧠 AgentReady Learning Loop") - click.echo("=" * 50) - click.echo(f"\nRepository: {repo_path}") - click.echo(f"Assessment: {assessment_file.name}") - click.echo(f"Output format: {output_format}") - click.echo(f"Min confidence: {min_confidence}%") - if attribute: - click.echo(f"Filtering attributes: {', '.join(attribute)}") - - # Display LLM status - if enable_llm: - api_key = os.environ.get("ANTHROPIC_API_KEY") - if api_key: - click.echo(f"LLM enrichment: ENABLED (budget: {llm_budget} skills)") - if llm_no_cache: - click.echo("LLM cache: DISABLED") - else: - click.echo("āš ļø LLM enrichment: DISABLED (ANTHROPIC_API_KEY not set)") - enable_llm = False - click.echo() - - # Resolve output directory relative to repository path if it's a relative path - output_dir_path = Path(output_dir) - if not output_dir_path.is_absolute(): - output_dir_path = repo_path / output_dir - - # Create learning service - learning_service = LearningService( - min_confidence=min_confidence, - output_dir=output_dir_path, - ) - - # Run learning workflow - try: - results = learning_service.run_full_workflow( - assessment_file=assessment_file, - output_format=output_format, - attribute_ids=list(attribute) if attribute else None, - enable_llm=enable_llm, - llm_budget=llm_budget, - llm_max_retries=llm_max_retries, - ) - except Exception as e: - click.echo(f"\nError during learning: {str(e)}", err=True) - if verbose: - import traceback - - traceback.print_exc() - sys.exit(1) - - # Display results - skills_count = results["skills_discovered"] - generated_files = results["generated_files"] - - click.echo("=" * 50) - click.echo( - f"\nāœ… Discovered {skills_count} skill(s) with confidence ≄{min_confidence}%\n" - ) - - # Show LLM info if used - if enable_llm and skills_count > 0: - enriched_count = min(llm_budget, skills_count) - 
click.echo(f"šŸ¤– LLM-enriched {enriched_count} skill(s)\n") - - if skills_count == 0: - click.echo("No skills met the confidence threshold.") - click.echo( - f"Try lowering --min-confidence (current: {min_confidence}) " - "or run assessment on a higher-scoring repository." - ) - return - - # Display discovered skills - if verbose: - click.echo("Discovered Skills:") - click.echo("-" * 50) - for skill in results["skills"]: - click.echo(f"\nšŸ“š {skill.name}") - click.echo(f" ID: {skill.skill_id}") - click.echo(f" Confidence: {skill.confidence}%") - click.echo(f" Impact: +{skill.impact_score} pts") - click.echo(f" Reusability: {skill.reusability_score}%") - click.echo(f" Source: {skill.source_attribute_id}") - click.echo(f"\n {skill.pattern_summary}") - click.echo() - - # Display generated files - click.echo("\nGenerated Files:") - click.echo("-" * 50) - for file_path in generated_files: - click.echo(f" āœ“ {file_path}") - - # Next steps - click.echo("\n" + "=" * 50) - click.echo("\nšŸ“– Next Steps:\n") - - if output_format in ["skill_md", "all"]: - click.echo(" 1. Review generated SKILL.md files in " + output_dir) - click.echo(" 2. Test skills on 3-5 repositories") - click.echo(" 3. Refine instructions based on testing") - click.echo(" 4. Copy to ~/.claude/skills/ or .claude/skills/") - - if output_format in ["github_issues", "all"]: - click.echo(f" 1. Review issue templates in {output_dir}") - click.echo(" 2. Create GitHub issues:") - click.echo(" gh issue create --body-file .skills-proposals/skill-*.md") - - if output_format == "json": - click.echo(f" 1. Review discovered-skills.json in {output_dir}") - click.echo(" 2. Generate other formats:") - click.echo(" agentready learn . --output-format all") - - click.echo() diff --git a/src/agentready/cli/main.py b/src/agentready/cli/main.py index 92b5ab28..2526dacc 100644 --- a/src/agentready/cli/main.py +++ b/src/agentready/cli/main.py @@ -36,7 +36,7 @@ from .schema import migrate_report, validate_report # Heavy commands - lazy loaded via LazyGroup -# (assess_batch, experiment, extract_skills, harbor, learn, submit) +# (assess_batch, experiment, submit) def get_agentready_version() -> str: @@ -54,8 +54,8 @@ def get_agentready_version() -> str: class LazyGroup(click.Group): """Click group that lazily loads heavy commands to improve startup time. - Commands like 'experiment', 'extract-skills', and 'assess-batch' import heavy - dependencies (scipy, pandas, anthropic) that add ~1 second to startup time. + Commands like 'experiment' and 'assess-batch' import heavy + dependencies (scipy, pandas) that add ~1 second to startup time. This class defers those imports until the command is actually invoked. 
""" @@ -95,9 +95,7 @@ def get_command(self, ctx, cmd_name): lazy_subcommands={ "assess-batch": ("assess_batch", "assess_batch"), "experiment": ("experiment", "experiment"), - "extract-skills": ("extract_skills", "extract_skills"), - "learn": ("learn", "learn"), - "submit": ("submit", "submit"), +"submit": ("submit", "submit"), }, ) @click.option("--version", is_flag=True, help="Show version information") @@ -580,8 +578,6 @@ def generate_config(): # Lazy-loaded commands (not registered here): # - assess-batch (imports pandas) # - experiment (imports scipy, pandas) -# - extract-skills (imports anthropic) -# - learn (imports anthropic) # - submit (imports github) diff --git a/src/agentready/learners/__init__.py b/src/agentready/learners/__init__.py deleted file mode 100644 index 28db74c6..00000000 --- a/src/agentready/learners/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -"""LLM-powered pattern extraction and skill enrichment.""" - -from .code_sampler import CodeSampler -from .llm_enricher import LLMEnricher -from .pattern_extractor import PatternExtractor -from .prompt_templates import CODE_SAMPLING_GUIDANCE, PATTERN_EXTRACTION_PROMPT -from .skill_generator import SkillGenerator - -__all__ = [ - "CodeSampler", - "LLMEnricher", - "PatternExtractor", - "SkillGenerator", - "PATTERN_EXTRACTION_PROMPT", - "CODE_SAMPLING_GUIDANCE", -] diff --git a/src/agentready/learners/code_sampler.py b/src/agentready/learners/code_sampler.py deleted file mode 100644 index 9199cfed..00000000 --- a/src/agentready/learners/code_sampler.py +++ /dev/null @@ -1,188 +0,0 @@ -"""Smart code sampling from repositories for LLM analysis.""" - -import logging -from pathlib import Path - -from agentready.models import Finding, Repository - -logger = logging.getLogger(__name__) - - -class CodeSampler: - """Extracts relevant code samples from repository for LLM analysis.""" - - # Mapping of attribute IDs to file patterns to sample - ATTRIBUTE_FILE_PATTERNS = { - "claude_md_file": ["CLAUDE.md", ".claude/CLAUDE.md"], - "readme_file": ["README.md"], - "type_annotations": ["**/*.py"], # Sample Python files - "deterministic_enforcement": [ - ".pre-commit-config.yaml", - ".github/workflows/*.yml", - ".github/workflows/*.yaml", - ".claude/settings.json", - ".husky/*", - ], - "standard_project_layout": [ - "**/", - "src/", - "tests/", - "docs/", - ], # Directory structure - "lock_files": [ - "requirements.txt", - "poetry.lock", - "package-lock.json", - "go.sum", - "Cargo.lock", - ], - "test_execution": [ - "pytest.ini", - "pyproject.toml", - ".coveragerc", - "tox.ini", - "setup.cfg", - "package.json", - "jest.config.*", - "vitest.config.*", - ], - "ci_quality_gates": [ - ".github/workflows/*.yml", - ".github/workflows/*.yaml", - ".gitlab-ci.yml", - ".circleci/config.yml", - ".travis.yml", - ], - "conventional_commits": [".github/workflows/*.yml"], # CI configs - "gitignore": [".gitignore"], - "single_file_verification": [ - "CLAUDE.md", - "AGENTS.md", - ".claude/CLAUDE.md", - "pyproject.toml", - ], - "pattern_references": [ - "CLAUDE.md", - "AGENTS.md", - ".claude/skills/**/SKILL.md", - ], - "design_intent": [ - "docs/design/*.md", - "docs/architecture/*.md", - "docs/adr/*.md", - "docs/decisions/*.md", - ], - "progressive_disclosure": [ - ".claude/rules/*.md", - ".claude/skills/**/SKILL.md", - ], - } - - def __init__( - self, repository: Repository, max_files: int = 5, max_lines_per_file: int = 100 - ): - """Initialize code sampler. 
- - Args: - repository: Repository to sample from - max_files: Maximum number of files to include - max_lines_per_file: Maximum lines per file to prevent token overflow - """ - self.repository = repository - self.max_files = max_files - self.max_lines_per_file = max_lines_per_file - - def get_relevant_code(self, finding: Finding) -> str: - """Get relevant code samples for a finding. - - Args: - finding: The finding to get code for - - Returns: - Formatted string with code samples - """ - attribute_id = finding.attribute.id - patterns = self.ATTRIBUTE_FILE_PATTERNS.get(attribute_id, []) - - if not patterns: - logger.warning(f"No file patterns defined for {attribute_id}") - return "No code samples available" - - # Collect files matching patterns with fair distribution across patterns - files_to_sample = [] - base = max(1, self.max_files // len(patterns)) - remainder = self.max_files % len(patterns) if base > 1 else 0 - for i, pattern in enumerate(patterns): - limit = base + (1 if i < remainder else 0) - if pattern.endswith("/"): - # Directory listing - files_to_sample.append(self._get_directory_tree(pattern)) - else: - # File pattern - matching_files = list(self.repository.path.glob(pattern)) - files_to_sample.extend(matching_files[:limit]) - - # Format as string - return self._format_code_samples(files_to_sample) - - def _get_directory_tree(self, dir_pattern: str) -> dict: - """Get directory tree structure.""" - base_path = self.repository.path / dir_pattern.rstrip("/") - if not base_path.exists(): - return {} - - tree = { - "type": "directory", - "path": str(base_path.relative_to(self.repository.path)), - "children": [], - } - - for item in base_path.iterdir(): - if item.is_file(): - tree["children"].append({"type": "file", "name": item.name}) - elif item.is_dir() and not item.name.startswith("."): - tree["children"].append({"type": "directory", "name": item.name}) - - return tree - - def _format_code_samples(self, files: list) -> str: - """Format files as readable code samples.""" - samples = [] - - for file_item in files[: self.max_files]: - if isinstance(file_item, dict) and "path" in file_item: - # Directory tree - samples.append(f"## Directory Structure: {file_item['path']}\n") - samples.append(self._format_tree(file_item)) - elif isinstance(file_item, Path): - # Regular file - try: - rel_path = file_item.relative_to(self.repository.path) - content = file_item.read_text(encoding="utf-8", errors="ignore") - - # Truncate if too long - lines = content.splitlines() - if len(lines) > self.max_lines_per_file: - lines = lines[: self.max_lines_per_file] - lines.append("... 
(truncated)") - - samples.append(f"## File: {rel_path}\n") - samples.append("```\n" + "\n".join(lines) + "\n```\n") - - except Exception as e: - logger.warning(f"Could not read {file_item}: {e}") - - return "\n".join(samples) if samples else "No code samples available" - - def _format_tree(self, tree: dict, indent: int = 0) -> str: - """Format directory tree as text.""" - lines = [] - prefix = " " * indent - - for child in tree.get("children", []): - if child["type"] == "file": - lines.append(f"{prefix}ā”œā”€ā”€ {child['name']}") - elif child["type"] == "directory": - lines.append(f"{prefix}ā”œā”€ā”€ {child['name']}/") - - return "\n".join(lines) diff --git a/src/agentready/learners/llm_enricher.py b/src/agentready/learners/llm_enricher.py deleted file mode 100644 index 3012410b..00000000 --- a/src/agentready/learners/llm_enricher.py +++ /dev/null @@ -1,277 +0,0 @@ -"""LLM-powered skill enrichment using Claude API.""" - -import hashlib -import json -import logging -import random -from pathlib import Path -from time import sleep - -from anthropic import Anthropic, APIError, RateLimitError - -from agentready.models import DiscoveredSkill, Finding, Repository -from agentready.services.llm_cache import LLMCache - -from .code_sampler import CodeSampler -from .prompt_templates import PATTERN_EXTRACTION_PROMPT - -logger = logging.getLogger(__name__) - - -class LLMEnricher: - """Enriches discovered skills using Claude API.""" - - def __init__( - self, - client: Anthropic, - cache_dir: Path | None = None, - model: str = "claude-sonnet-4-5-20250929", - ): - """Initialize LLM enricher. - - Args: - client: Anthropic API client - cache_dir: Cache directory (default: .agentready/llm-cache) - model: Claude model to use - """ - self.client = client - self.model = model - self.cache = LLMCache(cache_dir or Path(".agentready/llm-cache")) - self.code_sampler = None # Set per-repository - - def enrich_skill( - self, - skill: DiscoveredSkill, - repository: Repository, - finding: Finding, - use_cache: bool = True, - max_retries: int = 3, - _retry_count: int = 0, - ) -> DiscoveredSkill: - """Enrich skill with LLM-generated content. 
- - Args: - skill: Basic skill from heuristic extraction - repository: Repository being assessed - finding: Finding that generated this skill - use_cache: Whether to use cached responses - max_retries: Maximum retry attempts for rate limits (default: 3) - _retry_count: Internal retry counter (do not set manually) - - Returns: - Enriched DiscoveredSkill with LLM-generated content, or original - skill if enrichment fails after max retries - """ - # Generate cache key - evidence_str = "".join(finding.evidence) if finding.evidence else "" - evidence_hash = hashlib.sha256(evidence_str.encode()).hexdigest()[:16] - cache_key = LLMCache.generate_key(skill.skill_id, finding.score, evidence_hash) - - # Check cache first - if use_cache: - cached = self.cache.get(cache_key) - if cached: - logger.info(f"Using cached enrichment for {skill.skill_id}") - return cached - - # Initialize code sampler for this repository - self.code_sampler = CodeSampler(repository) - - # Get relevant code samples - code_samples = self.code_sampler.get_relevant_code(finding) - - # Call Claude API - try: - enrichment_data = self._call_claude_api( - skill, finding, repository, code_samples - ) - - # Merge enrichment into skill - enriched_skill = self._merge_enrichment(skill, enrichment_data) - - # Cache result - if use_cache: - self.cache.set(cache_key, enriched_skill) - - logger.info(f"Successfully enriched {skill.skill_id}") - return enriched_skill - - except RateLimitError as e: - # Check if max retries exceeded - if _retry_count >= max_retries: - logger.error( - f"Max retries ({max_retries}) exceeded for {skill.skill_id}. " - f"Falling back to heuristic skill. " - f"Check API quota: https://console.anthropic.com/settings/limits" - ) - return skill # Graceful fallback to heuristic skill - - # Calculate backoff with jitter to prevent thundering herd - retry_after = int(getattr(e, "retry_after", 60)) - jitter = random.uniform(0, min(retry_after * 0.1, 5)) - total_wait = retry_after + jitter - - logger.warning( - f"Rate limit hit for {skill.skill_id} " - f"(retry {_retry_count + 1}/{max_retries}): {e}" - ) - logger.info(f"Retrying after {total_wait:.1f} seconds...") - - sleep(total_wait) - - return self.enrich_skill( - skill, repository, finding, use_cache, max_retries, _retry_count + 1 - ) - - except APIError as e: - # Security: Sanitize error message to prevent API key exposure - error_msg = str(e) - # Anthropic errors shouldn't contain keys, but sanitize to be safe - safe_error = error_msg if len(error_msg) < 200 else error_msg[:200] - logger.error(f"API error enriching {skill.skill_id}: {safe_error}") - return skill # Fallback to original heuristic skill - - except Exception as e: - # Security: Sanitize generic errors that might expose sensitive data - error_msg = str(e) - safe_error = error_msg if len(error_msg) < 200 else error_msg[:200] - logger.error(f"Unexpected error enriching {skill.skill_id}: {safe_error}") - return skill # Fallback to original heuristic skill - - def _call_claude_api( - self, - skill: DiscoveredSkill, - finding: Finding, - repository: Repository, - code_samples: str, - ) -> dict: - """Call Claude API for pattern extraction. 
- - Args: - skill: Basic skill - finding: Associated finding - repository: Repository context - code_samples: Code samples from repository - - Returns: - Parsed JSON response from Claude - """ - # Build prompt - prompt = PATTERN_EXTRACTION_PROMPT.format( - repo_name=repository.name, - attribute_name=finding.attribute.name, - attribute_description=finding.attribute.description, - tier=finding.attribute.tier, - score=finding.score, - primary_language=getattr(repository, "primary_language", "Unknown"), - evidence=( - "\n".join(finding.evidence) - if finding.evidence - else "No evidence available" - ), - code_samples=code_samples, - ) - - # Call API - response = self.client.messages.create( - model=self.model, - max_tokens=4096, - messages=[{"role": "user", "content": prompt}], - ) - - # Parse response - response_text = response.content[0].text - - # Extract JSON (handle markdown code blocks if present) - if "```json" in response_text: - json_start = response_text.find("```json") + 7 - json_end = response_text.find("```", json_start) - response_text = response_text[json_start:json_end].strip() - elif "```" in response_text: - json_start = response_text.find("```") + 3 - json_end = response_text.find("```", json_start) - response_text = response_text[json_start:json_end].strip() - - try: - return json.loads(response_text) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse LLM JSON response: {e}") - logger.debug(f"Response text: {response_text}") - return {} - - def _merge_enrichment( - self, skill: DiscoveredSkill, enrichment: dict - ) -> DiscoveredSkill: - """Merge LLM enrichment data into DiscoveredSkill. - - Args: - skill: Original skill - enrichment: LLM response data - - Returns: - New DiscoveredSkill with enriched content - """ - if not enrichment: - return skill - - # Update description if provided - description = enrichment.get("skill_description", skill.description) - - # Update pattern summary (from instructions or keep original) - instructions = enrichment.get("instructions", []) - pattern_summary = skill.pattern_summary - if instructions: - pattern_summary = f"{skill.pattern_summary}\n\nDetailed implementation steps provided by LLM analysis." - - # Format code examples - code_examples = [] - for example in enrichment.get("code_examples", []): - if isinstance(example, dict): - formatted = f"File: {example.get('file_path', 'unknown')}\n{example.get('code', '')}\n\nExplanation: {example.get('explanation', '')}" - code_examples.append(formatted) - elif isinstance(example, str): - code_examples.append(example) - - # If no LLM examples, keep original - if not code_examples: - code_examples = skill.code_examples - - # Create new skill with enriched data - # Store enrichment in code_examples for now (can extend DiscoveredSkill model later) - enriched_examples = code_examples.copy() - - # Append best practices and anti-patterns as additional "examples" - best_practices = enrichment.get("best_practices", []) - if best_practices: - enriched_examples.append( - "=== BEST PRACTICES ===\n" - + "\n".join(f"- {bp}" for bp in best_practices) - ) - - anti_patterns = enrichment.get("anti_patterns", []) - if anti_patterns: - enriched_examples.append( - "=== ANTI-PATTERNS TO AVOID ===\n" - + "\n".join(f"- {ap}" for ap in anti_patterns) - ) - - # Add instructions as first example - if instructions: - enriched_examples.insert( - 0, - "=== INSTRUCTIONS ===\n" - + "\n".join(f"{i+1}. 
{step}" for i, step in enumerate(instructions)), - ) - - return DiscoveredSkill( - skill_id=skill.skill_id, - name=skill.name, - description=description, - confidence=skill.confidence, - source_attribute_id=skill.source_attribute_id, - reusability_score=skill.reusability_score, - impact_score=skill.impact_score, - pattern_summary=pattern_summary, - code_examples=enriched_examples, - citations=skill.citations, - ) diff --git a/src/agentready/learners/pattern_extractor.py b/src/agentready/learners/pattern_extractor.py deleted file mode 100644 index a0c12ca2..00000000 --- a/src/agentready/learners/pattern_extractor.py +++ /dev/null @@ -1,222 +0,0 @@ -"""Pattern extraction from assessment findings.""" - -from agentready.models import Assessment, DiscoveredSkill, Finding - - -class PatternExtractor: - """Extracts reusable patterns from high-scoring assessment findings. - - Uses heuristic-based analysis to identify successful implementations - that could be extracted as Claude Code skills. - """ - - # Minimum score threshold for pattern extraction - MIN_SCORE_THRESHOLD = 80.0 - - # Tier-based impact scores (how much each tier contributes to overall score) - TIER_IMPACT_SCORES = { - 1: 50.0, # Tier 1 (Essential) - highest impact - 2: 30.0, # Tier 2 (Critical) - 3: 15.0, # Tier 3 (Important) - 4: 5.0, # Tier 4 (Advanced) - lowest impact - } - - # Skill ID to human-readable name mapping for top tier-1 skills - SKILL_NAMES = { - "claude_md_file": { - "skill_id": "setup-claude-md", - "name": "Setup CLAUDE.md Configuration", - "description": "Create comprehensive CLAUDE.md files with tech stack, standard commands, repository structure, and boundaries to optimize repositories for AI-assisted development", - }, - "type_annotations": { - "skill_id": "implement-type-annotations", - "name": "Implement Type Annotations", - "description": "Add comprehensive type hints to Python/TypeScript code to improve IDE support, catch errors early, and enable better AI code understanding", - }, - "deterministic_enforcement": { - "skill_id": "setup-deterministic-enforcement", - "name": "Setup Deterministic Enforcement", - "description": "Configure pre-commit hooks, agent hooks, and lint rules for deterministic quality enforcement before each commit", - }, - "standard_project_layout": { - "skill_id": "structure-repository-layout", - "name": "Structure Repository Layout", - "description": "Organize code according to language-specific standard project layouts to improve navigation and AI code understanding", - }, - "lock_files": { - "skill_id": "create-dependency-lock-files", - "name": "Create Dependency Lock Files", - "description": "Generate lock files to pin exact dependency versions for reproducible builds and consistent development environments", - }, - } - - def __init__(self, assessment: Assessment, min_score: float = MIN_SCORE_THRESHOLD): - """Initialize pattern extractor. - - Args: - assessment: The assessment to extract patterns from - min_score: Minimum finding score to consider (default: 80.0) - """ - self.assessment = assessment - self.min_score = min_score - - def extract_all_patterns(self) -> list[DiscoveredSkill]: - """Extract all reusable patterns from the assessment. 
- - Returns: - List of discovered skills, sorted by confidence (highest first) - """ - discovered_skills = [] - - for finding in self.assessment.findings: - if self._should_extract_pattern(finding): - skill = self._create_skill_from_finding(finding) - if skill: - discovered_skills.append(skill) - - # Sort by confidence descending - discovered_skills.sort(key=lambda s: s.confidence, reverse=True) - - return discovered_skills - - def extract_specific_patterns( - self, attribute_ids: list[str] - ) -> list[DiscoveredSkill]: - """Extract patterns only from specific attributes. - - Args: - attribute_ids: List of attribute IDs to extract patterns from - - Returns: - List of discovered skills for specified attributes - """ - discovered_skills = [] - - for finding in self.assessment.findings: - if finding.attribute.id in attribute_ids and self._should_extract_pattern( - finding - ): - skill = self._create_skill_from_finding(finding) - if skill: - discovered_skills.append(skill) - - # Sort by confidence descending - discovered_skills.sort(key=lambda s: s.confidence, reverse=True) - - return discovered_skills - - def _should_extract_pattern(self, finding: Finding) -> bool: - """Determine if a finding should have its pattern extracted. - - Args: - finding: The finding to evaluate - - Returns: - True if pattern should be extracted - """ - # Only extract from passing findings with high scores - if finding.status != "pass": - return False - - if finding.score < self.min_score: - return False - - # Skip if attribute not in our known skills mapping - if finding.attribute.id not in self.SKILL_NAMES: - return False - - return True - - def _create_skill_from_finding(self, finding: Finding) -> DiscoveredSkill | None: - """Create a DiscoveredSkill from a high-scoring finding. - - Args: - finding: The finding to convert to a skill - - Returns: - DiscoveredSkill object or None if skill info not found - """ - attribute_id = finding.attribute.id - skill_info = self.SKILL_NAMES.get(attribute_id) - - if not skill_info: - return None - - # Calculate confidence (directly from score) - confidence = finding.score - - # Calculate impact based on tier - tier = finding.attribute.tier - impact_score = self.TIER_IMPACT_SCORES.get(tier, 5.0) - - # Calculate reusability (for now, use a simple heuristic based on tier) - # Tier 1 attributes are more reusable across projects - reusability_score = 100.0 - (tier - 1) * 20.0 # T1=100, T2=80, T3=60, T4=40 - - # Extract code examples from finding details - code_examples = self._extract_code_examples(finding) - - # Create pattern summary from finding - pattern_summary = self._create_pattern_summary(finding) - - # Citations are not stored in current Attribute model, use empty list - citations = [] - - return DiscoveredSkill( - skill_id=skill_info["skill_id"], - name=skill_info["name"], - description=skill_info["description"], - confidence=confidence, - source_attribute_id=attribute_id, - reusability_score=reusability_score, - impact_score=impact_score, - pattern_summary=pattern_summary, - code_examples=code_examples, - citations=citations, - ) - - def _extract_code_examples(self, finding: Finding) -> list[str]: - """Extract code examples from finding details. 
- - Args: - finding: The finding to extract examples from - - Returns: - List of code example strings - """ - examples = [] - - # Use evidence as examples - if finding.evidence: - for item in finding.evidence: - if item and item.strip(): - examples.append(item) - - # Add remediation steps as examples if available - if finding.remediation and finding.remediation.steps: - for step in finding.remediation.steps: - if step.strip(): - examples.append(step) - - return examples[:3] # Limit to 3 examples - - def _create_pattern_summary(self, finding: Finding) -> str: - """Create a human-readable pattern summary from a finding. - - Args: - finding: The finding to summarize - - Returns: - Pattern summary string - """ - # Use the attribute's description as the pattern summary - if finding.attribute.description: - return finding.attribute.description - - # Fallback to finding evidence - if finding.evidence and len(finding.evidence) > 0: - evidence_str = "; ".join(finding.evidence[:2]) - return f"This repository successfully implements {finding.attribute.name}. {evidence_str}" - - # Final fallback - return f"This repository successfully implements {finding.attribute.name} at a high level ({finding.score:.1f}/100)." diff --git a/src/agentready/learners/prompt_templates.py b/src/agentready/learners/prompt_templates.py deleted file mode 100644 index df88d969..00000000 --- a/src/agentready/learners/prompt_templates.py +++ /dev/null @@ -1,100 +0,0 @@ -"""Prompt templates for LLM-powered pattern extraction.""" - -PATTERN_EXTRACTION_PROMPT = """You are analyzing a high-scoring repository to extract a reusable pattern as a Claude Code skill. - -## Context -Repository: {repo_name} -Attribute: {attribute_name} ({attribute_description}) -Tier: {tier} (1=Essential, 4=Advanced) -Score: {score}/100 -Primary Language: {primary_language} - -## Evidence from Assessment -{evidence} - -## Code Samples from Repository -{code_samples} - ---- - -## Task - -Extract this pattern as a Claude Code skill with the following components: - -### 1. Skill Description (1-2 sentences) -Write an invocation-optimized description that helps Claude Code decide when to use this skill. -Focus on WHAT problem it solves and WHEN to apply it. - -### 2. Step-by-Step Instructions (5-10 steps) -Provide concrete, actionable steps. Each step should: -- Start with an action verb -- Include specific commands or code where applicable -- Define success criteria for that step - -Be explicit. Do not assume prior knowledge. - -### 3. Code Examples (2-3 examples) -Extract real code snippets from the repository that demonstrate this pattern. -For EACH example: -- Include the file path -- Show the relevant code (10-50 lines) -- Explain WHY this demonstrates the pattern - -### 4. Best Practices (3-5 principles) -Derive best practices from the successful implementation you analyzed. -What made this repository score {score}/100? - -### 5. Anti-Patterns to Avoid (2-3 mistakes) -What common mistakes did this repository avoid? -What would have reduced the score? - ---- - -## Output Format - -Return ONLY valid JSON matching this schema: - -{{ - "skill_description": "One sentence explaining what and when", - "instructions": [ - "Step 1: Specific action with command", - "Step 2: Next action with success criteria", - ... - ], - "code_examples": [ - {{ - "file_path": "relative/path/to/file.py", - "code": "actual code snippet", - "explanation": "Why this demonstrates the pattern" - }}, - ... - ], - "best_practices": [ - "Principle 1 derived from this repository", - ... 
- ], - "anti_patterns": [ - "Common mistake this repo avoided", - ... - ] -}} - -## Rules - -1. NEVER invent code - only use code from the samples provided -2. Be specific - use exact file paths, line numbers, command syntax -3. Focus on actionable guidance, not theory -4. Derive insights from THIS repository, not general knowledge -5. Return ONLY the JSON object, no markdown formatting -""" - -CODE_SAMPLING_GUIDANCE = """When selecting code samples to analyze: - -1. For `claude_md_file`: Include the CLAUDE.md file itself -2. For `type_annotations`: Sample 3-5 .py files with type hints -3. For `deterministic_enforcement`: Include .pre-commit-config.yaml and .claude/settings.json -4. For `standard_project_layout`: Show directory tree + key files -5. For `lock_files`: Include requirements.txt, poetry.lock, or go.sum - -Limit to 3-5 files, max 100 lines per file to stay under token limits. -""" diff --git a/src/agentready/learners/skill_generator.py b/src/agentready/learners/skill_generator.py deleted file mode 100644 index ec36f158..00000000 --- a/src/agentready/learners/skill_generator.py +++ /dev/null @@ -1,202 +0,0 @@ -"""Skill generation from discovered patterns.""" - -from pathlib import Path - -from agentready.models import DiscoveredSkill - - -class SkillGenerator: - """Generates Claude Code skills from discovered patterns. - - Handles file I/O and format conversion for skill proposals. - """ - - def __init__(self, output_dir: Path | str = ".skills-proposals"): - """Initialize skill generator. - - Args: - output_dir: Directory to write generated skills - """ - self.output_dir = Path(output_dir) - - def generate_skill_file(self, skill: DiscoveredSkill) -> Path: - """Generate a SKILL.md file from a discovered skill. - - Args: - skill: The discovered skill to generate - - Returns: - Path to the generated SKILL.md file - """ - # Create skill directory - skill_dir = self.output_dir / skill.skill_id - skill_dir.mkdir(parents=True, exist_ok=True) - - # Generate SKILL.md content - skill_content = skill.to_skill_md() - - # Write to file - skill_file = skill_dir / "SKILL.md" - skill_file.write_text(skill_content, encoding="utf-8") - - return skill_file - - def generate_github_issue(self, skill: DiscoveredSkill) -> Path: - """Generate a GitHub issue template from a discovered skill. - - Args: - skill: The discovered skill to generate - - Returns: - Path to the generated issue template file - """ - # Create output directory - self.output_dir.mkdir(parents=True, exist_ok=True) - - # Generate issue content - issue_content = skill.to_github_issue() - - # Write to file - issue_file = self.output_dir / f"skill-{skill.skill_id}.md" - issue_file.write_text(issue_content, encoding="utf-8") - - return issue_file - - def generate_markdown_report(self, skill: DiscoveredSkill) -> Path: - """Generate a detailed markdown report for a skill. - - Args: - skill: The discovered skill to document - - Returns: - Path to the generated markdown report - """ - # Create output directory - self.output_dir.mkdir(parents=True, exist_ok=True) - - # Generate markdown content - markdown_content = self._create_markdown_report(skill) - - # Write to file - report_file = self.output_dir / f"{skill.skill_id}-report.md" - report_file.write_text(markdown_content, encoding="utf-8") - - return report_file - - def generate_all_formats(self, skill: DiscoveredSkill) -> dict[str, Path]: - """Generate all output formats for a skill. 
- - Args: - skill: The discovered skill to generate - - Returns: - Dictionary mapping format name to file path - """ - return { - "skill_md": self.generate_skill_file(skill), - "github_issue": self.generate_github_issue(skill), - "markdown_report": self.generate_markdown_report(skill), - } - - def generate_batch( - self, skills: list[DiscoveredSkill], output_format: str = "skill_md" - ) -> list[Path]: - """Generate multiple skills in batch. - - Args: - skills: List of discovered skills to generate - output_format: Format to generate (skill_md, github_issue, markdown_report, all) - - Returns: - List of generated file paths - """ - generated_files = [] - - for skill in skills: - if output_format == "skill_md": - generated_files.append(self.generate_skill_file(skill)) - elif output_format == "github_issue": - generated_files.append(self.generate_github_issue(skill)) - elif output_format == "markdown_report": - generated_files.append(self.generate_markdown_report(skill)) - elif output_format == "all": - results = self.generate_all_formats(skill) - generated_files.extend(results.values()) - - return generated_files - - def _create_markdown_report(self, skill: DiscoveredSkill) -> str: - """Create a detailed markdown report for a skill. - - Args: - skill: The skill to document - - Returns: - Markdown report content - """ - report = f"""# Skill Report: {skill.name} - -## Overview - -**Skill ID**: `{skill.skill_id}` -**Confidence**: {skill.confidence}% -**Impact**: +{skill.impact_score} pts -**Reusability**: {skill.reusability_score}% -**Source Attribute**: {skill.source_attribute_id} - ---- - -## Description - -{skill.description} - ---- - -## Pattern Summary - -{skill.pattern_summary} - ---- - -## Implementation Guidance - -### When to Use This Skill - -Use this skill when you need to apply the pattern described above to your repository. 
- -### Code Examples - -""" - - if skill.code_examples: - for idx, example in enumerate(skill.code_examples, 1): - report += f"\n#### Example {idx}\n\n```\n{example}\n```\n" - else: - report += "_No code examples available_\n" - - report += "\n---\n\n## Research Citations\n\n" - - if skill.citations: - for citation in skill.citations: - url_part = f" - [Link]({citation.url})" if citation.url else "" - report += f"### {citation.source}: {citation.title}{url_part}\n\n" - report += f"**Relevance**: {citation.relevance}\n\n" - else: - report += "_No citations available_\n" - - report += f""" ---- - -## Metrics - -- **Confidence Score**: {skill.confidence}% - How confident we are this is a valid pattern -- **Impact Score**: {skill.impact_score} pts - Expected score improvement from applying this skill -- **Reusability Score**: {skill.reusability_score}% - How often this pattern applies across projects - ---- - -**Generated by**: AgentReady Skill Generator -**Source**: Pattern extracted from {skill.source_attribute_id} assessment -""" - - return report diff --git a/src/agentready/services/learning_service.py b/src/agentready/services/learning_service.py deleted file mode 100644 index 472e65da..00000000 --- a/src/agentready/services/learning_service.py +++ /dev/null @@ -1,364 +0,0 @@ -"""Learning service for extracting patterns and generating skills.""" - -import json -import logging -import os -from datetime import datetime -from pathlib import Path - -from agentready.learners import PatternExtractor, SkillGenerator -from agentready.models import Assessment, DiscoveredSkill, Finding - -logger = logging.getLogger(__name__) - - -class LearningService: - """Orchestrates continuous learning workflow for skill extraction. - - Coordinates pattern extraction from assessments and skill generation - in various output formats. - """ - - def __init__( - self, - min_confidence: float = 70.0, - output_dir: Path | str = ".skills-proposals", - ): - """Initialize learning service. - - Args: - min_confidence: Minimum confidence score to include skills (0-100) - output_dir: Directory for generated skill files - """ - self.min_confidence = min_confidence - self.output_dir = Path(output_dir) - self.skill_generator = SkillGenerator(output_dir=self.output_dir) - - def load_assessment(self, assessment_file: Path) -> Assessment: - """Load assessment from JSON file. - - Args: - assessment_file: Path to assessment JSON file - - Returns: - Loaded Assessment object - - Raises: - FileNotFoundError: If assessment file doesn't exist - ValueError: If assessment file is invalid JSON - """ - if not assessment_file.exists(): - raise FileNotFoundError(f"Assessment file not found: {assessment_file}") - - with open(assessment_file, encoding="utf-8") as f: - try: - data = json.load(f) - except json.JSONDecodeError as e: - raise ValueError(f"Invalid JSON in assessment file: {e}") - - # For now, we work with the dict directly - # In future, could deserialize to Assessment object - return data - - def extract_patterns_from_file( - self, - assessment_file: Path, - attribute_ids: list[str] | None = None, - enable_llm: bool = False, - llm_budget: int = 5, - llm_max_retries: int = 3, - ) -> list[DiscoveredSkill]: - """Extract patterns from an assessment file. 
- - Args: - assessment_file: Path to assessment JSON file - attribute_ids: Optional list of specific attributes to extract - enable_llm: Enable LLM enrichment - llm_budget: Max number of skills to enrich with LLM - llm_max_retries: Maximum retry attempts for LLM rate limits - - Returns: - List of discovered skills meeting confidence threshold - """ - # Load assessment (returns dict for now) - assessment_data = self.load_assessment(assessment_file) - - # Convert to Assessment object for pattern extraction - # For MVP, we'll work with the dict and create Finding objects manually - # In future, add proper deserialization - from agentready.models import Attribute, Finding, Repository - - # Reconstruct Assessment object from dict - repo_data = assessment_data["repository"] - - # Try to use the path from the assessment data if it's a valid git repo - # Otherwise use the parent directory of the assessment file - repo_path_from_json = Path(repo_data.get("path", "")) - if repo_path_from_json.exists() and (repo_path_from_json / ".git").exists(): - actual_repo_path = repo_path_from_json - else: - # Fallback: assume assessment is in .agentready/ subdirectory - actual_repo_path = assessment_file.parent.parent - - repo = Repository( - path=actual_repo_path, - name=repo_data.get("name", "unknown"), - url=repo_data.get("url"), - branch=repo_data.get("branch", "unknown"), - commit_hash=repo_data.get("commit_hash", "unknown"), - languages=repo_data.get("languages", {}), - total_files=repo_data["total_files"], - total_lines=repo_data["total_lines"], - ) - - findings = [] - for finding_data in assessment_data["findings"]: - # Reconstruct Attribute - attr_data = finding_data["attribute"] - - attribute = Attribute( - id=attr_data["id"], - name=attr_data["name"], - category=attr_data.get("category", "Unknown"), - tier=attr_data["tier"], - description=attr_data["description"], - criteria=attr_data.get("criteria", ""), - default_weight=attr_data.get("default_weight", 1.0), - ) - - # Reconstruct Finding - finding = Finding( - attribute=attribute, - status=finding_data["status"], - score=finding_data.get("score"), - measured_value=finding_data.get("measured_value"), - threshold=finding_data.get("threshold"), - evidence=finding_data.get("evidence", []), - remediation=None, # Skip complex Remediation reconstruction for now - error_message=finding_data.get("error_message"), - ) - findings.append(finding) - - assessment = Assessment( - repository=repo, - timestamp=datetime.fromisoformat(assessment_data["timestamp"]), - overall_score=assessment_data["overall_score"], - certification_level=assessment_data["certification_level"], - attributes_assessed=assessment_data["attributes_assessed"], - attributes_not_assessed=assessment_data.get( - "attributes_skipped", assessment_data.get("attributes_not_assessed", 0) - ), - attributes_total=assessment_data["attributes_total"], - findings=findings, - config=None, # Skip config for now - duration_seconds=assessment_data["duration_seconds"], - ) - - # Extract patterns - extractor = PatternExtractor(assessment, min_score=self.min_confidence) - - if attribute_ids: - discovered_skills = extractor.extract_specific_patterns(attribute_ids) - else: - discovered_skills = extractor.extract_all_patterns() - - # Filter by min confidence - discovered_skills = [ - s for s in discovered_skills if s.confidence >= self.min_confidence - ] - - # Optionally enrich with LLM - if enable_llm and discovered_skills: - discovered_skills = self._enrich_with_llm( - discovered_skills, assessment, llm_budget, 
llm_max_retries - ) - - return discovered_skills - - def generate_skills( - self, skills: list[DiscoveredSkill], output_format: str = "json" - ) -> list[Path]: - """Generate skill files in specified format. - - Args: - skills: List of discovered skills - output_format: Format to generate (json, skill_md, github_issues, all) - - Returns: - List of generated file paths - """ - generated_files = [] - - if output_format == "json": - json_file = self._generate_json(skills) - generated_files.append(json_file) - - elif output_format == "skill_md": - for skill in skills: - skill_file = self.skill_generator.generate_skill_file(skill) - generated_files.append(skill_file) - - elif output_format == "github_issues": - for skill in skills: - issue_file = self.skill_generator.generate_github_issue(skill) - generated_files.append(issue_file) - - elif output_format == "markdown": - for skill in skills: - report_file = self.skill_generator.generate_markdown_report(skill) - generated_files.append(report_file) - - elif output_format == "all": - # Generate JSON summary - json_file = self._generate_json(skills) - generated_files.append(json_file) - - # Generate all formats for each skill - for skill in skills: - results = self.skill_generator.generate_all_formats(skill) - generated_files.extend(results.values()) - - return generated_files - - def _generate_json(self, skills: list[DiscoveredSkill]) -> Path: - """Generate JSON file with discovered skills. - - Args: - skills: List of discovered skills - - Returns: - Path to generated JSON file - """ - self.output_dir.mkdir(parents=True, exist_ok=True) - - data = { - "generated_at": datetime.now().isoformat(), - "skill_count": len(skills), - "min_confidence": self.min_confidence, - "discovered_skills": [skill.to_dict() for skill in skills], - } - - json_file = self.output_dir / "discovered-skills.json" - with open(json_file, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - - return json_file - - def _enrich_with_llm( - self, - skills: list[DiscoveredSkill], - assessment: Assessment, - budget: int, - max_retries: int = 3, - ) -> list[DiscoveredSkill]: - """Enrich top N skills with LLM analysis. 
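The Repository reconstruction above resolves the working tree with a two-step fallback: trust the path recorded in the assessment JSON only if it is an actual git checkout, otherwise assume the assessment file lives in `<repo>/.agentready/` and walk up two levels. Distilled as a standalone helper (the function name is illustrative):

```python
from pathlib import Path


def resolve_repo_path(assessment_file: Path, recorded_path: str) -> Path:
    """Prefer the recorded path when it is a real git checkout; otherwise
    assume the assessment sits in <repo>/.agentready/ and use its grandparent."""
    candidate = Path(recorded_path)
    if candidate.exists() and (candidate / ".git").exists():
        return candidate
    return assessment_file.parent.parent
```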
- - Args: - skills: List of discovered skills - assessment: Full assessment with findings - budget: Max skills to enrich - max_retries: Maximum retry attempts for LLM rate limits - - Returns: - List with top skills enriched - """ - from anthropic import Anthropic - - from agentready.learners.llm_enricher import LLMEnricher - - # Security: Get API key from environment - api_key = os.environ.get("ANTHROPIC_API_KEY") - if not api_key: - logger.warning("LLM enrichment enabled but ANTHROPIC_API_KEY not set") - return skills - - # Security: Clear API key from environment to prevent exposure - # API keys should not persist in os.environ where they could be logged - try: - del os.environ["ANTHROPIC_API_KEY"] - except KeyError: - pass # Already removed or never existed - - # Initialize LLM enricher - client = Anthropic(api_key=api_key) - enricher = LLMEnricher(client) - - # Security: Clear API key from local scope after client creation - api_key = None - - # Enrich top N skills - enriched_skills = [] - for i, skill in enumerate(skills): - if i < budget: - # Find the finding for this skill - finding = self._find_finding_for_skill(assessment, skill) - if finding: - try: - enriched = enricher.enrich_skill( - skill, - assessment.repository, - finding, - max_retries=max_retries, - ) - enriched_skills.append(enriched) - except Exception as e: - logger.warning(f"Enrichment failed for {skill.skill_id}: {e}") - enriched_skills.append(skill) # Fallback to original - else: - enriched_skills.append(skill) - else: - # Beyond budget, keep original - enriched_skills.append(skill) - - return enriched_skills - - def _find_finding_for_skill( - self, assessment: Assessment, skill: DiscoveredSkill - ) -> Finding | None: - """Find the Finding that generated a skill.""" - for finding in assessment.findings: - if finding.attribute.id == skill.source_attribute_id: - return finding - return None - - def run_full_workflow( - self, - assessment_file: Path, - output_format: str = "all", - attribute_ids: list[str] | None = None, - enable_llm: bool = False, - llm_budget: int = 5, - llm_max_retries: int = 3, - ) -> dict: - """Run complete learning workflow: extract + generate. - - Args: - assessment_file: Path to assessment JSON - output_format: Format for generated skills - attribute_ids: Optional specific attributes to extract - enable_llm: Enable LLM enrichment - llm_budget: Max skills to enrich with LLM - llm_max_retries: Maximum retry attempts for LLM rate limits - - Returns: - Dictionary with workflow results - """ - # Extract patterns - skills = self.extract_patterns_from_file( - assessment_file, - attribute_ids, - enable_llm=enable_llm, - llm_budget=llm_budget, - llm_max_retries=llm_max_retries, - ) - - # Generate output files - generated_files = self.generate_skills(skills, output_format) - - return { - "skills_discovered": len(skills), - "min_confidence": self.min_confidence, - "output_format": output_format, - "generated_files": [str(f) for f in generated_files], - "skills": skills, - } diff --git a/tests/integration/test_eval_harness_e2e.py b/tests/integration/test_eval_harness_e2e.py deleted file mode 100644 index 73887906..00000000 --- a/tests/integration/test_eval_harness_e2e.py +++ /dev/null @@ -1,178 +0,0 @@ -"""End-to-end integration tests for eval harness workflow. - -These tests verify the complete workflow from baseline establishment through -dashboard generation using mocked Terminal-Bench results. 
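The `_enrich_with_llm` loop above is budget-limited and fail-soft: only the first `budget` skills are sent to the model, and any per-skill failure falls back to the heuristic skill rather than aborting the run. A self-contained sketch of that control flow, with `enrich` standing in for the removed `LLMEnricher.enrich_skill` call:

```python
import logging

logger = logging.getLogger(__name__)


def enrich_within_budget(skills: list, budget: int, enrich) -> list:
    """Enrich only the first `budget` skills; keep the heuristic skill on any failure."""
    enriched = []
    for i, skill in enumerate(skills):
        if i >= budget:
            enriched.append(skill)  # beyond budget: keep the heuristic version
            continue
        try:
            enriched.append(enrich(skill))  # may raise on API errors
        except Exception as exc:
            logger.warning("Enrichment failed for %s: %s", getattr(skill, "skill_id", skill), exc)
            enriched.append(skill)  # fall back to the original skill
    return enriched
```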
-""" - -import subprocess -import tempfile -from pathlib import Path - -import pytest - -from agentready.services.eval_harness import BaselineEstablisher, TbenchRunner - - -@pytest.fixture -def temp_repo(): - """Create a temporary git repository for testing.""" - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - - # Initialize git repo - subprocess.run(["git", "init"], cwd=repo_path, capture_output=True, check=True) - subprocess.run( - ["git", "config", "user.email", "test@test.com"], - cwd=repo_path, - capture_output=True, - check=True, - ) - subprocess.run( - ["git", "config", "user.name", "Test User"], - cwd=repo_path, - capture_output=True, - check=True, - ) - - # Create minimal repo content - (repo_path / "README.md").write_text("# Test Repository\n\nTest content.") - (repo_path / "CLAUDE.md").write_text( - "# Claude Instructions\n\nTest instructions." - ) - (repo_path / "setup.py").write_text( - "from setuptools import setup\nsetup(name='test')" - ) - - # Commit - subprocess.run( - ["git", "add", "."], cwd=repo_path, capture_output=True, check=True - ) - subprocess.run( - ["git", "commit", "-m", "Initial commit"], - cwd=repo_path, - capture_output=True, - check=True, - ) - - yield repo_path - - -class TestEvalHarnessWorkflow: - """Test complete eval harness workflow end-to-end.""" - - def test_baseline_establishment(self, temp_repo): - """Test baseline establishment with mocked tbench.""" - # Create runner and establisher - runner = TbenchRunner(mock=True) - establisher = BaselineEstablisher(tbench_runner=runner) - - # Establish baseline - output_dir = temp_repo / ".agentready" / "eval_harness" / "baseline" - baseline = establisher.establish_baseline( - temp_repo, iterations=3, output_dir=output_dir - ) - - # Verify baseline metrics - assert baseline.iterations == 3 - assert baseline.mean_score > 0 - assert baseline.std_dev >= 0 - assert len(baseline.raw_results) == 3 - - # Verify files created - assert output_dir.exists() - assert (output_dir / "summary.json").exists() - assert len(list(output_dir.glob("run_*.json"))) == 3 - - def test_baseline_to_files(self, temp_repo): - """Test baseline establishment creates expected files.""" - runner = TbenchRunner(mock=True) - establisher = BaselineEstablisher(tbench_runner=runner) - - eval_dir = temp_repo / ".agentready" / "eval_harness" - baseline_dir = eval_dir / "baseline" - - # Establish baseline - baseline = establisher.establish_baseline( - temp_repo, iterations=3, output_dir=baseline_dir - ) - - # Verify baseline metrics - assert baseline.mean_score > 0 - - # Verify files created - assert (baseline_dir / "summary.json").exists() - run_files = list(baseline_dir.glob("run_*.json")) - assert len(run_files) == 3 - - # Verify summary file contains valid JSON - import json - - with open(baseline_dir / "summary.json") as f: - summary_data = json.load(f) - assert "mean_score" in summary_data - assert summary_data["iterations"] == 3 - - -class TestEvalHarnessFileStructure: - """Test eval harness creates correct file structure.""" - - def test_eval_harness_directory_structure(self, temp_repo): - """Test that eval harness creates expected directory structure.""" - runner = TbenchRunner(mock=True) - establisher = BaselineEstablisher(tbench_runner=runner) - - eval_dir = temp_repo / ".agentready" / "eval_harness" - baseline_dir = eval_dir / "baseline" - - # Run baseline - _baseline = establisher.establish_baseline( - temp_repo, iterations=3, output_dir=baseline_dir - ) - - # Verify directory structure - assert 
eval_dir.exists() - assert baseline_dir.exists() - - # Verify baseline files - assert (baseline_dir / "summary.json").exists() - run_files = list(baseline_dir.glob("run_*.json")) - assert len(run_files) == 3 - - # Verify file naming convention - for run_file in run_files: - assert run_file.stem.startswith("run_") - assert run_file.suffix == ".json" - - -class TestMockedTbenchDeterminism: - """Test that mocked tbench produces deterministic results.""" - - def test_mocked_results_reproducible(self, temp_repo): - """Test that mocked tbench gives same results for same repo.""" - runner = TbenchRunner(mock=True) - - # Run benchmark twice - result1 = runner.run_benchmark(temp_repo) - result2 = runner.run_benchmark(temp_repo) - - # Should be identical (deterministic based on repo) - assert result1.score == result2.score - assert result1.completion_rate == result2.completion_rate - assert result1.pytest_pass_rate == result2.pytest_pass_rate - assert result1.is_mocked is True - - def test_mocked_results_vary_with_variance(self, temp_repo): - """Test that mocked results have some variance across runs.""" - runner = TbenchRunner(mock=True) - establisher = BaselineEstablisher(tbench_runner=runner) - - baseline_dir = temp_repo / ".agentready" / "eval_harness" / "baseline" - baseline = establisher.establish_baseline( - temp_repo, iterations=5, output_dir=baseline_dir - ) - - # With 5 iterations, should have some variance - # (unless by chance they're all exactly the same, which is unlikely) - scores = [r.score for r in baseline.raw_results] - assert len(set(scores)) >= 1 # At least 1 unique score - assert baseline.std_dev >= 0 # Standard deviation calculated diff --git a/tests/unit/learners/test_llm_enricher.py b/tests/unit/learners/test_llm_enricher.py deleted file mode 100644 index 2f2ec721..00000000 --- a/tests/unit/learners/test_llm_enricher.py +++ /dev/null @@ -1,456 +0,0 @@ -"""Tests for LLM enrichment functionality.""" - -import json -from unittest.mock import Mock - -import pytest -from anthropic import Anthropic - -from agentready.learners.llm_enricher import LLMEnricher -from agentready.models import Attribute, DiscoveredSkill, Finding, Repository - - -@pytest.fixture -def mock_anthropic_client(): - """Mock Anthropic client.""" - client = Mock(spec=Anthropic) - - # Mock response - mock_response = Mock() - mock_response.content = [ - Mock( - text=json.dumps( - { - "skill_description": "Enhanced description from LLM", - "instructions": [ - "Step 1: Do something specific", - "Step 2: Verify it worked", - "Step 3: Commit the changes", - ], - "code_examples": [ - { - "file_path": "src/example.py", - "code": "def example():\n pass", - "explanation": "This shows the pattern", - } - ], - "best_practices": ["Always use type hints", "Test your code"], - "anti_patterns": [ - "Don't use global variables", - "Avoid mutable defaults", - ], - } - ) - ) - ] - - client.messages.create.return_value = mock_response - return client - - -@pytest.fixture -def basic_skill(): - """Basic skill from heuristic extraction.""" - return DiscoveredSkill( - skill_id="test-skill", - name="Test Skill", - description="Basic description", - confidence=95.0, - source_attribute_id="test_attribute", - reusability_score=100.0, - impact_score=50.0, - pattern_summary="Test pattern", - code_examples=["Basic example"], - citations=[], - ) - - -@pytest.fixture -def sample_repository(tmp_path): - """Sample repository.""" - repo_path = tmp_path / "test-repo" - repo_path.mkdir() - - # Create .git directory - (repo_path / ".git").mkdir() - - 
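The baseline tests above read `iterations` and `mean_score` back out of `summary.json` and expect a non-negative `std_dev` on the baseline object. A sketch of that aggregation step under those constraints; whether the deviation is population or sample, and whether it is persisted to the file, are assumptions here:

```python
import json
import statistics
from pathlib import Path


def write_baseline_summary(scores: list[float], output_dir: Path) -> dict:
    """Aggregate N benchmark scores into the summary the tests read back."""
    output_dir.mkdir(parents=True, exist_ok=True)
    summary = {
        "iterations": len(scores),
        "mean_score": statistics.mean(scores),
        "std_dev": statistics.pstdev(scores) if len(scores) > 1 else 0.0,  # population std dev assumed
    }
    (output_dir / "summary.json").write_text(json.dumps(summary, indent=2))
    return summary
```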
# Create a sample file - (repo_path / "test.py").write_text("def test():\n pass") - - return Repository( - path=repo_path, - name="test-repo", - url=None, - branch="main", - commit_hash="abc123", - languages={"Python": 1}, - total_files=1, - total_lines=2, - ) - - -@pytest.fixture -def sample_finding(): - """Sample finding.""" - attr = Attribute( - id="test_attribute", - name="Test Attribute", - category="Testing", - tier=1, - description="A test attribute", - criteria="Must pass", - default_weight=1.0, - ) - - return Finding( - attribute=attr, - status="pass", - score=95.0, - measured_value="passing", - threshold="pass", - evidence=["Test evidence 1", "Test evidence 2"], - remediation=None, - error_message=None, - ) - - -def test_enrich_skill_success( - mock_anthropic_client, basic_skill, sample_repository, sample_finding, tmp_path -): - """Test successful skill enrichment.""" - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=cache_dir) - - enriched = enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - - # Verify API was called - assert mock_anthropic_client.messages.create.called - - # Verify enrichment - assert enriched.description == "Enhanced description from LLM" - assert len(enriched.code_examples) > len(basic_skill.code_examples) - - -def test_enrich_skill_uses_cache( - mock_anthropic_client, basic_skill, sample_repository, sample_finding, tmp_path -): - """Test that second enrichment uses cache.""" - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=cache_dir) - - # First call - enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - first_call_count = mock_anthropic_client.messages.create.call_count - - # Second call (should use cache) - enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - second_call_count = mock_anthropic_client.messages.create.call_count - - # Verify cache was used - assert second_call_count == first_call_count - - -def test_enrich_skill_api_error_fallback( - basic_skill, sample_repository, sample_finding, tmp_path -): - """Test fallback to original skill on API error.""" - client = Mock(spec=Anthropic) - client.messages.create.side_effect = Exception("API Error") - - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(client, cache_dir=cache_dir) - - enriched = enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - - # Should return original skill - assert enriched.skill_id == basic_skill.skill_id - assert enriched.description == basic_skill.description - - -def test_enrich_skill_no_cache( - mock_anthropic_client, basic_skill, sample_repository, sample_finding, tmp_path -): - """Test enrichment with caching disabled.""" - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=cache_dir) - - # First call with use_cache=False - enricher.enrich_skill( - basic_skill, sample_repository, sample_finding, use_cache=False - ) - first_count = mock_anthropic_client.messages.create.call_count - - # Second call with use_cache=False (should call API again) - enricher.enrich_skill( - basic_skill, sample_repository, sample_finding, use_cache=False - ) - second_count = mock_anthropic_client.messages.create.call_count - - # Should have called API twice - assert second_count == first_count + 1 - - -def test_enrich_skill_custom_model( - basic_skill, sample_repository, sample_finding, tmp_path -): - """Test enricher with custom model.""" - client = Mock(spec=Anthropic) - mock_response = Mock() 
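The mocked client fixture above pins the JSON shape the enricher expects back from the model: `skill_description`, `instructions`, `code_examples`, `best_practices`, `anti_patterns`. A sketch of that payload and of a field-by-field merge onto the heuristic skill; the merge policy shown is illustrative, not the removed `_merge_enrichment` verbatim:

```python
# Payload keys mirror the mocked response fixture; the concrete values are examples.
payload = {
    "skill_description": "Enhanced description from LLM",
    "instructions": ["Step 1: Do something specific"],
    "code_examples": [
        {"file_path": "src/example.py", "code": "def example(): ...", "explanation": "Shows the pattern"}
    ],
    "best_practices": ["Always use type hints"],
    "anti_patterns": ["Don't use global variables"],
}


def merge_enrichment(skill: dict, enrichment: dict) -> dict:
    """Overlay LLM-provided fields onto the heuristic skill, keeping originals as fallback."""
    merged = dict(skill)
    merged["description"] = enrichment.get("skill_description") or skill.get("description")
    merged["code_examples"] = list(skill.get("code_examples", [])) + [
        ex["code"] for ex in enrichment.get("code_examples", [])
    ]
    return merged


print(merge_enrichment({"description": "Basic description", "code_examples": ["Basic example"]}, payload))
```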
- mock_response.content = [ - Mock(text='{"skill_description": "Test", "instructions": []}') - ] - client.messages.create.return_value = mock_response - - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(client, cache_dir=cache_dir, model="claude-3-opus-20240229") - - enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - - # Verify correct model was used - call_args = client.messages.create.call_args - assert call_args[1]["model"] == "claude-3-opus-20240229" - - -def test_enrich_skill_empty_evidence( - mock_anthropic_client, basic_skill, sample_repository, tmp_path -): - """Test enrichment with empty evidence.""" - attr = Attribute( - id="test", - name="Test", - category="Test", - tier=1, - description="Test", - criteria="Test", - default_weight=1.0, - ) - finding_no_evidence = Finding( - attribute=attr, - status="pass", - score=100.0, - measured_value="pass", - threshold="pass", - evidence=[], # Empty evidence - remediation=None, - error_message=None, - ) - - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=cache_dir) - - # Should handle empty evidence gracefully - enriched = enricher.enrich_skill( - basic_skill, sample_repository, finding_no_evidence - ) - - assert enriched is not None - - -def test_enrich_skill_none_evidence( - mock_anthropic_client, basic_skill, sample_repository, tmp_path -): - """Test enrichment with None evidence.""" - attr = Attribute( - id="test", - name="Test", - category="Test", - tier=1, - description="Test", - criteria="Test", - default_weight=1.0, - ) - finding_none_evidence = Finding( - attribute=attr, - status="pass", - score=100.0, - measured_value="pass", - threshold="pass", - evidence=None, # None evidence - remediation=None, - error_message=None, - ) - - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=cache_dir) - - # Should handle None evidence gracefully - enriched = enricher.enrich_skill( - basic_skill, sample_repository, finding_none_evidence - ) - - assert enriched is not None - - -def test_enrich_skill_rate_limit_retry( - basic_skill, sample_repository, sample_finding, tmp_path -): - """Test rate limit error with retry.""" - from unittest.mock import patch - - from anthropic import RateLimitError - - client = Mock(spec=Anthropic) - - # First call raises rate limit, second succeeds - # Mock response and body for RateLimitError - mock_response = Mock() - mock_response.status_code = 429 - rate_limit_error = RateLimitError( - "Rate limit", response=mock_response, body={"error": "rate_limit"} - ) - rate_limit_error.retry_after = 1 # 1 second retry - - success_response = Mock() - success_response.content = [ - Mock(text='{"skill_description": "Success", "instructions": []}') - ] - - client.messages.create.side_effect = [rate_limit_error, success_response] - - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(client, cache_dir=cache_dir) - - # Mock sleep to avoid actual delay - with patch("agentready.learners.llm_enricher.sleep"): - enriched = enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - - # Should eventually succeed after retry - assert enriched.description == "Success" - # Verify both calls were made - assert client.messages.create.call_count == 2 - - -def test_enrich_skill_api_error_specific( - basic_skill, sample_repository, sample_finding, tmp_path -): - """Test specific API error handling.""" - from anthropic import APIError - - client = Mock(spec=Anthropic) - # Mock request for APIError - mock_request = Mock() 
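The rate-limit test below drives one `RateLimitError` (carrying a `retry_after` hint) followed by a success, with `sleep` patched out. A sketch of the retry wrapper such a test implies; the exponential-backoff fallback is an assumption, not taken from the removed module:

```python
from time import sleep

from anthropic import RateLimitError


def call_with_retries(call, max_retries: int = 3):
    """Retry on rate limits, honouring retry_after when present; re-raise once exhausted."""
    for attempt in range(max_retries + 1):
        try:
            return call()  # stand-in for the Anthropic messages.create invocation
        except RateLimitError as exc:
            if attempt == max_retries:
                raise
            sleep(getattr(exc, "retry_after", None) or 2 ** attempt)  # backoff policy assumed
```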
- mock_request.method = "POST" - client.messages.create.side_effect = APIError( - "API Error", request=mock_request, body={"error": "api_error"} - ) - - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(client, cache_dir=cache_dir) - - enriched = enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - - # Should fallback to original skill - assert enriched == basic_skill - - -def test_enrich_skill_invalid_json_response( - basic_skill, sample_repository, sample_finding, tmp_path -): - """Test handling of invalid JSON in response.""" - client = Mock(spec=Anthropic) - mock_response = Mock() - mock_response.content = [Mock(text="Not valid JSON{")] - client.messages.create.return_value = mock_response - - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(client, cache_dir=cache_dir) - - # Should fallback to original skill on parse error - enriched = enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - - assert enriched.skill_id == basic_skill.skill_id - - -def test_enrich_skill_partial_json_response( - basic_skill, sample_repository, sample_finding, tmp_path -): - """Test handling of partial/incomplete JSON response.""" - client = Mock(spec=Anthropic) - mock_response = Mock() - # Missing required fields - mock_response.content = [Mock(text='{"skill_description": "Partial"}')] - client.messages.create.return_value = mock_response - - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(client, cache_dir=cache_dir) - - # Should handle gracefully (may use partial or fallback) - enriched = enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - - assert enriched is not None - - -def test_llm_enricher_init_default_cache(mock_anthropic_client): - """Test LLMEnricher initialization with default cache directory.""" - enricher = LLMEnricher(mock_anthropic_client) - - assert enricher.client == mock_anthropic_client - assert enricher.model == "claude-sonnet-4-5-20250929" - assert enricher.cache is not None - - -def test_llm_enricher_init_custom_cache(mock_anthropic_client, tmp_path): - """Test LLMEnricher initialization with custom cache directory.""" - custom_cache = tmp_path / "custom-cache" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=custom_cache) - - assert enricher.cache is not None - - -def test_merge_enrichment(mock_anthropic_client, basic_skill, tmp_path): - """Test merging enrichment data into skill.""" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=tmp_path) - - enrichment_data = { - "skill_description": "Enhanced description", - "instructions": ["Step 1", "Step 2"], - "code_examples": [ - {"file_path": "test.py", "code": "code", "explanation": "ex"} - ], - "best_practices": ["Practice 1"], - "anti_patterns": ["AntiPattern 1"], - } - - enriched = enricher._merge_enrichment(basic_skill, enrichment_data) - - assert enriched.description == "Enhanced description" - assert "Step 1" in str(enriched.code_examples) or "Step 1" in str( - enriched.citations - ) - - -def test_call_claude_api_builds_prompt( - mock_anthropic_client, basic_skill, sample_repository, sample_finding, tmp_path -): - """Test that _call_claude_api builds correct prompt.""" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=tmp_path) - - enricher._call_claude_api( - basic_skill, sample_finding, sample_repository, "code samples" - ) - - # Verify API was called with proper arguments - assert mock_anthropic_client.messages.create.called - call_args = mock_anthropic_client.messages.create.call_args - - # Check prompt contains key 
information - messages = call_args[1]["messages"] - assert len(messages) > 0 - prompt = messages[0]["content"] - assert "test-repo" in prompt.lower() or "test" in prompt.lower() - - -def test_enrich_skill_initializes_code_sampler( - mock_anthropic_client, basic_skill, sample_repository, sample_finding, tmp_path -): - """Test that enrich_skill initializes code sampler.""" - cache_dir = tmp_path / "cache" - enricher = LLMEnricher(mock_anthropic_client, cache_dir=cache_dir) - - # Initially None - assert enricher.code_sampler is None - - enricher.enrich_skill(basic_skill, sample_repository, sample_finding) - - # Should be initialized after enrichment - assert enricher.code_sampler is not None diff --git a/tests/unit/learners/test_pattern_extractor.py b/tests/unit/learners/test_pattern_extractor.py deleted file mode 100644 index 9b1a93f7..00000000 --- a/tests/unit/learners/test_pattern_extractor.py +++ /dev/null @@ -1,650 +0,0 @@ -"""Unit tests for pattern extraction.""" - -from datetime import datetime -from pathlib import Path - -import pytest - -from agentready.learners.pattern_extractor import PatternExtractor -from agentready.models import Assessment, Attribute, Finding, Repository - - -def create_dummy_finding() -> Finding: - """Create a dummy finding for testing (not_applicable status).""" - attr = Attribute( - id="test_attr", - name="Test Attribute", - category="Testing", - tier=1, - description="Test attribute", - criteria="Test criteria", - default_weight=1.0, - ) - return Finding( - attribute=attr, - status="not_applicable", - score=None, - measured_value=None, - threshold=None, - evidence=[], - remediation=None, - error_message=None, - ) - - -def create_test_repository(tmp_path=None): - """Create a test repository with valid path.""" - if tmp_path is None: - # For inline usage without fixture, create minimal valid repo - import tempfile - - temp_dir = Path(tempfile.mkdtemp()) - (temp_dir / ".git").mkdir(exist_ok=True) - test_repo = temp_dir - else: - test_repo = tmp_path / "test-repo" - test_repo.mkdir(exist_ok=True) - (test_repo / ".git").mkdir(exist_ok=True) - - return Repository( - path=test_repo, - name="test", - url=None, - branch="main", - commit_hash="abc", - languages={}, - total_files=0, - total_lines=0, - ) - - -@pytest.fixture -def sample_repository(tmp_path): - """Create test repository.""" - # Create temporary directory with .git for Repository validation - test_repo = tmp_path / "test-repo" - test_repo.mkdir() - (test_repo / ".git").mkdir() - - return Repository( - path=test_repo, - name="test-repo", - url=None, - branch="main", - commit_hash="abc123", - languages={"Python": 100}, - total_files=10, - total_lines=500, - ) - - -@pytest.fixture -def sample_attribute_tier1(): - """Create tier 1 test attribute.""" - return Attribute( - id="claude_md_file", - name="CLAUDE.md File", - category="Documentation", - tier=1, - description="Comprehensive CLAUDE.md file with repository context", - criteria="File exists and contains required sections", - default_weight=1.0, - ) - - -@pytest.fixture -def sample_attribute_tier2(): - """Create tier 2 test attribute.""" - return Attribute( - id="type_annotations", - name="Type Annotations", - category="Code Quality", - tier=2, - description="Comprehensive type annotations in code", - criteria="80% of functions have type hints", - default_weight=0.8, - ) - - -@pytest.fixture -def sample_finding_high_score(sample_attribute_tier1): - """Create high-scoring passing finding.""" - return Finding( - attribute=sample_attribute_tier1, - 
status="pass", - score=95.0, - measured_value="present", - threshold="present", - evidence=["CLAUDE.md exists", "Contains 5/5 required sections"], - remediation=None, - error_message=None, - ) - - -@pytest.fixture -def sample_finding_low_score(sample_attribute_tier1): - """Create low-scoring finding.""" - return Finding( - attribute=sample_attribute_tier1, - status="pass", - score=65.0, - measured_value="partial", - threshold="complete", - evidence=["CLAUDE.md exists but incomplete"], - remediation=None, - error_message=None, - ) - - -@pytest.fixture -def sample_finding_failing(sample_attribute_tier2): - """Create failing finding.""" - return Finding( - attribute=sample_attribute_tier2, - status="fail", - score=45.0, - measured_value="30%", - threshold="80%", - evidence=["Only 30% coverage"], - remediation=None, - error_message=None, - ) - - -@pytest.fixture -def sample_assessment_with_findings( - sample_repository, sample_finding_high_score, sample_finding_low_score -): - """Create assessment with multiple findings.""" - return Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=85.0, - certification_level="Gold", - attributes_assessed=2, - attributes_not_assessed=0, - attributes_total=2, - findings=[sample_finding_high_score, sample_finding_low_score], - config=None, - duration_seconds=1.0, - ) - - -class TestPatternExtractor: - """Test PatternExtractor class.""" - - def test_init_default_min_score(self, sample_repository): - """Test initialization with default min score.""" - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=85.0, - certification_level="Gold", - attributes_assessed=0, - attributes_not_assessed=0, - attributes_total=0, - findings=[], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - assert extractor.min_score == 80.0 - - def test_init_custom_min_score(self, sample_repository): - """Test initialization with custom min score.""" - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=85.0, - certification_level="Gold", - attributes_assessed=0, - attributes_not_assessed=0, - attributes_total=0, - findings=[], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment, min_score=90.0) - assert extractor.min_score == 90.0 - - def test_extract_patterns_from_high_score_finding( - self, sample_repository, sample_finding_high_score - ): - """Test extracting pattern from high-score finding.""" - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=95.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[sample_finding_high_score], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - skills = extractor.extract_all_patterns() - - assert len(skills) == 1 - assert skills[0].confidence == 95.0 - assert skills[0].skill_id == "setup-claude-md" - assert skills[0].name == "Setup CLAUDE.md Configuration" - - def test_filters_low_score_findings(self, sample_assessment_with_findings): - """Test that low-score findings are filtered.""" - extractor = PatternExtractor(sample_assessment_with_findings, min_score=80.0) - skills = extractor.extract_all_patterns() - - # Only the high-score finding (95.0) should be included - assert len(skills) == 1 - assert skills[0].confidence == 95.0 - - def test_filters_failing_findings(self, sample_repository, 
sample_finding_failing): - """Test that failing findings are filtered.""" - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=45.0, - certification_level="Needs Improvement", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[sample_finding_failing], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - skills = extractor.extract_all_patterns() - - # Failing finding should not be extracted - assert len(skills) == 0 - - def test_sorts_by_confidence_descending(self, sample_repository): - """Test that patterns are sorted by confidence (highest first).""" - # Create multiple high-score findings with different scores - attr1 = Attribute( - id="claude_md_file", - name="CLAUDE.md File", - category="Documentation", - tier=1, - description="Test", - criteria="Test", - default_weight=1.0, - ) - attr2 = Attribute( - id="type_annotations", - name="Type Annotations", - category="Code Quality", - tier=2, - description="Test", - criteria="Test", - default_weight=0.8, - ) - - finding1 = Finding( - attribute=attr1, - status="pass", - score=85.0, - measured_value="good", - threshold="good", - evidence=["Test"], - remediation=None, - error_message=None, - ) - finding2 = Finding( - attribute=attr2, - status="pass", - score=95.0, - measured_value="excellent", - threshold="good", - evidence=["Test"], - remediation=None, - error_message=None, - ) - - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=90.0, - certification_level="Platinum", - attributes_assessed=2, - attributes_not_assessed=0, - attributes_total=2, - findings=[finding1, finding2], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - skills = extractor.extract_all_patterns() - - assert len(skills) == 2 - assert skills[0].confidence == 95.0 # Highest first - assert skills[1].confidence == 85.0 - - def test_extract_specific_patterns(self, sample_assessment_with_findings): - """Test extracting patterns for specific attribute IDs.""" - extractor = PatternExtractor(sample_assessment_with_findings) - skills = extractor.extract_specific_patterns(["claude_md_file"]) - - # Should only get claude_md_file patterns - assert len(skills) == 1 - assert skills[0].source_attribute_id == "claude_md_file" - - def test_extract_specific_patterns_filters_correctly( - self, sample_assessment_with_findings - ): - """Test that extract_specific_patterns filters by attribute ID.""" - extractor = PatternExtractor(sample_assessment_with_findings) - # Request non-existent attribute - skills = extractor.extract_specific_patterns(["non_existent_attr"]) - - assert len(skills) == 0 - - def test_should_extract_pattern_logic(self, sample_finding_high_score): - """Test _should_extract_pattern() logic.""" - assessment = Assessment( - repository=create_test_repository(), - timestamp=datetime.now(), - overall_score=95.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[create_dummy_finding()], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - - # Should extract: passing + high score + in SKILL_NAMES - assert extractor._should_extract_pattern(sample_finding_high_score) is True - - def test_should_not_extract_unknown_attribute(self, sample_repository): - """Test that unknown attributes are not extracted.""" - # Create finding with unknown attribute ID - unknown_attr = 
Attribute( - id="unknown_attribute", - name="Unknown", - category="Other", - tier=1, - description="Test", - criteria="Test", - default_weight=1.0, - ) - finding = Finding( - attribute=unknown_attr, - status="pass", - score=95.0, - measured_value="test", - threshold="test", - evidence=["Test"], - remediation=None, - error_message=None, - ) - - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=95.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[finding], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - skills = extractor.extract_all_patterns() - - # Unknown attribute should not be extracted - assert len(skills) == 0 - - def test_create_skill_from_finding(self, sample_finding_high_score): - """Test _create_skill_from_finding() creates valid skill.""" - assessment = Assessment( - repository=create_test_repository(), - timestamp=datetime.now(), - overall_score=95.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[create_dummy_finding()], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - skill = extractor._create_skill_from_finding(sample_finding_high_score) - - assert skill is not None - assert skill.skill_id == "setup-claude-md" - assert skill.name == "Setup CLAUDE.md Configuration" - assert skill.confidence == 95.0 - assert skill.source_attribute_id == "claude_md_file" - - def test_tier_based_impact_scores(self, sample_repository): - """Test that impact scores are calculated based on tier.""" - # Test all tiers - for tier, expected_impact in [(1, 50.0), (2, 30.0), (3, 15.0), (4, 5.0)]: - if tier == 1: - attr_id = "claude_md_file" - elif tier == 2: - attr_id = "type_annotations" - elif tier == 3: - attr_id = "deterministic_enforcement" - else: - continue # Only test tiers with known attributes - - attr = Attribute( - id=attr_id, - name=f"Tier {tier} Attr", - category="Test", - tier=tier, - description="Test", - criteria="Test", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=90.0, - measured_value="test", - threshold="test", - evidence=["Test"], - remediation=None, - error_message=None, - ) - - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=90.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[finding], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - skills = extractor.extract_all_patterns() - - if len(skills) > 0: - assert skills[0].impact_score == expected_impact - - def test_reusability_score_calculation(self, sample_repository): - """Test reusability score based on tier.""" - # Tier 1 should have highest reusability (100.0) - attr_t1 = Attribute( - id="claude_md_file", - name="CLAUDE.md", - category="Documentation", - tier=1, - description="Test", - criteria="Test", - default_weight=1.0, - ) - finding_t1 = Finding( - attribute=attr_t1, - status="pass", - score=90.0, - measured_value="test", - threshold="test", - evidence=["Test"], - remediation=None, - error_message=None, - ) - - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=90.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - 
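The tier test above pins the impact mapping (tiers 1 through 4 map to 50, 30, 15 and 5 points) and the reusability test pins 100.0 for tier 1. A sketch of that scoring table; the reusability values for lower tiers are assumed:

```python
IMPACT_BY_TIER = {1: 50.0, 2: 30.0, 3: 15.0, 4: 5.0}  # values asserted by the tier test


def impact_score(tier: int) -> float:
    return IMPACT_BY_TIER.get(tier, 5.0)


def reusability_score(tier: int) -> float:
    # Only the tier-1 value (100.0) is asserted in these tests; lower tiers are assumed here.
    return 100.0 if tier == 1 else 80.0


assert impact_score(1) == 50.0 and reusability_score(1) == 100.0
```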
findings=[finding_t1], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - skills = extractor.extract_all_patterns() - - assert len(skills) == 1 - assert skills[0].reusability_score == 100.0 # Tier 1 - - def test_extract_code_examples_from_evidence(self, sample_finding_high_score): - """Test extracting code examples from evidence.""" - assessment = Assessment( - repository=create_test_repository(), - timestamp=datetime.now(), - overall_score=95.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[create_dummy_finding()], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - examples = extractor._extract_code_examples(sample_finding_high_score) - - assert len(examples) > 0 - assert "CLAUDE.md exists" in examples - - def test_extract_code_examples_limits_to_three(self, sample_repository): - """Test that code examples are limited to 3.""" - attr = Attribute( - id="claude_md_file", - name="CLAUDE.md", - category="Documentation", - tier=1, - description="Test", - criteria="Test", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=90.0, - measured_value="test", - threshold="test", - evidence=["Example 1", "Example 2", "Example 3", "Example 4", "Example 5"], - remediation=None, - error_message=None, - ) - - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=90.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[create_dummy_finding()], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - examples = extractor._extract_code_examples(finding) - - assert len(examples) == 3 - - def test_create_pattern_summary(self, sample_finding_high_score): - """Test pattern summary generation.""" - assessment = Assessment( - repository=create_test_repository(), - timestamp=datetime.now(), - overall_score=95.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[create_dummy_finding()], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - summary = extractor._create_pattern_summary(sample_finding_high_score) - - # Should use attribute description - assert "Comprehensive CLAUDE.md" in summary - - def test_pattern_summary_fallback_to_evidence(self, sample_repository): - """Test pattern summary falls back to evidence when no description.""" - attr = Attribute( - id="claude_md_file", - name="CLAUDE.md File", - category="Documentation", - tier=1, - description="", # Empty description - criteria="Test", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=90.0, - measured_value="test", - threshold="test", - evidence=["Evidence 1", "Evidence 2"], - remediation=None, - error_message=None, - ) - - assessment = Assessment( - repository=sample_repository, - timestamp=datetime.now(), - overall_score=90.0, - certification_level="Platinum", - attributes_assessed=1, - attributes_not_assessed=0, - attributes_total=1, - findings=[create_dummy_finding()], - config=None, - duration_seconds=1.0, - ) - - extractor = PatternExtractor(assessment) - summary = extractor._create_pattern_summary(finding) - - # Should use evidence as fallback - assert "Evidence 1" in summary or "successfully implements" in summary diff --git 
a/tests/unit/learners/test_skill_generator.py b/tests/unit/learners/test_skill_generator.py deleted file mode 100644 index 5ce6c414..00000000 --- a/tests/unit/learners/test_skill_generator.py +++ /dev/null @@ -1,355 +0,0 @@ -"""Unit tests for skill generation.""" - -from pathlib import Path - -import pytest - -from agentready.learners.skill_generator import SkillGenerator -from agentready.models import Citation, DiscoveredSkill - - -@pytest.fixture -def sample_skill(): - """Create sample discovered skill.""" - return DiscoveredSkill( - skill_id="test-skill", - name="Test Skill", - description="Test description for a skill", - confidence=90.0, - source_attribute_id="test_attr", - reusability_score=85.0, - impact_score=50.0, - pattern_summary="This is a test pattern summary explaining the skill.", - code_examples=["example1", "example2"], - citations=[], - ) - - -@pytest.fixture -def sample_skill_with_citations(): - """Create sample skill with citations.""" - return DiscoveredSkill( - skill_id="test-skill-citations", - name="Test Skill with Citations", - description="Test description", - confidence=95.0, - source_attribute_id="test_attr", - reusability_score=90.0, - impact_score=50.0, - pattern_summary="Pattern with research backing.", - code_examples=["example1"], - citations=[ - Citation( - source="Research Paper", - title="Best Practices for Code", - url="https://example.com/paper", - relevance="High", - ) - ], - ) - - -class TestSkillGenerator: - """Test SkillGenerator class.""" - - def test_init_default_output_dir(self): - """Test initialization with default output directory.""" - generator = SkillGenerator() - assert generator.output_dir == Path(".skills-proposals") - - def test_init_custom_output_dir(self, tmp_path): - """Test initialization with custom output directory.""" - custom_dir = tmp_path / "custom-skills" - generator = SkillGenerator(output_dir=custom_dir) - assert generator.output_dir == custom_dir - - def test_generate_skill_file(self, sample_skill, tmp_path): - """Test SKILL.md file generation.""" - generator = SkillGenerator(output_dir=tmp_path) - output_file = generator.generate_skill_file(sample_skill) - - assert output_file.exists() - assert output_file.parent.name == "test-skill" - assert output_file.name == "SKILL.md" - - content = output_file.read_text() - assert "Test Skill" in content - assert "test-skill" in content - assert "Test description" in content - - def test_generate_skill_file_creates_directory(self, sample_skill, tmp_path): - """Test that skill file generation creates necessary directories.""" - generator = SkillGenerator(output_dir=tmp_path) - skill_dir = tmp_path / sample_skill.skill_id - - # Directory shouldn't exist yet - assert not skill_dir.exists() - - output_file = generator.generate_skill_file(sample_skill) - - # Directory should now exist - assert skill_dir.exists() - assert output_file.exists() - - def test_generate_github_issue(self, sample_skill, tmp_path): - """Test GitHub issue template generation.""" - generator = SkillGenerator(output_dir=tmp_path) - output_file = generator.generate_github_issue(sample_skill) - - assert output_file.exists() - assert output_file.name == "skill-test-skill.md" - - content = output_file.read_text() - assert "Test Skill" in content - assert "test-skill" in content - - def test_generate_github_issue_creates_output_dir(self, sample_skill, tmp_path): - """Test that GitHub issue generation creates output directory.""" - output_dir = tmp_path / "issues" - generator = SkillGenerator(output_dir=output_dir) - - # 
Directory shouldn't exist yet - assert not output_dir.exists() - - output_file = generator.generate_github_issue(sample_skill) - - # Directory should now exist - assert output_dir.exists() - assert output_file.exists() - - def test_generate_markdown_report(self, sample_skill, tmp_path): - """Test markdown report generation.""" - generator = SkillGenerator(output_dir=tmp_path) - output_file = generator.generate_markdown_report(sample_skill) - - assert output_file.exists() - assert output_file.name == "test-skill-report.md" - - content = output_file.read_text() - assert "Test Skill" in content - assert "test-skill" in content - assert "90.0%" in content # Confidence - assert "+50.0 pts" in content # Impact - assert "85.0%" in content # Reusability - - def test_generate_markdown_report_with_code_examples(self, sample_skill, tmp_path): - """Test markdown report includes code examples.""" - generator = SkillGenerator(output_dir=tmp_path) - output_file = generator.generate_markdown_report(sample_skill) - - content = output_file.read_text() - assert "example1" in content - assert "example2" in content - assert "Example 1" in content or "```" in content - - def test_generate_markdown_report_without_code_examples(self, tmp_path): - """Test markdown report handles missing code examples.""" - skill_no_examples = DiscoveredSkill( - skill_id="no-examples", - name="No Examples Skill", - description="Test", - confidence=80.0, - source_attribute_id="test", - reusability_score=80.0, - impact_score=30.0, - pattern_summary="Test pattern", - code_examples=[], - citations=[], - ) - - generator = SkillGenerator(output_dir=tmp_path) - output_file = generator.generate_markdown_report(skill_no_examples) - - content = output_file.read_text() - assert "No code examples available" in content - - def test_generate_markdown_report_with_citations( - self, sample_skill_with_citations, tmp_path - ): - """Test markdown report includes citations.""" - generator = SkillGenerator(output_dir=tmp_path) - output_file = generator.generate_markdown_report(sample_skill_with_citations) - - content = output_file.read_text() - assert "Research Paper" in content - assert "Best Practices for Code" in content - assert "https://example.com/paper" in content - - def test_generate_markdown_report_without_citations(self, sample_skill, tmp_path): - """Test markdown report handles missing citations.""" - generator = SkillGenerator(output_dir=tmp_path) - output_file = generator.generate_markdown_report(sample_skill) - - content = output_file.read_text() - assert "No citations available" in content - - def test_generate_all_formats(self, sample_skill, tmp_path): - """Test generating all output formats.""" - generator = SkillGenerator(output_dir=tmp_path) - results = generator.generate_all_formats(sample_skill) - - assert "skill_md" in results - assert "github_issue" in results - assert "markdown_report" in results - - assert results["skill_md"].exists() - assert results["github_issue"].exists() - assert results["markdown_report"].exists() - - def test_generate_batch_skill_md(self, tmp_path): - """Test batch generation of SKILL.md files.""" - skills = [ - DiscoveredSkill( - skill_id=f"skill-{i}", - name=f"Skill {i}", - description=f"Description {i}", - confidence=80.0 + i, - source_attribute_id=f"attr_{i}", - reusability_score=80.0, - impact_score=30.0, - pattern_summary=f"Pattern {i}", - code_examples=[], - citations=[], - ) - for i in range(3) - ] - - generator = SkillGenerator(output_dir=tmp_path) - generated_files = 
generator.generate_batch(skills, output_format="skill_md") - - assert len(generated_files) == 3 - for file_path in generated_files: - assert file_path.exists() - assert file_path.name == "SKILL.md" - - def test_generate_batch_github_issues(self, tmp_path): - """Test batch generation of GitHub issues.""" - skills = [ - DiscoveredSkill( - skill_id=f"skill-{i}", - name=f"Skill {i}", - description=f"Description {i}", - confidence=80.0, - source_attribute_id=f"attr_{i}", - reusability_score=80.0, - impact_score=30.0, - pattern_summary=f"Pattern {i}", - code_examples=[], - citations=[], - ) - for i in range(3) - ] - - generator = SkillGenerator(output_dir=tmp_path) - generated_files = generator.generate_batch(skills, output_format="github_issue") - - assert len(generated_files) == 3 - for file_path in generated_files: - assert file_path.exists() - assert file_path.name.startswith("skill-") - assert file_path.name.endswith(".md") - - def test_generate_batch_markdown_reports(self, tmp_path): - """Test batch generation of markdown reports.""" - skills = [ - DiscoveredSkill( - skill_id=f"skill-{i}", - name=f"Skill {i}", - description=f"Description {i}", - confidence=80.0, - source_attribute_id=f"attr_{i}", - reusability_score=80.0, - impact_score=30.0, - pattern_summary=f"Pattern {i}", - code_examples=[], - citations=[], - ) - for i in range(3) - ] - - generator = SkillGenerator(output_dir=tmp_path) - generated_files = generator.generate_batch( - skills, output_format="markdown_report" - ) - - assert len(generated_files) == 3 - for file_path in generated_files: - assert file_path.exists() - assert file_path.name.endswith("-report.md") - - def test_generate_batch_all_formats(self, tmp_path): - """Test batch generation of all formats.""" - skills = [ - DiscoveredSkill( - skill_id=f"skill-{i}", - name=f"Skill {i}", - description=f"Description {i}", - confidence=80.0, - source_attribute_id=f"attr_{i}", - reusability_score=80.0, - impact_score=30.0, - pattern_summary=f"Pattern {i}", - code_examples=[], - citations=[], - ) - for i in range(2) - ] - - generator = SkillGenerator(output_dir=tmp_path) - generated_files = generator.generate_batch(skills, output_format="all") - - # Each skill generates 3 files (skill_md, github_issue, markdown_report) - assert len(generated_files) == 6 - - def test_generate_batch_empty_list(self, tmp_path): - """Test batch generation with empty skill list.""" - generator = SkillGenerator(output_dir=tmp_path) - generated_files = generator.generate_batch([], output_format="skill_md") - - assert len(generated_files) == 0 - - def test_create_markdown_report_structure(self, sample_skill, tmp_path): - """Test markdown report has expected structure.""" - generator = SkillGenerator(output_dir=tmp_path) - report_content = generator._create_markdown_report(sample_skill) - - # Check for expected sections - assert "# Skill Report:" in report_content - assert "## Overview" in report_content - assert "## Description" in report_content - assert "## Pattern Summary" in report_content - assert "## Implementation Guidance" in report_content - assert "## Metrics" in report_content - - def test_create_markdown_report_includes_metrics(self, sample_skill, tmp_path): - """Test markdown report includes all metrics.""" - generator = SkillGenerator(output_dir=tmp_path) - report_content = generator._create_markdown_report(sample_skill) - - # Check for metric values - assert "**Confidence**: 90.0%" in report_content - assert "**Impact**: +50.0 pts" in report_content - assert "**Reusability**: 85.0%" in 
report_content - assert "**Confidence Score**: 90.0%" in report_content - assert "**Impact Score**: 50.0 pts" in report_content - assert "**Reusability Score**: 85.0%" in report_content - - def test_file_naming_conventions(self, sample_skill, tmp_path): - """Test that generated files follow naming conventions.""" - generator = SkillGenerator(output_dir=tmp_path) - - # Generate all formats - skill_md = generator.generate_skill_file(sample_skill) - github_issue = generator.generate_github_issue(sample_skill) - markdown_report = generator.generate_markdown_report(sample_skill) - - # Check naming - assert skill_md.name == "SKILL.md" - assert github_issue.name == f"skill-{sample_skill.skill_id}.md" - assert markdown_report.name == f"{sample_skill.skill_id}-report.md" - - def test_output_directory_as_string(self, tmp_path): - """Test that output directory works as string path.""" - generator = SkillGenerator(output_dir=str(tmp_path)) - assert isinstance(generator.output_dir, Path) - assert generator.output_dir == tmp_path diff --git a/tests/unit/test_cli_extract_skills.py b/tests/unit/test_cli_extract_skills.py deleted file mode 100644 index bfbdc8d6..00000000 --- a/tests/unit/test_cli_extract_skills.py +++ /dev/null @@ -1,376 +0,0 @@ -"""Unit tests for extract-skills CLI command.""" - -import json -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest -from click.testing import CliRunner - -from agentready.cli.extract_skills import extract_skills -from tests.fixtures.assessment_fixtures import create_test_assessment_json - - -@pytest.fixture -def temp_repo(): - """Create a temporary repository with assessment.""" - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - - # Create .git directory - (repo_path / ".git").mkdir() - - # Create .agentready directory with assessment - agentready_dir = repo_path / ".agentready" - agentready_dir.mkdir() - - # Create sample assessment with known skill IDs that PatternExtractor recognizes - from tests.fixtures.assessment_fixtures import create_test_finding_json - - findings = [ - create_test_finding_json( - attribute_id="claude_md_file", - attribute_name="CLAUDE.md File", - status="pass", - score=95.0, - category="Documentation", - tier=1, - ), - create_test_finding_json( - attribute_id="type_annotations", - attribute_name="Type Annotations", - status="pass", - score=90.0, - category="Code Quality", - tier=2, - ), - ] - - assessment_data = create_test_assessment_json( - overall_score=85.0, - num_findings=2, - repo_path=str(repo_path), - repo_name="test-repo", - ) - # Replace generic findings with skill-specific ones - assessment_data["findings"] = findings - - assessment_file = agentready_dir / "assessment-latest.json" - with open(assessment_file, "w") as f: - json.dump(assessment_data, f) - - yield repo_path - - -@pytest.fixture -def runner(): - """Create Click test runner.""" - return CliRunner() - - -class TestExtractSkillsCommand: - """Test extract-skills CLI command.""" - - def test_extract_skills_command_basic(self, runner, temp_repo): - """Test basic extract-skills command execution.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke(extract_skills, [str(temp_repo)]) - - # Should succeed - assert result.exit_code == 0 - - # Should create output directory - output_dir = temp_repo / ".skills-proposals" - assert output_dir.exists() - - def test_extract_skills_command_json_output(self, runner, temp_repo): - """Test extract-skills command with JSON output.""" 
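The naming-convention test above fixes the three on-disk outputs per skill: `<skill_id>/SKILL.md`, `skill-<skill_id>.md` for the issue body, and `<skill_id>-report.md` for the report. The same conventions expressed as a small helper (name and return shape are illustrative):

```python
from pathlib import Path


def planned_outputs(output_dir: Path, skill_id: str) -> dict[str, Path]:
    """Map each output format to the path the generator tests assert on."""
    return {
        "skill_md": output_dir / skill_id / "SKILL.md",
        "github_issue": output_dir / f"skill-{skill_id}.md",
        "markdown_report": output_dir / f"{skill_id}-report.md",
    }


print(planned_outputs(Path(".skills-proposals"), "setup-claude-md"))
```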
- with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, [str(temp_repo), "--output-format", "json"] - ) - - assert result.exit_code == 0 - - # Check for JSON output file - output_dir = temp_repo / ".skills-proposals" - json_files = list(output_dir.glob("*.json")) - assert len(json_files) > 0 - - @pytest.mark.skip( - reason="Test fixture doesn't create findings that produce extractable skills - needs proper test data" - ) - def test_extract_skills_command_skill_md_output(self, runner, temp_repo): - """Test extract-skills command with SKILL.md output.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, [str(temp_repo), "--output-format", "skill_md"] - ) - - assert result.exit_code == 0 - - # Check for SKILL.md files (in subdirectories: skill-id/SKILL.md) - output_dir = temp_repo / ".skills-proposals" - md_files = list(output_dir.glob("*/SKILL.md")) - assert len(md_files) > 0 - - @pytest.mark.skip( - reason="Test fixture doesn't create findings that produce extractable skills - needs proper test data" - ) - def test_extract_skills_command_github_issues_output(self, runner, temp_repo): - """Test extract-skills command with GitHub issues output.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, [str(temp_repo), "--output-format", "github_issues"] - ) - - assert result.exit_code == 0 - - # Check for issue files (named skill-{id}.md) - output_dir = temp_repo / ".skills-proposals" - issue_files = list(output_dir.glob("skill-*.md")) - assert len(issue_files) > 0 - - @pytest.mark.skip( - reason="Test fixture doesn't create findings that produce extractable skills - needs proper test data" - ) - def test_extract_skills_command_all_output_formats(self, runner, temp_repo): - """Test extract-skills command with all output formats.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, [str(temp_repo), "--output-format", "all"] - ) - - assert result.exit_code == 0 - - # Should have multiple file types - output_dir = temp_repo / ".skills-proposals" - assert len(list(output_dir.glob("*.json"))) > 0 - assert len(list(output_dir.glob("*.md"))) > 0 - - def test_extract_skills_command_custom_output_dir(self, runner, temp_repo): - """Test extract-skills command with custom output directory.""" - custom_dir = temp_repo / "custom-skills" - - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, - [str(temp_repo), "--output-dir", str(custom_dir)], - ) - - assert result.exit_code == 0 - assert custom_dir.exists() - - def test_extract_skills_command_specific_attribute(self, runner, temp_repo): - """Test extract-skills command with specific attribute filter.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, - [ - str(temp_repo), - "--attribute", - "claude_md_file", - ], - ) - - assert result.exit_code == 0 - - def test_extract_skills_command_multiple_attributes(self, runner, temp_repo): - """Test extract-skills command with multiple attribute filters.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, - [ - str(temp_repo), - "--attribute", - "claude_md_file", - "--attribute", - "type_annotations", - ], - ) - - assert result.exit_code == 0 - - def test_extract_skills_command_min_confidence(self, runner, temp_repo): - """Test 
extract-skills command with custom minimum confidence.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, - [str(temp_repo), "--min-confidence", "80"], - ) - - assert result.exit_code == 0 - - def test_extract_skills_command_verbose(self, runner, temp_repo): - """Test extract-skills command with verbose output.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, - [str(temp_repo), "--verbose"], - ) - - assert result.exit_code == 0 - # Verbose should produce more output - assert len(result.output) > 0 - - def test_extract_skills_command_no_assessment_file(self, runner): - """Test extract-skills command fails gracefully with no assessment file.""" - with runner.isolated_filesystem(): - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - (repo_path / ".git").mkdir() - - result = runner.invoke(extract_skills, [str(repo_path)]) - - # Should fail gracefully - assert result.exit_code != 0 - assert ( - "assessment" in result.output.lower() - or "not found" in result.output.lower() - ) - - def test_extract_skills_command_invalid_repository(self, runner): - """Test extract-skills command with non-existent repository.""" - result = runner.invoke(extract_skills, ["/nonexistent/path"]) - - # Should fail - assert result.exit_code != 0 - - @patch("agentready.cli.extract_skills.LearningService") - def test_extract_skills_command_enable_llm_without_api_key( - self, mock_service, runner, temp_repo - ): - """Test extract-skills command with LLM enabled but no API key.""" - # Remove ANTHROPIC_API_KEY if present - import os - - old_key = os.environ.pop("ANTHROPIC_API_KEY", None) - - try: - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, - [str(temp_repo), "--enable-llm"], - ) - - # Should warn or fall back gracefully - # Implementation may vary, but shouldn't crash - assert "API key" in result.output or result.exit_code == 0 - finally: - # Restore API key if it existed - if old_key: - os.environ["ANTHROPIC_API_KEY"] = old_key - - @patch("agentready.cli.extract_skills.LearningService") - def test_extract_skills_command_enable_llm_with_budget( - self, mock_service, runner, temp_repo - ): - """Test extract-skills command with LLM enabled and custom budget.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, - [ - str(temp_repo), - "--enable-llm", - "--llm-budget", - "10", - ], - ) - - # Should succeed (or gracefully handle missing API key) - assert result.exit_code == 0 or "API key" in result.output - - @patch("agentready.cli.extract_skills.LearningService") - def test_extract_skills_command_llm_no_cache(self, mock_service, runner, temp_repo): - """Test extract-skills command with LLM cache bypass.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - extract_skills, - [ - str(temp_repo), - "--enable-llm", - "--llm-no-cache", - ], - ) - - # Should succeed (or gracefully handle missing API key) - assert result.exit_code == 0 or "API key" in result.output - - def test_extract_skills_command_default_repository(self, runner): - """Test extract-skills command with default repository (current directory).""" - with runner.isolated_filesystem(): - # Create minimal git repo structure - Path(".git").mkdir() - agentready_dir = Path(".agentready") - agentready_dir.mkdir() - - # Create minimal assessment using shared fixture - 
assessment_data = create_test_assessment_json( - overall_score=75.0, - num_findings=1, - repo_path=".", - repo_name="test", - ) - - with open(agentready_dir / "assessment-latest.json", "w") as f: - json.dump(assessment_data, f) - - result = runner.invoke(extract_skills, []) - - # Should use current directory - assert result.exit_code == 0 - - -class TestExtractSkillsCommandErrorHandling: - """Test error handling in extract-skills command.""" - - def test_extract_skills_invalid_output_format(self, runner, temp_repo): - """Test extract-skills command with invalid output format.""" - result = runner.invoke( - extract_skills, - [str(temp_repo), "--output-format", "invalid"], - ) - - # Should fail with validation error - assert result.exit_code != 0 - - def test_extract_skills_invalid_min_confidence(self, runner, temp_repo): - """Test extract-skills command with invalid min confidence.""" - result = runner.invoke( - extract_skills, - [str(temp_repo), "--min-confidence", "invalid"], - ) - - # Should fail with validation error - assert result.exit_code != 0 - - def test_extract_skills_negative_llm_budget(self, runner, temp_repo): - """Test extract-skills command with negative LLM budget.""" - result = runner.invoke( - extract_skills, - [str(temp_repo), "--llm-budget", "-5"], - ) - - # Should fail with validation error (Click validates int type) - assert result.exit_code != 0 - - def test_extract_skills_corrupted_assessment_file(self, runner): - """Test extract-skills command with corrupted assessment file.""" - with runner.isolated_filesystem(): - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - (repo_path / ".git").mkdir() - - # Create .agentready directory - agentready_dir = repo_path / ".agentready" - agentready_dir.mkdir() - - # Create corrupted assessment - assessment_file = agentready_dir / "assessment-latest.json" - assessment_file.write_text("{invalid json content") - - result = runner.invoke(extract_skills, [str(repo_path)]) - - # Should fail gracefully - assert result.exit_code != 0 diff --git a/tests/unit/test_cli_learn.py b/tests/unit/test_cli_learn.py deleted file mode 100644 index 3d473eb0..00000000 --- a/tests/unit/test_cli_learn.py +++ /dev/null @@ -1,372 +0,0 @@ -"""Unit tests for learn CLI command.""" - -import json -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest -from click.testing import CliRunner - -from agentready.cli.learn import learn -from tests.fixtures.assessment_fixtures import create_test_assessment_json - - -@pytest.fixture -def temp_repo(): - """Create a temporary repository with assessment.""" - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - - # Create .git directory - (repo_path / ".git").mkdir() - - # Create .agentready directory with assessment - agentready_dir = repo_path / ".agentready" - agentready_dir.mkdir() - - # Create sample assessment with known skill IDs that PatternExtractor recognizes - from tests.fixtures.assessment_fixtures import create_test_finding_json - - findings = [ - create_test_finding_json( - attribute_id="claude_md_file", - attribute_name="CLAUDE.md File", - status="pass", - score=95.0, - category="Documentation", - tier=1, - ), - create_test_finding_json( - attribute_id="type_annotations", - attribute_name="Type Annotations", - status="pass", - score=90.0, - category="Code Quality", - tier=2, - ), - ] - - assessment_data = create_test_assessment_json( - overall_score=85.0, - num_findings=2, - repo_path=str(repo_path), - 
repo_name="test-repo", - ) - # Replace generic findings with skill-specific ones - assessment_data["findings"] = findings - - assessment_file = agentready_dir / "assessment-latest.json" - with open(assessment_file, "w") as f: - json.dump(assessment_data, f) - - yield repo_path - - -@pytest.fixture -def runner(): - """Create Click test runner.""" - return CliRunner() - - -class TestLearnCommand: - """Test learn CLI command.""" - - def test_learn_command_basic(self, runner, temp_repo): - """Test basic learn command execution.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke(learn, [str(temp_repo)]) - - # Should succeed - assert result.exit_code == 0 - - # Should create output directory - output_dir = temp_repo / ".skills-proposals" - assert output_dir.exists() - - def test_learn_command_json_output(self, runner, temp_repo): - """Test learn command with JSON output.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke(learn, [str(temp_repo), "--output-format", "json"]) - - assert result.exit_code == 0 - - # Check for JSON output file - output_dir = temp_repo / ".skills-proposals" - json_files = list(output_dir.glob("*.json")) - assert len(json_files) > 0 - - @pytest.mark.skip( - reason="Test fixture doesn't create findings that produce extractable skills - needs proper test data" - ) - def test_learn_command_skill_md_output(self, runner, temp_repo): - """Test learn command with SKILL.md output.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, [str(temp_repo), "--output-format", "skill_md"] - ) - - assert result.exit_code == 0 - - # Check for SKILL.md files (in subdirectories: skill-id/SKILL.md) - output_dir = temp_repo / ".skills-proposals" - md_files = list(output_dir.glob("*/SKILL.md")) - assert len(md_files) > 0 - - @pytest.mark.skip( - reason="Test fixture doesn't create findings that produce extractable skills - needs proper test data" - ) - def test_learn_command_github_issues_output(self, runner, temp_repo): - """Test learn command with GitHub issues output.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, [str(temp_repo), "--output-format", "github_issues"] - ) - - assert result.exit_code == 0 - - # Check for issue files (named skill-{id}.md) - output_dir = temp_repo / ".skills-proposals" - issue_files = list(output_dir.glob("skill-*.md")) - assert len(issue_files) > 0 - - @pytest.mark.skip( - reason="Test fixture doesn't create findings that produce extractable skills - needs proper test data" - ) - def test_learn_command_all_output_formats(self, runner, temp_repo): - """Test learn command with all output formats.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke(learn, [str(temp_repo), "--output-format", "all"]) - - assert result.exit_code == 0 - - # Should have multiple file types - output_dir = temp_repo / ".skills-proposals" - assert len(list(output_dir.glob("*.json"))) > 0 - assert len(list(output_dir.glob("*.md"))) > 0 - - def test_learn_command_custom_output_dir(self, runner, temp_repo): - """Test learn command with custom output directory.""" - custom_dir = temp_repo / "custom-skills" - - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, - [str(temp_repo), "--output-dir", str(custom_dir)], - ) - - assert result.exit_code == 0 - assert custom_dir.exists() - - def test_learn_command_specific_attribute(self, runner, 
temp_repo): - """Test learn command with specific attribute filter.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, - [ - str(temp_repo), - "--attribute", - "claude_md_file", - ], - ) - - assert result.exit_code == 0 - - def test_learn_command_multiple_attributes(self, runner, temp_repo): - """Test learn command with multiple attribute filters.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, - [ - str(temp_repo), - "--attribute", - "claude_md_file", - "--attribute", - "type_annotations", - ], - ) - - assert result.exit_code == 0 - - def test_learn_command_min_confidence(self, runner, temp_repo): - """Test learn command with custom minimum confidence.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, - [str(temp_repo), "--min-confidence", "80"], - ) - - assert result.exit_code == 0 - - def test_learn_command_verbose(self, runner, temp_repo): - """Test learn command with verbose output.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, - [str(temp_repo), "--verbose"], - ) - - assert result.exit_code == 0 - # Verbose should produce more output - assert len(result.output) > 0 - - def test_learn_command_no_assessment_file(self, runner): - """Test learn command fails gracefully with no assessment file.""" - with runner.isolated_filesystem(): - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - (repo_path / ".git").mkdir() - - result = runner.invoke(learn, [str(repo_path)]) - - # Should fail gracefully - assert result.exit_code != 0 - assert ( - "assessment" in result.output.lower() - or "not found" in result.output.lower() - ) - - def test_learn_command_invalid_repository(self, runner): - """Test learn command with non-existent repository.""" - result = runner.invoke(learn, ["/nonexistent/path"]) - - # Should fail - assert result.exit_code != 0 - - @patch("agentready.cli.learn.LearningService") - def test_learn_command_enable_llm_without_api_key( - self, mock_service, runner, temp_repo - ): - """Test learn command with LLM enabled but no API key.""" - # Remove ANTHROPIC_API_KEY if present - import os - - old_key = os.environ.pop("ANTHROPIC_API_KEY", None) - - try: - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, - [str(temp_repo), "--enable-llm"], - ) - - # Should warn or fall back gracefully - # Implementation may vary, but shouldn't crash - assert "API key" in result.output or result.exit_code == 0 - finally: - # Restore API key if it existed - if old_key: - os.environ["ANTHROPIC_API_KEY"] = old_key - - @patch("agentready.cli.learn.LearningService") - def test_learn_command_enable_llm_with_budget( - self, mock_service, runner, temp_repo - ): - """Test learn command with LLM enabled and custom budget.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, - [ - str(temp_repo), - "--enable-llm", - "--llm-budget", - "10", - ], - ) - - # Should succeed (or gracefully handle missing API key) - assert result.exit_code == 0 or "API key" in result.output - - @patch("agentready.cli.learn.LearningService") - def test_learn_command_llm_no_cache(self, mock_service, runner, temp_repo): - """Test learn command with LLM cache bypass.""" - with runner.isolated_filesystem(temp_dir=temp_repo.parent): - result = runner.invoke( - learn, - [ - str(temp_repo), - "--enable-llm", - "--llm-no-cache", - 
], - ) - - # Should succeed (or gracefully handle missing API key) - assert result.exit_code == 0 or "API key" in result.output - - def test_learn_command_default_repository(self, runner): - """Test learn command with default repository (current directory).""" - with runner.isolated_filesystem(): - # Create minimal git repo structure - Path(".git").mkdir() - agentready_dir = Path(".agentready") - agentready_dir.mkdir() - - # Create minimal assessment using shared fixture - assessment_data = create_test_assessment_json( - overall_score=75.0, - num_findings=1, - repo_path=".", - repo_name="test", - ) - - with open(agentready_dir / "assessment-latest.json", "w") as f: - json.dump(assessment_data, f) - - result = runner.invoke(learn, []) - - # Should use current directory - assert result.exit_code == 0 - - -class TestLearnCommandErrorHandling: - """Test error handling in learn command.""" - - def test_learn_invalid_output_format(self, runner, temp_repo): - """Test learn command with invalid output format.""" - result = runner.invoke( - learn, - [str(temp_repo), "--output-format", "invalid"], - ) - - # Should fail with validation error - assert result.exit_code != 0 - - def test_learn_invalid_min_confidence(self, runner, temp_repo): - """Test learn command with invalid min confidence.""" - result = runner.invoke( - learn, - [str(temp_repo), "--min-confidence", "invalid"], - ) - - # Should fail with validation error - assert result.exit_code != 0 - - def test_learn_negative_llm_budget(self, runner, temp_repo): - """Test learn command with negative LLM budget.""" - result = runner.invoke( - learn, - [str(temp_repo), "--llm-budget", "-5"], - ) - - # Should fail with validation error (Click validates int type) - assert result.exit_code != 0 - - def test_learn_corrupted_assessment_file(self, runner): - """Test learn command with corrupted assessment file.""" - with runner.isolated_filesystem(): - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - (repo_path / ".git").mkdir() - - # Create .agentready directory - agentready_dir = repo_path / ".agentready" - agentready_dir.mkdir() - - # Create corrupted assessment - assessment_file = agentready_dir / "assessment-latest.json" - assessment_file.write_text("{invalid json content") - - result = runner.invoke(learn, [str(repo_path)]) - - # Should fail gracefully - assert result.exit_code != 0 diff --git a/tests/unit/test_code_sampler.py b/tests/unit/test_code_sampler.py deleted file mode 100644 index 143bbd89..00000000 --- a/tests/unit/test_code_sampler.py +++ /dev/null @@ -1,541 +0,0 @@ -"""Unit tests for code sampler.""" - -import tempfile -from pathlib import Path - -import pytest - -from agentready.learners.code_sampler import CodeSampler -from agentready.models import Attribute, Finding, Repository - - -@pytest.fixture -def temp_repo(): - """Create a temporary repository with sample files.""" - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - - # Create .git directory - (repo_path / ".git").mkdir() - - # Create sample files - (repo_path / "README.md").write_text("# Test Project\n\nThis is a test.") - (repo_path / "CLAUDE.md").write_text("# CLAUDE.md\n\nAgent instructions.") - (repo_path / ".gitignore").write_text("*.pyc\n__pycache__/") - - # Create Python files - src_dir = repo_path / "src" - src_dir.mkdir() - (src_dir / "main.py").write_text( - "def hello(name: str) -> str:\n return f'Hello {name}'" - ) - (src_dir / "utils.py").write_text( - "def add(a: int, b: int) -> int:\n return a + b" - ) - - # 
Create tests directory - tests_dir = repo_path / "tests" - tests_dir.mkdir() - (tests_dir / "test_main.py").write_text("def test_hello():\n pass") - - # Create config files - (repo_path / "pyproject.toml").write_text( - "[tool.pytest.ini_options]\ntestpaths = ['tests']" - ) - (repo_path / ".pre-commit-config.yaml").write_text("repos:\n - repo: test") - - # Create github workflows - github_dir = repo_path / ".github" / "workflows" - github_dir.mkdir(parents=True) - (github_dir / "tests.yml").write_text("name: Tests\non: [push]") - - yield Repository( - path=repo_path, - name="test-repo", - url=None, - branch="main", - commit_hash="abc123", - languages={"Python": 100}, - total_files=8, - total_lines=50, - ) - - -@pytest.fixture -def sample_attribute(): - """Create a sample attribute.""" - return Attribute( - id="claude_md_file", - name="CLAUDE.md File", - category="Documentation", - tier=1, - description="CLAUDE.md provides agent instructions", - criteria="Must exist at repository root", - default_weight=1.0, - ) - - -@pytest.fixture -def sample_finding(sample_attribute): - """Create a sample finding.""" - return Finding( - attribute=sample_attribute, - status="pass", - score=100.0, - measured_value="present", - threshold="present", - evidence=["CLAUDE.md exists at root"], - remediation=None, - error_message=None, - ) - - -class TestCodeSampler: - """Test CodeSampler class.""" - - def test_init_default_params(self, temp_repo): - """Test initialization with default parameters.""" - sampler = CodeSampler(temp_repo) - - assert sampler.repository == temp_repo - assert sampler.max_files == 5 - assert sampler.max_lines_per_file == 100 - - def test_init_custom_params(self, temp_repo): - """Test initialization with custom parameters.""" - sampler = CodeSampler(temp_repo, max_files=3, max_lines_per_file=50) - - assert sampler.max_files == 3 - assert sampler.max_lines_per_file == 50 - - def test_get_relevant_code_claude_md(self, temp_repo, sample_finding): - """Test getting relevant code for CLAUDE.md attribute.""" - sampler = CodeSampler(temp_repo) - code = sampler.get_relevant_code(sample_finding) - - assert isinstance(code, str) - # Should contain CLAUDE.md content or reference - assert len(code) > 0 - - def test_get_relevant_code_readme(self, temp_repo): - """Test getting relevant code for README attribute.""" - attr = Attribute( - id="readme_file", - name="README", - category="Documentation", - tier=1, - description="README file", - criteria="Must exist", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=100.0, - measured_value="present", - threshold="present", - evidence=["README.md exists"], - remediation=None, - error_message=None, - ) - - sampler = CodeSampler(temp_repo) - code = sampler.get_relevant_code(finding) - - assert isinstance(code, str) - assert len(code) > 0 - - def test_get_relevant_code_type_annotations(self, temp_repo): - """Test getting relevant code for type annotations attribute.""" - attr = Attribute( - id="type_annotations", - name="Type Annotations", - category="Code Quality", - tier=2, - description="Type hints in code", - criteria=">=80%", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=90.0, - measured_value="90%", - threshold="80%", - evidence=["Most functions have type hints"], - remediation=None, - error_message=None, - ) - - sampler = CodeSampler(temp_repo) - code = sampler.get_relevant_code(finding) - - assert isinstance(code, str) - # Should sample Python files - assert len(code) > 0 
- - def test_get_relevant_code_gitignore(self, temp_repo): - """Test getting relevant code for gitignore attribute.""" - attr = Attribute( - id="gitignore", - name="Gitignore", - category="Structure", - tier=1, - description="Gitignore file", - criteria="Must exist", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=100.0, - measured_value="present", - threshold="present", - evidence=[".gitignore exists"], - remediation=None, - error_message=None, - ) - - sampler = CodeSampler(temp_repo) - code = sampler.get_relevant_code(finding) - - assert isinstance(code, str) - assert len(code) > 0 - - def test_get_relevant_code_unknown_attribute(self, temp_repo): - """Test getting relevant code for unknown attribute.""" - attr = Attribute( - id="unknown_attribute", - name="Unknown", - category="Unknown", - tier=4, - description="Unknown attribute", - criteria="Unknown", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=100.0, - measured_value="unknown", - threshold="unknown", - evidence=[], - remediation=None, - error_message=None, - ) - - sampler = CodeSampler(temp_repo) - code = sampler.get_relevant_code(finding) - - # Should return fallback message - assert "No code samples available" in code - - def test_get_relevant_code_standard_layout(self, temp_repo): - """Test getting relevant code for standard layout attribute.""" - attr = Attribute( - id="standard_project_layout", - name="Standard Layout", - category="Structure", - tier=2, - description="Standard project structure", - criteria="src/, tests/, docs/", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=100.0, - measured_value="standard", - threshold="standard", - evidence=["Has src/ and tests/ directories"], - remediation=None, - error_message=None, - ) - - sampler = CodeSampler(temp_repo) - code = sampler.get_relevant_code(finding) - - assert isinstance(code, str) - # Should contain directory structure info - assert len(code) > 0 - - def test_get_relevant_code_respects_max_files(self, temp_repo): - """Test that code sampler respects max_files limit.""" - attr = Attribute( - id="type_annotations", - name="Type Annotations", - category="Code Quality", - tier=2, - description="Type hints", - criteria=">=80%", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=90.0, - measured_value="90%", - threshold="80%", - evidence=[], - remediation=None, - error_message=None, - ) - - # Set max_files to 1 - sampler = CodeSampler(temp_repo, max_files=1) - code = sampler.get_relevant_code(finding) - - # Should limit to 1 file (implementation dependent) - assert isinstance(code, str) - - def test_get_directory_tree_existing_dir(self, temp_repo): - """Test getting directory tree for existing directory.""" - sampler = CodeSampler(temp_repo) - tree = sampler._get_directory_tree("src/") - - assert isinstance(tree, dict) - if tree: # If not empty - assert tree.get("type") == "directory" - assert "children" in tree - - def test_get_directory_tree_nonexistent_dir(self, temp_repo): - """Test getting directory tree for non-existent directory.""" - sampler = CodeSampler(temp_repo) - tree = sampler._get_directory_tree("nonexistent/") - - # Should return empty dict - assert tree == {} - - def test_format_code_samples_empty_list(self, temp_repo): - """Test formatting code samples with empty list.""" - sampler = CodeSampler(temp_repo) - result = sampler._format_code_samples([]) - - # Should handle empty list gracefully - 
assert isinstance(result, str) - - def test_format_code_samples_with_files(self, temp_repo): - """Test formatting code samples with actual files.""" - sampler = CodeSampler(temp_repo) - files = [temp_repo.path / "README.md"] - result = sampler._format_code_samples(files) - - assert isinstance(result, str) - assert len(result) > 0 - - -class TestCodeSamplerEdgeCases: - """Test edge cases in code sampler.""" - - def test_empty_repository(self): - """Test code sampler with empty repository.""" - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - (repo_path / ".git").mkdir() - - repo = Repository( - path=repo_path, - name="empty", - url=None, - branch="main", - commit_hash="abc", - languages={}, - total_files=0, - total_lines=0, - ) - - sampler = CodeSampler(repo) - attr = Attribute( - id="claude_md_file", - name="CLAUDE.md", - category="Documentation", - tier=1, - description="Test", - criteria="Test", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="fail", - score=0.0, - measured_value="absent", - threshold="present", - evidence=[], - remediation=None, - error_message=None, - ) - - code = sampler.get_relevant_code(finding) - - # Should handle gracefully - assert isinstance(code, str) - - def test_max_lines_per_file_limit(self, temp_repo): - """Test that max_lines_per_file is respected.""" - # Create a file with many lines - large_file = temp_repo.path / "large.py" - large_file.write_text("\n".join([f"line {i}" for i in range(200)])) - - sampler = CodeSampler(temp_repo, max_lines_per_file=50) - - # Sample the large file - files = [large_file] - result = sampler._format_code_samples(files) - - # Result should not contain all 200 lines (implementation dependent) - assert isinstance(result, str) - - def test_binary_file_handling(self, temp_repo): - """Test handling of binary files.""" - # Create a binary file - binary_file = temp_repo.path / "image.png" - binary_file.write_bytes(b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR") - - sampler = CodeSampler(temp_repo) - - # Try to sample binary file - files = [binary_file] - result = sampler._format_code_samples(files) - - # Should handle gracefully (may skip or show placeholder) - assert isinstance(result, str) - - def test_symlink_handling(self, temp_repo): - """Test handling of symlinks.""" - # Create a file and a symlink to it - real_file = temp_repo.path / "real.txt" - real_file.write_text("Real content") - - try: - symlink = temp_repo.path / "link.txt" - symlink.symlink_to(real_file) - - sampler = CodeSampler(temp_repo) - files = [symlink] - result = sampler._format_code_samples(files) - - # Should handle symlinks (may follow or skip) - assert isinstance(result, str) - except OSError: - # Skip test if symlinks not supported (Windows without admin) - pytest.skip("Symlinks not supported on this platform") - - def test_nested_directory_tree(self, temp_repo): - """Test getting directory tree for nested directories.""" - # Create nested structure - nested = temp_repo.path / "deeply" / "nested" / "structure" - nested.mkdir(parents=True) - (nested / "file.txt").write_text("content") - - sampler = CodeSampler(temp_repo) - tree = sampler._get_directory_tree("deeply/") - - # Should return tree structure - assert isinstance(tree, dict) - - def test_all_patterns_get_at_least_one_slot(self): - """Test that trailing patterns are not starved when len(patterns) > max_files.""" - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - (repo_path / ".git").mkdir() - (repo_path / 
"package.json").write_text('{"scripts":{"test":"jest"}}') - (repo_path / "jest.config.js").write_text("module.exports = {}") - - repo = Repository( - path=repo_path, - name="js-repo", - url=None, - branch="main", - commit_hash="abc", - languages={"JavaScript": 100}, - total_files=2, - total_lines=10, - ) - - attr = Attribute( - id="test_execution", - name="Test Execution", - category="Testing", - tier=1, - description="Test setup", - criteria="Test config", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=80.0, - measured_value="configured", - threshold="configured", - evidence=[], - remediation=None, - error_message=None, - ) - - sampler = CodeSampler(repo, max_files=5) - code = sampler.get_relevant_code(finding) - - assert "package.json" in code or "jest.config" in code - - def test_nested_skill_files_found(self): - """Test that .claude/skills//SKILL.md is found by recursive glob.""" - with tempfile.TemporaryDirectory() as tmpdir: - repo_path = Path(tmpdir) - (repo_path / ".git").mkdir() - skills_dir = repo_path / ".claude" / "skills" / "add-endpoint" - skills_dir.mkdir(parents=True) - (skills_dir / "SKILL.md").write_text("# Add Endpoint") - - repo = Repository( - path=repo_path, - name="skill-repo", - url=None, - branch="main", - commit_hash="abc", - languages={"Python": 100}, - total_files=1, - total_lines=5, - ) - - attr = Attribute( - id="pattern_references", - name="Pattern References", - category="Patterns", - tier=3, - description="Pattern refs", - criteria="Skills exist", - default_weight=1.0, - ) - finding = Finding( - attribute=attr, - status="pass", - score=60.0, - measured_value="present", - threshold="present", - evidence=[], - remediation=None, - error_message=None, - ) - - sampler = CodeSampler(repo) - code = sampler.get_relevant_code(finding) - - assert "SKILL.md" in code - - def test_hidden_files_excluded_from_tree(self, temp_repo): - """Test that hidden files/directories are excluded from tree.""" - # Create hidden directory - hidden = temp_repo.path / "visible" / ".hidden" - hidden.mkdir(parents=True) - (hidden / "secret.txt").write_text("secret") - - sampler = CodeSampler(temp_repo) - tree = sampler._get_directory_tree("visible/") - - # Tree should exist but not include .hidden directory - assert isinstance(tree, dict) - if tree and "children" in tree: - hidden_dirs = [ - c for c in tree["children"] if c.get("name", "").startswith(".") - ] - assert len(hidden_dirs) == 0 diff --git a/tests/unit/test_eval_harness_models.py b/tests/unit/test_eval_harness_models.py deleted file mode 100644 index 2de189a0..00000000 --- a/tests/unit/test_eval_harness_models.py +++ /dev/null @@ -1,391 +0,0 @@ -"""Unit tests for eval harness data models.""" - -from datetime import datetime - -import pytest - -from agentready.models.eval_harness import ( - AssessorImpact, - BaselineMetrics, - EvalSummary, - TbenchResult, -) - - -class TestTbenchResult: - """Tests for TbenchResult model.""" - - def test_create_tbench_result(self): - """Test creating a TbenchResult.""" - result = TbenchResult( - score=75.5, - completion_rate=72.0, - pytest_pass_rate=80.0, - latency_ms=3500.0, - timestamp=datetime(2025, 1, 1, 12, 0, 0), - is_mocked=True, - ) - - assert result.score == 75.5 - assert result.completion_rate == 72.0 - assert result.pytest_pass_rate == 80.0 - assert result.latency_ms == 3500.0 - assert result.is_mocked is True - - def test_to_dict_and_from_dict(self): - """Test JSON serialization roundtrip.""" - original = TbenchResult( - score=67.3, - 
completion_rate=65.0, - pytest_pass_rate=70.0, - latency_ms=4000.0, - timestamp=datetime(2025, 1, 1, 12, 0, 0), - is_mocked=True, - ) - - # Serialize - data = original.to_dict() - assert isinstance(data, dict) - assert data["score"] == 67.3 - assert data["is_mocked"] is True - - # Deserialize - restored = TbenchResult.from_dict(data) - assert restored.score == original.score - assert restored.completion_rate == original.completion_rate - assert restored.timestamp == original.timestamp - assert restored.is_mocked == original.is_mocked - - -class TestBaselineMetrics: - """Tests for BaselineMetrics model.""" - - @pytest.fixture - def sample_results(self): - """Create sample results for testing.""" - return [ - TbenchResult( - score=70.0, - completion_rate=68.0, - pytest_pass_rate=75.0, - latency_ms=3500.0, - timestamp=datetime(2025, 1, 1, 12, 0, 0), - is_mocked=True, - ), - TbenchResult( - score=72.0, - completion_rate=70.0, - pytest_pass_rate=77.0, - latency_ms=3400.0, - timestamp=datetime(2025, 1, 1, 12, 1, 0), - is_mocked=True, - ), - TbenchResult( - score=68.0, - completion_rate=66.0, - pytest_pass_rate=73.0, - latency_ms=3600.0, - timestamp=datetime(2025, 1, 1, 12, 2, 0), - is_mocked=True, - ), - TbenchResult( - score=71.0, - completion_rate=69.0, - pytest_pass_rate=76.0, - latency_ms=3450.0, - timestamp=datetime(2025, 1, 1, 12, 3, 0), - is_mocked=True, - ), - TbenchResult( - score=69.0, - completion_rate=67.0, - pytest_pass_rate=74.0, - latency_ms=3550.0, - timestamp=datetime(2025, 1, 1, 12, 4, 0), - is_mocked=True, - ), - ] - - def test_from_results(self, sample_results): - """Test creating BaselineMetrics from results.""" - baseline = BaselineMetrics.from_results(sample_results) - - # Check calculated statistics - assert baseline.mean_score == 70.0 # (70+72+68+71+69)/5 - assert baseline.median_score == 70.0 - assert baseline.min_score == 68.0 - assert baseline.max_score == 72.0 - assert baseline.iterations == 5 - assert len(baseline.raw_results) == 5 - - def test_std_dev_calculation(self, sample_results): - """Test standard deviation calculation.""" - baseline = BaselineMetrics.from_results(sample_results) - - # Should be around 1.58 for this data - assert baseline.std_dev > 0 - assert baseline.std_dev < 2.0 - - def test_from_empty_results_raises(self): - """Test that empty results raises ValueError.""" - with pytest.raises(ValueError, match="Cannot calculate baseline"): - BaselineMetrics.from_results([]) - - def test_to_dict_and_from_dict(self, sample_results): - """Test JSON serialization roundtrip.""" - original = BaselineMetrics.from_results(sample_results) - - # Serialize - data = original.to_dict() - assert isinstance(data, dict) - assert data["mean_score"] == original.mean_score - assert data["iterations"] == 5 - - # Deserialize - restored = BaselineMetrics.from_dict(data) - assert restored.mean_score == original.mean_score - assert restored.std_dev == original.std_dev - assert restored.iterations == original.iterations - assert len(restored.raw_results) == len(original.raw_results) - - -class TestAssessorImpact: - """Tests for AssessorImpact model.""" - - def test_create_assessor_impact(self): - """Test creating an AssessorImpact.""" - impact = AssessorImpact( - assessor_id="claude_md_file", - assessor_name="CLAUDE.md Configuration Files", - tier=1, - baseline_score=70.0, - post_remediation_score=82.5, - delta_score=12.5, - p_value=0.003, - effect_size=0.92, - is_significant=True, - iterations=5, - fixes_applied=2, - remediation_log=["CREATE CLAUDE.md", "ADD project 
overview"], - ) - - assert impact.assessor_id == "claude_md_file" - assert impact.delta_score == 12.5 - assert impact.is_significant is True - assert len(impact.remediation_log) == 2 - - def test_significance_labels(self): - """Test significance label generation.""" - # Large effect - large = AssessorImpact( - assessor_id="test", - assessor_name="Test", - tier=1, - baseline_score=70.0, - post_remediation_score=85.0, - delta_score=15.0, - p_value=0.001, - effect_size=0.9, # >= 0.8 = large - is_significant=True, - iterations=5, - fixes_applied=1, - ) - assert large.get_significance_label() == "large" - - # Medium effect - medium = AssessorImpact( - assessor_id="test", - assessor_name="Test", - tier=1, - baseline_score=70.0, - post_remediation_score=78.0, - delta_score=8.0, - p_value=0.02, - effect_size=0.6, # >= 0.5 = medium - is_significant=True, - iterations=5, - fixes_applied=1, - ) - assert medium.get_significance_label() == "medium" - - # Small effect - small = AssessorImpact( - assessor_id="test", - assessor_name="Test", - tier=1, - baseline_score=70.0, - post_remediation_score=73.0, - delta_score=3.0, - p_value=0.04, - effect_size=0.3, # >= 0.2 = small - is_significant=True, - iterations=5, - fixes_applied=1, - ) - assert small.get_significance_label() == "small" - - # Negligible effect - negligible = AssessorImpact( - assessor_id="test", - assessor_name="Test", - tier=1, - baseline_score=70.0, - post_remediation_score=71.0, - delta_score=1.0, - p_value=0.30, - effect_size=0.1, # < 0.2 = negligible - is_significant=False, - iterations=5, - fixes_applied=1, - ) - assert negligible.get_significance_label() == "negligible" - - def test_to_dict_and_from_dict(self): - """Test JSON serialization roundtrip.""" - original = AssessorImpact( - assessor_id="readme_structure", - assessor_name="README Structure", - tier=2, - baseline_score=68.0, - post_remediation_score=75.0, - delta_score=7.0, - p_value=0.015, - effect_size=0.55, - is_significant=True, - iterations=5, - fixes_applied=3, - remediation_log=["ADD Installation section", "ADD Usage examples"], - ) - - # Serialize - data = original.to_dict() - assert isinstance(data, dict) - assert data["assessor_id"] == "readme_structure" - assert data["delta_score"] == 7.0 - assert data["significance_label"] == "medium" - - # Deserialize - restored = AssessorImpact.from_dict(data) - assert restored.assessor_id == original.assessor_id - assert restored.delta_score == original.delta_score - assert restored.is_significant == original.is_significant - - -class TestEvalSummary: - """Tests for EvalSummary model.""" - - @pytest.fixture - def sample_baseline(self): - """Create sample baseline for testing.""" - results = [ - TbenchResult( - score=70.0, - completion_rate=68.0, - pytest_pass_rate=75.0, - latency_ms=3500.0, - timestamp=datetime(2025, 1, 1, 12, 0, 0), - is_mocked=True, - ) - for _ in range(5) - ] - return BaselineMetrics.from_results(results) - - @pytest.fixture - def sample_impacts(self): - """Create sample impacts for testing.""" - return [ - AssessorImpact( - assessor_id="claude_md_file", - assessor_name="CLAUDE.md", - tier=1, - baseline_score=70.0, - post_remediation_score=82.5, - delta_score=12.5, - p_value=0.003, - effect_size=0.92, - is_significant=True, - iterations=5, - fixes_applied=2, - ), - AssessorImpact( - assessor_id="readme_structure", - assessor_name="README", - tier=2, - baseline_score=70.0, - post_remediation_score=76.0, - delta_score=6.0, - p_value=0.020, - effect_size=0.45, - is_significant=True, - iterations=5, - 
fixes_applied=1, - ), - AssessorImpact( - assessor_id="gitignore", - assessor_name="Gitignore", - tier=3, - baseline_score=70.0, - post_remediation_score=72.0, - delta_score=2.0, - p_value=0.15, - effect_size=0.15, - is_significant=False, - iterations=5, - fixes_applied=1, - ), - ] - - def test_from_impacts(self, sample_baseline, sample_impacts): - """Test creating EvalSummary from impacts.""" - summary = EvalSummary.from_impacts(sample_baseline, sample_impacts) - - assert summary.total_assessors_tested == 3 - assert summary.significant_improvements == 2 - assert 1 in summary.tier_impacts - assert 2 in summary.tier_impacts - assert 3 in summary.tier_impacts - - def test_tier_impact_calculation(self, sample_baseline, sample_impacts): - """Test tier impact aggregation.""" - summary = EvalSummary.from_impacts(sample_baseline, sample_impacts) - - # Tier 1: only claude_md_file (12.5) - assert summary.tier_impacts[1] == 12.5 - - # Tier 2: only readme_structure (6.0) - assert summary.tier_impacts[2] == 6.0 - - # Tier 3: only gitignore (2.0) - assert summary.tier_impacts[3] == 2.0 - - # Tier 4: no assessors (should be 0.0) - assert summary.tier_impacts[4] == 0.0 - - def test_get_ranked_assessors(self, sample_baseline, sample_impacts): - """Test ranking assessors by delta score.""" - summary = EvalSummary.from_impacts(sample_baseline, sample_impacts) - ranked = summary.get_ranked_assessors() - - # Should be sorted descending by delta_score - assert ranked[0].assessor_id == "claude_md_file" # 12.5 - assert ranked[1].assessor_id == "readme_structure" # 6.0 - assert ranked[2].assessor_id == "gitignore" # 2.0 - - def test_to_dict_and_from_dict(self, sample_baseline, sample_impacts): - """Test JSON serialization roundtrip.""" - original = EvalSummary.from_impacts( - sample_baseline, sample_impacts, timestamp=datetime(2025, 1, 1, 12, 0, 0) - ) - - # Serialize - data = original.to_dict() - assert isinstance(data, dict) - assert data["total_assessors_tested"] == 3 - assert data["significant_improvements"] == 2 - assert "ranked_assessors" in data - - # Deserialize - restored = EvalSummary.from_dict(data) - assert restored.total_assessors_tested == original.total_assessors_tested - assert restored.significant_improvements == original.significant_improvements - assert restored.tier_impacts == original.tier_impacts diff --git a/tests/unit/test_learning_service.py b/tests/unit/test_learning_service.py deleted file mode 100644 index 87a93ce4..00000000 --- a/tests/unit/test_learning_service.py +++ /dev/null @@ -1,496 +0,0 @@ -"""Unit tests for learning service.""" - -import json -import subprocess -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest - -from agentready.models import DiscoveredSkill -from agentready.services.learning_service import LearningService - - -def create_dummy_finding() -> dict: - """Create a dummy finding dict for testing (not_applicable status).""" - return { - "attribute": { - "id": "test_attr", - "name": "Test Attribute", - "category": "Testing", - "tier": 1, - "description": "Test attribute", - "criteria": "Test criteria", - "default_weight": 1.0, - }, - "status": "not_applicable", - "score": None, - "measured_value": None, - "threshold": None, - "evidence": [], - "error_message": None, - } - - -@pytest.fixture -def temp_dir(): - """Create a temporary directory initialized as a git repository.""" - with tempfile.TemporaryDirectory() as tmpdir: - tmp_path = Path(tmpdir) - # Initialize as git repo to satisfy Repository model validation - 
subprocess.run(["git", "init"], cwd=tmp_path, check=True, capture_output=True) - yield tmp_path - - -@pytest.fixture -def sample_assessment_file(temp_dir): - """Create a sample assessment file.""" - assessment_data = { - "schema_version": "1.0.0", - "timestamp": "2025-11-22T06:00:00", - "repository": { - "name": "test-repo", - "path": str(temp_dir), - "url": None, - "branch": "main", - "commit_hash": "abc123", - "languages": {"Python": 100}, - "total_files": 5, - "total_lines": 100, - }, - "overall_score": 85.0, - "certification_level": "Gold", - "attributes_assessed": 2, - "attributes_skipped": 0, - "attributes_total": 2, - "findings": [ - { - "attribute": { - "id": "claude_md_file", - "name": "CLAUDE.md File", - "category": "Documentation", - "tier": 1, - "description": "Test attribute", - "criteria": "Must exist", - "default_weight": 1.0, - }, - "status": "pass", - "score": 100.0, - "measured_value": "present", - "threshold": "present", - "evidence": ["CLAUDE.md exists at root"], - "error_message": None, - }, - { - "attribute": { - "id": "type_annotations", - "name": "Type Annotations", - "category": "Code Quality", - "tier": 2, - "description": "Type hints in Python code", - "criteria": ">=80% coverage", - "default_weight": 1.0, - }, - "status": "pass", - "score": 90.0, - "measured_value": "90%", - "threshold": "80%", - "evidence": ["90% of functions have type hints"], - "error_message": None, - }, - ], - "duration_seconds": 1.5, - } - - # Create .agentready directory - agentready_dir = temp_dir / ".agentready" - agentready_dir.mkdir() - - assessment_file = agentready_dir / "assessment-latest.json" - with open(assessment_file, "w") as f: - json.dump(assessment_data, f) - - return assessment_file - - -class TestLearningService: - """Test LearningService class.""" - - def test_init_default_params(self): - """Test initialization with default parameters.""" - service = LearningService() - - assert service.min_confidence == 70.0 - assert service.output_dir == Path(".skills-proposals") - - def test_init_custom_params(self, temp_dir): - """Test initialization with custom parameters.""" - service = LearningService( - min_confidence=80.0, output_dir=temp_dir / "custom-skills" - ) - - assert service.min_confidence == 80.0 - assert service.output_dir == temp_dir / "custom-skills" - - def test_load_assessment_valid_file(self, sample_assessment_file): - """Test loading a valid assessment file.""" - service = LearningService() - assessment = service.load_assessment(sample_assessment_file) - - assert isinstance(assessment, dict) - assert assessment["overall_score"] == 85.0 - assert len(assessment["findings"]) == 2 - - def test_load_assessment_nonexistent_file(self, temp_dir): - """Test loading a non-existent assessment file.""" - service = LearningService() - nonexistent = temp_dir / "nonexistent.json" - - with pytest.raises(FileNotFoundError): - service.load_assessment(nonexistent) - - def test_load_assessment_invalid_json(self, temp_dir): - """Test loading an invalid JSON file.""" - service = LearningService() - invalid_file = temp_dir / "invalid.json" - invalid_file.write_text("{invalid json") - - with pytest.raises(ValueError, match="Invalid JSON"): - service.load_assessment(invalid_file) - - def test_load_assessment_empty_file(self, temp_dir): - """Test loading an empty JSON file.""" - service = LearningService() - empty_file = temp_dir / "empty.json" - empty_file.write_text("") - - with pytest.raises(ValueError): - service.load_assessment(empty_file) - - 
@patch("agentready.services.learning_service.PatternExtractor") - def test_extract_patterns_from_file_basic( - self, mock_extractor, sample_assessment_file, temp_dir - ): - """Test basic pattern extraction from file.""" - # Mock pattern extractor - mock_skill = DiscoveredSkill( - skill_id="test-skill", - name="Test Skill", - description="Test description", - confidence=95.0, - source_attribute_id="claude_md_file", - reusability_score=100.0, - impact_score=50.0, - pattern_summary="Test pattern", - code_examples=["example"], - citations=[], - ) - mock_extractor.return_value.extract_all_patterns.return_value = [mock_skill] - - service = LearningService(output_dir=temp_dir) - skills = service.extract_patterns_from_file(sample_assessment_file) - - # Should return skills - assert len(skills) == 1 - assert skills[0].skill_id == "test-skill" - - @patch("agentready.services.learning_service.PatternExtractor") - def test_extract_patterns_with_attribute_filter( - self, mock_extractor, sample_assessment_file, temp_dir - ): - """Test pattern extraction with attribute filter.""" - mock_skill = DiscoveredSkill( - skill_id="test-skill", - name="Test Skill", - description="Test description", - confidence=95.0, - source_attribute_id="claude_md_file", - reusability_score=100.0, - impact_score=50.0, - pattern_summary="Test pattern", - code_examples=["example"], - citations=[], - ) - mock_extractor.return_value.extract_specific_patterns.return_value = [ - mock_skill - ] - - service = LearningService(output_dir=temp_dir) - skills = service.extract_patterns_from_file( - sample_assessment_file, attribute_ids=["claude_md_file"] - ) - - # Should filter by attribute - assert len(skills) >= 0 # Depends on implementation - - @patch("agentready.services.learning_service.PatternExtractor") - def test_extract_patterns_filters_by_confidence( - self, mock_extractor, sample_assessment_file, temp_dir - ): - """Test pattern extraction filters by confidence threshold.""" - # Create skills with different confidence levels - high_confidence = DiscoveredSkill( - skill_id="high", - name="High Confidence", - description="High", - confidence=95.0, - source_attribute_id="claude_md_file", - reusability_score=100.0, - impact_score=50.0, - pattern_summary="High pattern", - code_examples=["example"], - citations=[], - ) - low_confidence = DiscoveredSkill( - skill_id="low", - name="Low Confidence", - description="Low", - confidence=50.0, - source_attribute_id="claude_md_file", - reusability_score=100.0, - impact_score=50.0, - pattern_summary="Low pattern", - code_examples=["example"], - citations=[], - ) - mock_extractor.return_value.extract_all_patterns.return_value = [ - high_confidence, - low_confidence, - ] - - # Service with 70% threshold - service = LearningService(min_confidence=70.0, output_dir=temp_dir) - skills = service.extract_patterns_from_file(sample_assessment_file) - - # Should only include high confidence skill - high_conf_skills = [s for s in skills if s.confidence >= 70.0] - assert len(high_conf_skills) >= 1 - - @patch("agentready.services.learning_service.PatternExtractor") - @patch("agentready.learners.llm_enricher.LLMEnricher") - def test_extract_patterns_with_llm_enrichment( - self, mock_enricher, mock_extractor, sample_assessment_file, temp_dir - ): - """Test pattern extraction with LLM enrichment.""" - # Mock basic skill - basic_skill = DiscoveredSkill( - skill_id="test", - name="Test", - description="Basic", - confidence=95.0, - source_attribute_id="claude_md_file", - reusability_score=100.0, - impact_score=50.0, 
- pattern_summary="Pattern", - code_examples=["example"], - citations=[], - ) - mock_extractor.return_value.extract_all_patterns.return_value = [basic_skill] - - # Mock enriched skill - enriched_skill = DiscoveredSkill( - skill_id="test", - name="Test", - description="Enhanced by LLM", - confidence=95.0, - source_attribute_id="claude_md_file", - reusability_score=100.0, - impact_score=50.0, - pattern_summary="Pattern", - code_examples=["enhanced example"], - citations=[], - ) - mock_enricher.return_value.enrich_skill.return_value = enriched_skill - - service = LearningService(output_dir=temp_dir) - skills = service.extract_patterns_from_file( - sample_assessment_file, enable_llm=True, llm_budget=1 - ) - - # Should have enriched skills - assert len(skills) >= 1 - - def test_extract_patterns_missing_assessment_keys(self, temp_dir): - """Test extract_patterns handles assessment with missing keys.""" - # Create assessment with missing optional keys - minimal_assessment = { - "schema_version": "1.0.0", - "timestamp": "2025-11-22T06:00:00", - "repository": { - "name": "test", - "path": str(temp_dir), - "languages": {"Python": 100}, - "total_files": 1, - "total_lines": 10, - }, - "overall_score": 75.0, - "certification_level": "Gold", - "attributes_assessed": 1, - "attributes_total": 1, - "findings": [ - create_dummy_finding() - ], # Need 1 finding to match attributes_total - "duration_seconds": 1.0, - } - - # Create .agentready directory - agentready_dir = temp_dir / ".agentready" - agentready_dir.mkdir() - - assessment_file = agentready_dir / "minimal.json" - with open(assessment_file, "w") as f: - json.dump(minimal_assessment, f) - - service = LearningService(output_dir=temp_dir) - - # Should handle gracefully (may return empty list) - with patch("agentready.services.learning_service.PatternExtractor") as mock: - mock.return_value.extract_all_patterns.return_value = [] - skills = service.extract_patterns_from_file(assessment_file) - assert isinstance(skills, list) - - def test_extract_patterns_with_old_schema_key(self, temp_dir): - """Test extract_patterns handles old schema key names.""" - # Old reports used "attributes_not_assessed" instead of "attributes_skipped" - old_schema_assessment = { - "schema_version": "1.0.0", - "timestamp": "2025-11-22T06:00:00", - "repository": { - "name": "test", - "path": str(temp_dir), - "languages": {"Python": 100}, - "total_files": 1, - "total_lines": 10, - }, - "overall_score": 75.0, - "certification_level": "Gold", - "attributes_assessed": 1, - "attributes_skipped": 0, # Old key - "attributes_total": 1, - "findings": [ - create_dummy_finding() - ], # Need 1 finding to match attributes_total - "duration_seconds": 1.0, - } - - # Create .agentready directory - agentready_dir = temp_dir / ".agentready" - agentready_dir.mkdir() - - assessment_file = agentready_dir / "old.json" - with open(assessment_file, "w") as f: - json.dump(old_schema_assessment, f) - - service = LearningService(output_dir=temp_dir) - - # Should handle gracefully - with patch("agentready.services.learning_service.PatternExtractor") as mock: - mock.return_value.extract_all_patterns.return_value = [] - skills = service.extract_patterns_from_file(assessment_file) - assert isinstance(skills, list) - - -class TestLearningServiceEdgeCases: - """Test edge cases in learning service.""" - - def test_output_dir_string_conversion(self): - """Test output_dir accepts string and converts to Path.""" - service = LearningService(output_dir="/tmp/skills") - - assert isinstance(service.output_dir, Path) - 
assert str(service.output_dir) == "/tmp/skills" - - def test_min_confidence_boundary_values(self): - """Test min_confidence with boundary values.""" - # Zero - service1 = LearningService(min_confidence=0.0) - assert service1.min_confidence == 0.0 - - # 100 - service2 = LearningService(min_confidence=100.0) - assert service2.min_confidence == 100.0 - - @patch("agentready.services.learning_service.PatternExtractor") - def test_extract_patterns_empty_findings(self, mock_extractor, temp_dir): - """Test extract_patterns with empty findings list.""" - # Create assessment with minimal findings (not_applicable) - assessment_data = { - "schema_version": "1.0.0", - "timestamp": "2025-11-22T06:00:00", - "repository": { - "name": "test", - "path": str(temp_dir), - "languages": {"Python": 100}, - "total_files": 1, - "total_lines": 10, - }, - "overall_score": 0.0, - "certification_level": "Needs Improvement", - "attributes_assessed": 0, - "attributes_skipped": 1, - "attributes_total": 1, - "findings": [ - create_dummy_finding() - ], # Need 1 finding to match attributes_total - "duration_seconds": 1.0, - } - - # Create .agentready directory - agentready_dir = temp_dir / ".agentready" - agentready_dir.mkdir() - - assessment_file = agentready_dir / "empty.json" - with open(assessment_file, "w") as f: - json.dump(assessment_data, f) - - mock_extractor.return_value.extract_all_patterns.return_value = [] - - service = LearningService(output_dir=temp_dir) - skills = service.extract_patterns_from_file(assessment_file) - - # Should return empty list - assert skills == [] - - @patch("agentready.services.learning_service.PatternExtractor") - def test_extract_patterns_multiple_attribute_ids( - self, mock_extractor, sample_assessment_file, temp_dir - ): - """Test extract_patterns with multiple attribute IDs.""" - mock_extractor.return_value.extract_specific_patterns.return_value = [] - - service = LearningService(output_dir=temp_dir) - skills = service.extract_patterns_from_file( - sample_assessment_file, - attribute_ids=["claude_md_file", "type_annotations", "gitignore_file"], - ) - - # Should handle multiple attributes - assert isinstance(skills, list) - - @patch("agentready.services.learning_service.PatternExtractor") - def test_extract_patterns_llm_budget_zero( - self, mock_extractor, sample_assessment_file, temp_dir - ): - """Test extract_patterns with zero LLM budget.""" - mock_skill = DiscoveredSkill( - skill_id="test", - name="Test", - description="Test", - confidence=95.0, - source_attribute_id="claude_md_file", - reusability_score=100.0, - impact_score=50.0, - pattern_summary="Pattern", - code_examples=["example"], - citations=[], - ) - mock_extractor.return_value.extract_all_patterns.return_value = [mock_skill] - - service = LearningService(output_dir=temp_dir) - skills = service.extract_patterns_from_file( - sample_assessment_file, enable_llm=True, llm_budget=0 - ) - - # Should not enrich any skills (budget is 0) - assert len(skills) >= 0 From eb8bd321e6e3f7a7237047fa5699495e0da18ac8 Mon Sep 17 00:00:00 2001 From: Bill Murdock Date: Thu, 7 May 2026 10:30:21 -0400 Subject: [PATCH 2/2] fix: correct indentation of submit entry in lazy_subcommands Co-Authored-By: Claude Sonnet 4.6 --- src/agentready/cli/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentready/cli/main.py b/src/agentready/cli/main.py index 2526dacc..b5600a37 100644 --- a/src/agentready/cli/main.py +++ b/src/agentready/cli/main.py @@ -95,7 +95,7 @@ def get_command(self, ctx, cmd_name): lazy_subcommands={ 
"assess-batch": ("assess_batch", "assess_batch"), "experiment": ("experiment", "experiment"), -"submit": ("submit", "submit"), + "submit": ("submit", "submit"), }, ) @click.option("--version", is_flag=True, help="Show version information")