From c9fdefa5d9b849debe6601ae132f02b361970e6a Mon Sep 17 00:00:00 2001 From: Salvydas Lukosius Date: Fri, 22 May 2026 23:45:27 +0100 Subject: [PATCH 1/2] chore(labels): add dry-run label audit script Add a read-only Ruby script that consumes lib/labels.yml and reports missing, mismatched, legacy, and unknown labels across one or more repositories. Document the dry-run step in the label maintenance runbook. --- runbooks/labels.md | 44 ++++++- scripts/labels-dry-run.rb | 247 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 286 insertions(+), 5 deletions(-) create mode 100755 scripts/labels-dry-run.rb diff --git a/runbooks/labels.md b/runbooks/labels.md index b236495d8..988d77eae 100644 --- a/runbooks/labels.md +++ b/runbooks/labels.md @@ -94,14 +94,48 @@ Also retire spaced namespace variants such as `type: bug`, `area: docs`, `priori ## Safe cleanup order 1. List labels in the target repository. -2. Create or update every canonical label from `.github/lib/labels.yml`. -3. For each legacy label, find open issues and pull requests using it. -4. Add the canonical replacement to each item before removing the legacy label. -5. Delete legacy labels only after they are no longer used. -6. Re-run the label list and compare it with `.github/lib/labels.yml`. +2. Run a dry-run audit before applying anything: + + ```sh + scripts/labels-dry-run.rb --repo z-shell/ + ``` + + For an org-wide read-only report: + + ```sh + scripts/labels-dry-run.rb --all-repos > /tmp/z-shell-labels-dry-run.md + ``` + +3. Create or update every canonical label from `.github/lib/labels.yml`. +4. For each legacy label, find open issues and pull requests using it. +5. Add the canonical replacement to each item before removing the legacy label. +6. Delete legacy labels only after they are no longer used. +7. Re-run the dry-run audit and compare it with `.github/lib/labels.yml`. Do not delete unknown labels in bulk. 
If a repository has a local label that is not obviously legacy, open or update an issue before removing it. +## Dry-run script + +`scripts/labels-dry-run.rb` is read-only. It consumes `lib/labels.yml`, queries GitHub through `gh api`, and reports: + +- canonical labels that would be created +- canonical labels whose color or description would be updated +- legacy labels that should be migrated before removal +- unknown local labels that should be preserved and reviewed manually + +Useful examples: + +```sh +# Audit one repository and include clean output. +scripts/labels-dry-run.rb --repo z-shell/.github --include-clean + +# Audit several repositories. +scripts/labels-dry-run.rb --repo z-shell/zi --repo z-shell/wiki + +# Emit machine-readable output for follow-up tooling. +scripts/labels-dry-run.rb --repo z-shell/zi --json +``` + ## See also - `.github/lib/labels.yml` diff --git a/scripts/labels-dry-run.rb b/scripts/labels-dry-run.rb new file mode 100755 index 000000000..3e49a69a5 --- /dev/null +++ b/scripts/labels-dry-run.rb @@ -0,0 +1,247 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# Dry-run z-shell label synchronization audit. +# +# Reads lib/labels.yml and compares it with one or more GitHub repositories. +# This script is intentionally read-only: it uses only GET-style `gh api` calls +# and never creates, updates, deletes, or migrates labels. 
+ +require "json" +require "open3" +require "optparse" +require "yaml" + +ROOT = File.expand_path("..", __dir__) +DEFAULT_LABELS_FILE = File.join(ROOT, "lib", "labels.yml") + +Options = Struct.new( + :labels_file, + :org, + :repos, + :json, + :include_clean, + keyword_init: true +) + +options = Options.new( + labels_file: DEFAULT_LABELS_FILE, + org: "z-shell", + repos: [], + json: false, + include_clean: false +) + +parser = OptionParser.new do |opts| + opts.banner = "Usage: scripts/labels-dry-run.rb [options]" + + opts.on("--labels-file PATH", "Canonical labels file (default: lib/labels.yml)") do |path| + options.labels_file = path + end + + opts.on("--org ORG", "GitHub organization for --all-repos (default: z-shell)") do |org| + options.org = org + end + + opts.on("--repo OWNER/REPO", "Repository to audit; may be repeated") do |repo| + options.repos << repo + end + + opts.on("--all-repos", "Audit every repository in --org") do + options.repos << :all + end + + opts.on("--json", "Emit JSON instead of Markdown") do + options.json = true + end + + opts.on("--include-clean", "Include clean repos in Markdown output") do + options.include_clean = true + end + + opts.on("-h", "--help", "Show this help") do + puts opts + exit 0 + end +end + +parser.parse! + +if options.repos.empty? + warn parser + warn "\nerror: pass at least one --repo OWNER/REPO or --all-repos" + exit 2 +end + +def gh_json(*args) + stdout, stderr, status = Open3.capture3("gh", *args) + unless status.success? + raise "gh #{args.join(' ')} failed: #{stderr.strip.empty? ? stdout.strip : stderr.strip}" + end + JSON.parse(stdout.empty? ? 
"[]" : stdout) +end + +def repo_list(org) + gh_json("repo", "list", org, "--limit", "1000", "--json", "nameWithOwner").map { |repo| repo.fetch("nameWithOwner") } +end + +def repo_labels(owner_repo) + gh_json("api", "repos/#{owner_repo}/labels", "--paginate").map do |label| + { + "name" => label.fetch("name"), + "color" => label.fetch("color").downcase, + "description" => (label["description"] || "") + } + end +end + +def canonical_label_map(labels_file) + data = YAML.load_file(labels_file) + labels = data.fetch("labels") + label_names = labels.map { |label| label.fetch("name") } + duplicate_names = label_names.select { |name| label_names.count(name) > 1 }.uniq + raise "duplicate canonical labels: #{duplicate_names.join(', ')}" unless duplicate_names.empty? + + [ + labels.to_h do |label| + [ + label.fetch("name"), + { + "name" => label.fetch("name"), + "color" => label.fetch("color").to_s.downcase, + "description" => (label["description"] || "") + } + ] + end, + data.fetch("legacy_migrations", {}) || {}, + data.fetch("sync_policy", {}) || {} + ] +end + +def diff_repo(owner_repo, canonical, legacy_migrations) + live = repo_labels(owner_repo) + live_by_name = live.to_h { |label| [label.fetch("name"), label] } + + missing = canonical.keys.reject { |name| live_by_name.key?(name) } + + updates = canonical.filter_map do |name, desired| + current = live_by_name[name] + next unless current + + changes = {} + if current.fetch("color") != desired.fetch("color") + changes["color"] = { "current" => current.fetch("color"), "desired" => desired.fetch("color") } + end + if current.fetch("description") != desired.fetch("description") + changes["description"] = { "current" => current.fetch("description"), "desired" => desired.fetch("description") } + end + changes.empty? ? 
nil : { "name" => name, "changes" => changes } + end + + legacy_present = legacy_migrations.filter_map do |legacy, replacement| + next unless live_by_name.key?(legacy) + + { "legacy" => legacy, "replacement" => replacement } + end + + unknown = live_by_name.keys.reject do |name| + canonical.key?(name) || legacy_migrations.key?(name) + end.sort + + { + "repo" => owner_repo, + "missing" => missing.sort, + "updates" => updates.sort_by { |item| item.fetch("name") }, + "legacy_present" => legacy_present.sort_by { |item| item.fetch("legacy") }, + "unknown" => unknown, + "summary" => { + "missing" => missing.length, + "updates" => updates.length, + "legacy_present" => legacy_present.length, + "unknown" => unknown.length + } + } +end + +def clean?(result) + result.fetch("summary").values.all?(&:zero?) +end + +canonical, legacy_migrations, sync_policy = canonical_label_map(options.labels_file) +repos = options.repos.include?(:all) ? repo_list(options.org) : options.repos +results = repos.sort.map { |repo| diff_repo(repo, canonical, legacy_migrations) } + +payload = { + "labels_file" => options.labels_file, + "canonical_labels" => canonical.length, + "legacy_migrations" => legacy_migrations.length, + "sync_policy" => sync_policy, + "repos_scanned" => results.length, + "repos_with_drift" => results.count { |result| !clean?(result) }, + "results" => results +} + +if options.json + puts JSON.pretty_generate(payload) + exit 0 +end + +puts "# Label sync dry-run" +puts +puts "Labels file: `#{options.labels_file}`" +puts "Canonical labels: #{canonical.length}" +puts "Legacy migrations: #{legacy_migrations.length}" +puts "Repos scanned: #{results.length}" +puts "Repos with drift: #{payload.fetch('repos_with_drift')}" +puts +puts "This is a read-only dry run. No labels or issues were changed." 
+puts +puts "## Sync policy" +puts +sync_policy.each do |key, value| + puts "- #{key}: #{value}" +end +puts + +results.each do |result| + next if clean?(result) && !options.include_clean + + puts "## #{result.fetch('repo')}" + puts + if clean?(result) + puts "Clean: no missing, mismatched, legacy, or unknown labels." + puts + next + end + + unless result.fetch("missing").empty? + puts "### Would create" + result.fetch("missing").each { |name| puts "- #{name}" } + puts + end + + unless result.fetch("updates").empty? + puts "### Would update" + result.fetch("updates").each do |item| + puts "- #{item.fetch('name')}" + item.fetch("changes").each do |field, change| + puts " - #{field}: `#{change.fetch('current')}` -> `#{change.fetch('desired')}`" + end + end + puts + end + + unless result.fetch("legacy_present").empty? + puts "### Legacy labels present" + result.fetch("legacy_present").each do |item| + puts "- #{item.fetch('legacy')} -> #{item.fetch('replacement')}" + end + puts + end + + unless result.fetch("unknown").empty? + puts "### Unknown local labels (preserve; review manually)" + result.fetch("unknown").each { |name| puts "- #{name}" } + puts + end +end From c0ec437dfc0333764880e4d531bb3d7de12cc9cb Mon Sep 17 00:00:00 2001 From: Salvydas Lukosius Date: Sat, 23 May 2026 00:13:45 +0100 Subject: [PATCH 2/2] fix(labels): address dry-run review feedback --- runbooks/labels.md | 2 +- scripts/labels-dry-run.rb | 38 +++++++++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/runbooks/labels.md b/runbooks/labels.md index 988d77eae..4cf982f46 100644 --- a/runbooks/labels.md +++ b/runbooks/labels.md @@ -97,7 +97,7 @@ Also retire spaced namespace variants such as `type: bug`, `area: docs`, `priori 2. 
Run a dry-run audit before applying anything: ```sh - scripts/labels-dry-run.rb --repo z-shell/ + scripts/labels-dry-run.rb --repo z-shell/REPO ``` For an org-wide read-only report: diff --git a/scripts/labels-dry-run.rb b/scripts/labels-dry-run.rb index 3e49a69a5..73e3b8841 100755 --- a/scripts/labels-dry-run.rb +++ b/scripts/labels-dry-run.rb @@ -19,6 +19,7 @@ :labels_file, :org, :repos, + :all_repos, :json, :include_clean, keyword_init: true @@ -28,6 +29,7 @@ labels_file: DEFAULT_LABELS_FILE, org: "z-shell", repos: [], + all_repos: false, json: false, include_clean: false ) @@ -48,7 +50,7 @@ end opts.on("--all-repos", "Audit every repository in --org") do - options.repos << :all + options.all_repos = true end opts.on("--json", "Emit JSON instead of Markdown") do @@ -67,7 +69,13 @@ parser.parse! -if options.repos.empty? +if options.all_repos && !options.repos.empty? + warn parser + warn "\nerror: use either --all-repos or one or more --repo values, not both" + exit 2 +end + +if !options.all_repos && options.repos.empty? warn parser warn "\nerror: pass at least one --repo OWNER/REPO or --all-repos" exit 2 @@ -81,12 +89,23 @@ def gh_json(*args) JSON.parse(stdout.empty? ? "[]" : stdout) end +def gh_paginated_array(path) + stdout, stderr, status = Open3.capture3( + "gh", "api", path, "--paginate", "--jq", ".[] | tojson" + ) + unless status.success? + raise "gh api #{path} failed: #{stderr.strip.empty? ? stdout.strip : stderr.strip}" + end + + stdout.lines.reject { |line| line.strip.empty?
}.map { |line| JSON.parse(line) } +end + def repo_list(org) gh_json("repo", "list", org, "--limit", "1000", "--json", "nameWithOwner").map { |repo| repo.fetch("nameWithOwner") } end def repo_labels(owner_repo) - gh_json("api", "repos/#{owner_repo}/labels", "--paginate").map do |label| + gh_paginated_array("repos/#{owner_repo}/labels?per_page=100").map do |label| { "name" => label.fetch("name"), "color" => label.fetch("color").downcase, @@ -96,8 +115,17 @@ def repo_labels(owner_repo) end def canonical_label_map(labels_file) - data = YAML.load_file(labels_file) + data = YAML.safe_load( + File.read(labels_file), + permitted_classes: [], + permitted_symbols: [], + aliases: false + ) + raise "labels file must contain a mapping" unless data.is_a?(Hash) + labels = data.fetch("labels") + raise "labels must be a list" unless labels.is_a?(Array) + label_names = labels.map { |label| label.fetch("name") } duplicate_names = label_names.select { |name| label_names.count(name) > 1 }.uniq raise "duplicate canonical labels: #{duplicate_names.join(', ')}" unless duplicate_names.empty? @@ -168,7 +196,7 @@ def clean?(result) end canonical, legacy_migrations, sync_policy = canonical_label_map(options.labels_file) -repos = options.repos.include?(:all) ? repo_list(options.org) : options.repos +repos = options.all_repos ? repo_list(options.org) : options.repos results = repos.sort.map { |repo| diff_repo(repo, canonical, legacy_migrations) } payload = {