From 3074b4f12db2bd9247309e0b668e8fe71ab4aa21 Mon Sep 17 00:00:00 2001
From: Joshua MARTINELLE
Date: Tue, 23 Jun 2026 12:08:12 +0200
Subject: [PATCH] fix(diff): keep programs whose detail fetch fails instead of
 marking them removed

---
# NOTE(review): the lines in this section sit between the `---` separator and
# the diffstat, so they are reviewer annotations only and are not part of the
# commit message or of any hunk.
#
# NOTE(review): hunk whitespace (blank context lines, indentation) was re-laid
# out to match the `@@` line counts; confirm it against the target files
# before applying.
#
# NOTE(review): build_failed_program is called from inside the
# `rescue StandardError` handler of fetch_programs. If raw['attributes'] were
# nil, or Models::Program.new rejected the listing values, the error would be
# raised from within the handler, where the old code returned nil -- confirm
# the listing data is always usable at that point.
#
# NOTE(review): preservation presumably relies on the flagged program's slug
# still reaching process_removed_programs via fetched_slugs; the caller is
# not shown in this patch -- TODO confirm.
#
# NOTE(review): preserve_failed_program returns nil; the return value of the
# normal process_program path is not visible here -- verify no caller uses it.

 lib/scopes_extractor/diff_engine.rb           | 14 ++++
 lib/scopes_extractor/models/program.rb        |  8 +++
 .../platforms/hacker_one/platform.rb          | 18 ++++-
 spec/scopes_extractor/diff_engine_spec.rb     | 72 +++++++++++++++++++
 spec/scopes_extractor/models/program_spec.rb  | 25 +++++++
 .../platforms/hacker_one/platform_spec.rb     |  9 ++-
 6 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/lib/scopes_extractor/diff_engine.rb b/lib/scopes_extractor/diff_engine.rb
index b06d749..633aeaa 100644
--- a/lib/scopes_extractor/diff_engine.rb
+++ b/lib/scopes_extractor/diff_engine.rb
@@ -16,6 +16,10 @@ def process_program(platform_name, fetched_program, skip_notifications: false)
 
       is_new_program = existing_program.nil?
 
+      # When the platform listed the program but failed to fetch its details,
+      # keep the existing scopes untouched and skip a half-known new program.
+      return preserve_failed_program(platform_name, existing_program, fetched_program) if fetched_program.fetch_failed?
+
       if is_new_program
         program_id = insert_new_program(platform_name, fetched_program)
       else
@@ -82,6 +86,16 @@ def process_removed_programs(platform_name, fetched_slugs, skip_notifications: f
 
     private
 
+    # Preserves a program whose details could not be fetched this cycle.
+    # Absence of fresh data is not a removal: an existing program keeps its
+    # scopes (only metadata from the listing is refreshed) and a brand-new
+    # program is left unregistered until a successful fetch.
+    def preserve_failed_program(platform_name, existing_program, fetched_program)
+      update_program_if_changed(existing_program[:id], existing_program, fetched_program) unless existing_program.nil?
+      ScopesExtractor.logger.debug "[#{platform_name}] Preserving #{fetched_program.slug}: detail fetch failed"
+      nil
+    end
+
     def insert_new_program(platform_name, fetched_program)
       @db[:programs].insert(
         slug: fetched_program.slug,
diff --git a/lib/scopes_extractor/models/program.rb b/lib/scopes_extractor/models/program.rb
index 36381ed..72e8fae 100644
--- a/lib/scopes_extractor/models/program.rb
+++ b/lib/scopes_extractor/models/program.rb
@@ -13,6 +13,14 @@ class Program < Dry::Struct
       attribute :name, Types::String
       attribute :bounty, Types::Bool
       attribute :scopes, Types::Array.of(Scope).default([].freeze)
+      # Transient sync-cycle flag: set when the platform listed the program but
+      # could not fetch its details (rate limit, timeout, ...). Such a program
+      # must not be treated as removed nor have its scopes wiped.
+      attribute :fetch_failed, Types::Bool.default(false)
+
+      def fetch_failed?
+        fetch_failed
+      end
 
       def in_scopes
         scopes.select(&:is_in_scope)
diff --git a/lib/scopes_extractor/platforms/hacker_one/platform.rb b/lib/scopes_extractor/platforms/hacker_one/platform.rb
index 88beb84..dff0cbc 100644
--- a/lib/scopes_extractor/platforms/hacker_one/platform.rb
+++ b/lib/scopes_extractor/platforms/hacker_one/platform.rb
@@ -81,13 +81,29 @@ def fetch_programs
             rescue StandardError => e
               ScopesExtractor.logger.error "[HackerOne] Failed to fetch/parse program #{handle}: #{e.message}"
               ScopesExtractor.logger.debug e.backtrace.join("\n")
-              nil
+              # Keep the program (flagged) so a transient failure is not mistaken
+              # for a removal; its existing scopes are preserved downstream.
+              build_failed_program(raw)
             end
           end
         end
 
         private
 
+        # Builds a program from listing data only, flagged as fetch-failed so
+        # the diff engine preserves it and its scopes for this cycle.
+        def build_failed_program(raw)
+          attr = raw['attributes']
+
+          Models::Program.new(
+            slug: attr['handle'],
+            platform: 'hackerone',
+            name: attr['name'],
+            bounty: attr['offers_bounties'] == true,
+            fetch_failed: true
+          )
+        end
+
         def parse_program(raw, scopes_data)
           attr = raw['attributes']
 
diff --git a/spec/scopes_extractor/diff_engine_spec.rb b/spec/scopes_extractor/diff_engine_spec.rb
index 6926232..d60f0df 100644
--- a/spec/scopes_extractor/diff_engine_spec.rb
+++ b/spec/scopes_extractor/diff_engine_spec.rb
@@ -241,6 +241,78 @@
     end
   end
 
+  describe '#process_program when fetch failed' do
+    context 'when the program already exists' do
+      before do
+        program_id = ScopesExtractor.db[:programs].insert(
+          slug: 'existing-program',
+          platform: 'hackerone',
+          name: 'Old Name',
+          bounty: false,
+          last_updated: Time.now
+        )
+        ScopesExtractor.db[:scopes].insert(
+          program_id: program_id,
+          value: 'kept.example.com',
+          type: 'web',
+          is_in_scope: true,
+          created_at: Time.now
+        )
+      end
+
+      let(:failed_program) do
+        ScopesExtractor::Models::Program.new(
+          slug: 'existing-program',
+          platform: 'hackerone',
+          name: 'New Name',
+          bounty: true,
+          scopes: [],
+          fetch_failed: true
+        )
+      end
+
+      it 'preserves the existing scopes (does not wipe them)' do
+        diff_engine.process_program('hackerone', failed_program)
+
+        scopes = ScopesExtractor.db[:scopes].all
+        expect(scopes.map { |s| s[:value] }).to contain_exactly('kept.example.com')
+      end
+
+      it 'does not emit removed-scope notifications' do
+        expect(notifier).not_to receive(:notify_removed_scope)
+        diff_engine.process_program('hackerone', failed_program)
+      end
+
+      it 'still updates the program metadata from the listing' do
+        diff_engine.process_program('hackerone', failed_program)
+
+        db_program = ScopesExtractor.db[:programs].where(slug: 'existing-program').first
+        expect(db_program[:name]).to eq('New Name')
+        expect(db_program[:bounty]).to be true
+      end
+    end
+
+    context 'when the program is brand new' do
+      let(:failed_program) do
+        ScopesExtractor::Models::Program.new(
+          slug: 'new-program',
+          platform: 'hackerone',
+          name: 'New Program',
+          bounty: true,
+          scopes: [],
+          fetch_failed: true
+        )
+      end
+
+      it 'does not register the half-known program' do
+        expect(notifier).not_to receive(:notify_new_program)
+        diff_engine.process_program('hackerone', failed_program)
+
+        expect(ScopesExtractor.db[:programs].where(slug: 'new-program').first).to be_nil
+      end
+    end
+  end
+
   describe '#process_removed_programs' do
     before do
       ScopesExtractor.db[:programs].insert(
diff --git a/spec/scopes_extractor/models/program_spec.rb b/spec/scopes_extractor/models/program_spec.rb
index d81e925..d555def 100644
--- a/spec/scopes_extractor/models/program_spec.rb
+++ b/spec/scopes_extractor/models/program_spec.rb
@@ -53,6 +53,31 @@
 
       expect(program.scopes).to eq([])
     end
+
+    it 'defaults fetch_failed to false' do
+      program = described_class.new(
+        slug: 'test-program',
+        platform: 'hackerone',
+        name: 'Test Program',
+        bounty: true
+      )
+
+      expect(program.fetch_failed?).to be false
+    end
+  end
+
+  describe '#fetch_failed?' do
+    it 'returns true when the program is flagged' do
+      program = described_class.new(
+        slug: 'test-program',
+        platform: 'hackerone',
+        name: 'Test Program',
+        bounty: true,
+        fetch_failed: true
+      )
+
+      expect(program.fetch_failed?).to be true
+    end
   end
 
   describe '#in_scopes' do
diff --git a/spec/scopes_extractor/platforms/hacker_one/platform_spec.rb b/spec/scopes_extractor/platforms/hacker_one/platform_spec.rb
index f6a813f..1d222e3 100644
--- a/spec/scopes_extractor/platforms/hacker_one/platform_spec.rb
+++ b/spec/scopes_extractor/platforms/hacker_one/platform_spec.rb
@@ -188,9 +188,14 @@
         platform.fetch_programs
       end
 
-      it 'returns empty array' do
+      it 'keeps the program flagged as fetch_failed instead of dropping it' do
         programs = platform.fetch_programs
-        expect(programs).to be_empty
+
+        expect(programs.size).to eq(1)
+        program = programs.first
+        expect(program.slug).to eq('test-program')
+        expect(program.fetch_failed?).to be true
+        expect(program.scopes).to be_empty
       end
     end
 