From 45064806741001bf120a2e414adcdb9a5250f3a8 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Tue, 30 Jun 2026 19:52:16 +0600 Subject: [PATCH 01/25] fix(node): gate GET /ipfs/{cid} on reachable allowed-set, not deny-set (#126) The IPFS visibility gate used withheld_blob_oids (a deny-set enumerating only reachable blobs), so a dangling/unreachable blob was absent from the set and served in cleartext to anonymous callers. Flip to an allowed-set (allowed_blob_set_for_caller) that enumerates reachable blobs the caller may read: a dangling blob has no path, is never in the set, and 404s. --- crates/gitlawb-node/src/api/ipfs.rs | 115 ++++++++++-------- .../gitlawb-node/src/git/visibility_pack.rs | 96 ++++++++++++++- crates/gitlawb-node/src/test_support.rs | 112 +++++++++++++++++ 3 files changed, 273 insertions(+), 50 deletions(-) diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index 6a43cb5..405eaed 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -27,7 +27,7 @@ use std::str::FromStr; use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; use crate::git::store; -use crate::git::visibility_pack::{has_path_scoped_rule, withheld_blob_oids}; +use crate::git::visibility_pack::{allowed_blob_set_for_caller, has_path_scoped_rule}; use crate::state::AppState; use crate::visibility::{visibility_check, Decision}; @@ -36,16 +36,22 @@ use crate::visibility::{visibility_check, Decision}; /// Search all repos on the node for a git object whose SHA-256 hash matches /// the given CIDv1, returning its raw content if the caller may read it. /// -/// Visibility (#110): the object is served only from a repo row the caller -/// passes. For each iterated row we gate against that row's OWN rules +/// Visibility (#110, #126): the object is served only from a repo row the +/// caller passes. For each iterated row we gate against that row's OWN rules /// (`visibility_check` at `"/"`), never re-resolving via `authorize_repo_read` /// — `get_repo`'s fuzzy match could otherwise authorize a different physical -/// row than the one read (KTD2a). When the row carries path-scoped rules, a -/// blob withheld from the caller (`withheld_blob_oids`) is skipped. Denial and -/// genuine not-found both fall through to an opaque 404. +/// row than the one read (KTD2a). When the row carries path-scoped rules +/// (KTD4) the served object must be either a non-blob (trees/commits are +/// structural; KTD3) OR a blob in the caller's *reachable* allowed-set +/// (`allowed_blob_set_for_caller`). The reachable allowed-set excludes +/// dangling blobs — a blob written via `git hash-object -w` and never +/// committed has no path to gate, so it is fail-closed 404'd under +/// path-scoped rules (#126). Denial and genuine not-found both fall through +/// to an opaque 404. /// -/// Scope: this closes the direct unauthenticated scan. A stale-public mirror -/// row still serves withheld content (tracked separately, #124). +/// Scope: this closes the direct unauthenticated scan, including the dangling +/// case. A stale-public mirror row still serves withheld content (tracked +/// separately, #124). pub async fn get_by_cid( Path(cid_str): Path, State(state): State, @@ -85,11 +91,16 @@ pub async fn get_by_cid( .await .map_err(AppError::Internal)?; - // Request-scoped memo of the per-repo withheld set (KTD1). The caller is - // constant for one request, so `repo.id` alone is a safe, sufficient key — - // never a coarse caller "class", which `visibility_check`'s exact full-DID - // reader match would make unsafe. - let mut withheld_memo: HashMap> = HashMap::new(); + // Request-scoped memo of the per-repo allowed-blob set (KTD1, #126). The + // caller is constant for one request, so `repo.id` alone is a safe, + // sufficient key — never a coarse caller "class", which + // `visibility_check`'s exact full-DID reader match would make unsafe. + // + // We flipped from a deny-set (`withheld_blob_oids`) to an allowed-set + // (`allowed_blob_set_for_caller`) so dangling blobs — never enumerated by + // the reachable walk — fail closed instead of slipping through an empty + // deny entry (#126). + let mut allowed_memo: HashMap> = HashMap::new(); for repo in &repos { // Repo-level read gate against THIS row's own rules (KTD2a). @@ -106,45 +117,51 @@ pub async fn get_by_cid( Err(_) => continue, }; - // Per-blob withholding only applies when a path-scoped rule exists (KTD4). - if has_path_scoped_rule(rules) { - if !withheld_memo.contains_key(&repo.id) { - let rp = repo_path.clone(); - let r = rules.to_vec(); - let is_public = repo.is_public; - let owner = repo.owner_did.clone(); - let caller_for_walk = caller_owned.clone(); - // Full-history walk shells out to git — keep it off the async runtime. - let walk = tokio::task::spawn_blocking(move || { - withheld_blob_oids(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) - }) - .await; - // Fail closed on EITHER a task panic (JoinError) or a walk error: - // we cannot prove the caller may read here, so skip this repo and - // let a public copy (if any) serve. Never serve on an unproven gate. - let set = match walk { - Ok(Ok(set)) => set, - Ok(Err(e)) => { - tracing::warn!(repo = %repo.name, err = %e, "withheld walk failed; skipping repo"); - continue; - } - Err(e) => { - tracing::warn!(repo = %repo.name, err = %e, "withheld walk task panicked; skipping repo"); - continue; - } - }; - withheld_memo.insert(repo.id.clone(), set); - } - if withheld_memo - .get(&repo.id) - .is_some_and(|set| set.contains(&sha256_hex)) - { - continue; - } + // Per-blob gating only applies when a path-scoped rule exists (KTD4). + // Without any path-scoped rule, the "/" gate above is the whole story. + let path_scoped = has_path_scoped_rule(rules); + if path_scoped && !allowed_memo.contains_key(&repo.id) { + let rp = repo_path.clone(); + let r = rules.to_vec(); + let is_public = repo.is_public; + let owner = repo.owner_did.clone(); + let caller_for_walk = caller_owned.clone(); + // Full-history walk shells out to git — keep it off the async runtime. + let walk = tokio::task::spawn_blocking(move || { + allowed_blob_set_for_caller(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) + }) + .await; + // Fail closed on EITHER a task panic (JoinError) or a walk error: + // we cannot prove the caller may read here, so skip this repo and + // let a public copy (if any) serve. Never serve on an unproven gate. + let set = match walk { + Ok(Ok(set)) => set, + Ok(Err(e)) => { + tracing::warn!(repo = %repo.name, err = %e, "allowed-blob walk failed; skipping repo"); + continue; + } + Err(e) => { + tracing::warn!(repo = %repo.name, err = %e, "allowed-blob walk task panicked; skipping repo"); + continue; + } + }; + allowed_memo.insert(repo.id.clone(), set); } match store::read_object(&repo_path, &sha256_hex) { - Ok(Some((_obj_type, content))) => { + Ok(Some((obj_type, content))) => { + // Path-scoped rules: serve trees/commits unconditionally + // (structural; KTD3); a blob must be in the reachable + // allowed-set, which excludes dangling blobs (#126). + if path_scoped && obj_type == "blob" { + let in_allowed = allowed_memo + .get(&repo.id) + .is_some_and(|set| set.contains(&sha256_hex)); + if !in_allowed { + continue; + } + } + // 3. Return the content with IPFS-style headers let mut headers = HeaderMap::new(); headers.insert( diff --git a/crates/gitlawb-node/src/git/visibility_pack.rs b/crates/gitlawb-node/src/git/visibility_pack.rs index 578ee40..cb70e39 100644 --- a/crates/gitlawb-node/src/git/visibility_pack.rs +++ b/crates/gitlawb-node/src/git/visibility_pack.rs @@ -309,11 +309,33 @@ pub fn replicable_blob_set( rules: &[VisibilityRule], is_public: bool, owner_did: &str, +) -> Result> { + allowed_blob_set_for_caller(repo_path, rules, is_public, owner_did, None) +} + +/// Reachable blob OIDs that visibility ALLOWS `caller` at some path. The +/// caller-aware generalization of `replicable_blob_set` (which is the anonymous +/// `caller = None` case). Used by `GET /ipfs/{cid}` to gate fail-closed against +/// dangling/unreachable blobs (#126): a blob written via `git hash-object -w` +/// but unreferenced is absent from the reachable walk, so it is never in this +/// set and the IPFS serve path drops it — even from the owner, who has no path +/// to authorize the blob at. +/// +/// A blob reachable at an allowed path is included even when also denied +/// elsewhere (its content is readable to this caller elsewhere). Trees and +/// commits are NOT included here; the caller decides per object type whether +/// the allow-set applies (it does not for trees/commits — KTD3). +pub fn allowed_blob_set_for_caller( + repo_path: &Path, + rules: &[VisibilityRule], + is_public: bool, + owner_did: &str, + caller: Option<&str>, ) -> Result> { let pairs = blob_paths(repo_path)?; let mut allowed = HashSet::new(); for (oid, path) in &pairs { - if visibility_check(rules, is_public, owner_did, None, path) == Decision::Allow { + if visibility_check(rules, is_public, owner_did, caller, path) == Decision::Allow { allowed.insert(oid.clone()); } } @@ -743,6 +765,78 @@ mod tests { ); } + #[test] + fn allowed_set_excludes_dangling_blob_for_every_caller() { + // #126: a blob written via `git hash-object -w` but never referenced has + // no path to gate on, so it is absent from the reachable allowed-set — + // for anonymous callers, listed readers, AND the owner. The IPFS serve + // path relies on this fail-closed property to drop dangling withheld + // blobs that the deny-set model leaked. + let td = TempDir::new().unwrap(); + let work = td.path().join("work"); + std::fs::create_dir_all(work.join("public")).unwrap(); + std::fs::write(work.join("public/a.txt"), b"public bytes\n").unwrap(); + let run = |args: &[&str]| { + assert!( + Command::new("git") + .args(args) + .current_dir(&work) + .status() + .unwrap() + .success(), + "git {args:?} failed" + ); + }; + run(&["init", "-q"]); + run(&["config", "user.email", "t@t"]); + run(&["config", "user.name", "t"]); + run(&["add", "."]); + run(&["commit", "-qm", "init"]); + let oid_of = |rev: &str| { + let out = Command::new("git") + .args(["rev-parse", rev]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + let public_oid = oid_of("HEAD:public/a.txt"); + + std::fs::write(work.join("orphan.bin"), b"DANGLING SECRET\n").unwrap(); + let dangling_oid = { + let out = Command::new("git") + .args(["hash-object", "-w", "orphan.bin"]) + .current_dir(&work) + .output() + .unwrap(); + String::from_utf8_lossy(&out.stdout).trim().to_string() + }; + assert!( + matches!(dangling_oid.len(), 40 | 64), + "precondition: hash-object stored the dangling blob" + ); + + // Path-scoped rule: /secret/** denied to anon, allowed to a listed reader. + let reader = "did:key:zReader"; + let rules = [rule("/secret/**", &[reader])]; + + // Every gate-relevant caller: anonymous, listed reader, owner. None of + // them can put the dangling blob in the allowed set — it has no path. + for caller in [None, Some(reader), Some(OWNER)] { + let allowed = allowed_blob_set_for_caller(&work, &rules, true, OWNER, caller).unwrap(); + assert!( + !allowed.contains(&dangling_oid), + "dangling blob must be absent from allowed-set (caller={caller:?})" + ); + // Sanity: the reachable public blob is still in the set for every + // caller (the rule does not deny /public/**). + assert!( + allowed.contains(&public_oid), + "reachable public blob must be in allowed-set (caller={caller:?})" + ); + } + } + #[test] fn recipients_are_owner_plus_allowed_readers_only() { let (_td, repo, secret_oid, public_oid) = fixture(); diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 98fccc5..6de9b0f 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -1842,4 +1842,116 @@ mod tests { "walk error fails closed: repo skipped, even the public blob is not served" ); } + + /// #126: a dangling blob (written via `git hash-object -w`, never referenced + /// by any commit/tree) must 404 through `GET /ipfs/{cid}` under path-scoped + /// rules — for anon AND the owner. The pre-#126 deny-set was fail-open by + /// construction: dangling oids were absent from the reachable enumeration + /// and thus absent from the deny-set, so the handler served 200. The + /// allowed-set is fail-closed: dangling oids are absent from the reachable + /// allowed-set, so the handler 404s (per team memory: the owner shift to + /// 404 is the accepted fail-closed default — owners can still + /// `git cat-file` directly). + #[sqlx::test] + async fn ipfs_cid_dangling_blob_fails_closed_under_path_rules(pool: PgPool) { + use crate::db::VisibilityMode; + use gitlawb_core::identity::Keypair; + + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let slug = owner_did.replace([':', '/'], "_"); + let short = owner_did.split(':').next_back().unwrap().to_string(); + let state = test_state(pool).await; + + // Seed a normal repo with `secret/b.txt` reachable from HEAD, so the + // path-scoped rule has something to match — without this the rule has + // no anchor and we'd be testing nothing. + let _fx = seed_cid_repos(&slug, &short, &["dangling"]); + let bare = std::path::PathBuf::from("/tmp") + .join(&slug) + .join("dangling.git"); + + // Write a dangling blob: `git hash-object -w --stdin` adds it to the + // object DB but nothing references it, so the reachable walk never + // enumerates it. + let mut cmd = std::process::Command::new("git"); + cmd.args(["hash-object", "-w", "--stdin"]) + .current_dir(&bare) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()); + let mut child = cmd.spawn().expect("spawn git hash-object"); + { + use std::io::Write; + let stdin = child.stdin.as_mut().expect("stdin"); + stdin.write_all(b"DANGLING SECRET\n").expect("write stdin"); + } + let out = child.wait_with_output().expect("hash-object output"); + assert!( + out.status.success(), + "git hash-object: {}", + String::from_utf8_lossy(&out.stderr) + ); + let dangling_oid = String::from_utf8_lossy(&out.stdout).trim().to_string(); + // Sanity: must be a 64-hex sha256 oid, since the repo is sha256-format. + assert_eq!( + dangling_oid.len(), + 64, + "expected sha256 oid: {dangling_oid}" + ); + let dangling_cid = cid_for_oid(&dangling_oid); + + state + .db + .create_repo(&seed_repo(&owner_did, "dangling")) + .await + .expect("seed repo"); + let rec = state + .db + .get_repo(&owner_did, "dangling") + .await + .unwrap() + .unwrap(); + // Path-scoped rule triggers the per-blob allowed-set gate (KTD4). + state + .db + .set_visibility_rule(&rec.id, "/secret/**", VisibilityMode::B, &[], &owner_did) + .await + .expect("deny rule"); + + // anon: the dangling blob is absent from the reachable allowed-set → + // 404, no leak. Pre-#126 (deny-set) would serve 200. + let (st, body) = cid_parts( + cid_router(&state) + .oneshot(cid_anon(&dangling_cid)) + .await + .unwrap(), + ) + .await; + assert_eq!( + st, + StatusCode::NOT_FOUND, + "dangling blob must 404 under path-scoped rules" + ); + assert!( + !body.contains("DANGLING SECRET"), + "404 body must not leak the dangling content" + ); + + // owner (signed): same 404. The dangling blob has no path, so it's + // never visibility-checked → never in the allowed set, even for the + // owner. This is the accepted fail-closed shift documented in the PR. + let (st, body) = cid_parts( + cid_router(&state) + .oneshot(cid_signed(&owner, &dangling_cid)) + .await + .unwrap(), + ) + .await; + assert_eq!( + st, + StatusCode::NOT_FOUND, + "owner also 404s on dangling blobs under path-scoped rules (fail-closed default)" + ); + assert!(!body.contains("DANGLING SECRET")); + } } From 3aa7bf06f8cd9acfc26f6b7cea99441760a4716b Mon Sep 17 00:00:00 2001 From: Gravirei Date: Tue, 30 Jun 2026 20:07:58 +0600 Subject: [PATCH 02/25] perf(ipfs): check object existence before allowed-blob walk Move store::read_object before the allowed_blob_set_for_caller spawn_blocking call so random-CID spray against repos with path-scoped rules cannot trigger full-history git walks on repos that don't carry the object. --- crates/gitlawb-node/src/api/ipfs.rs | 141 ++++++++++++++-------------- 1 file changed, 72 insertions(+), 69 deletions(-) diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index 405eaed..2177350 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -40,14 +40,16 @@ use crate::visibility::{visibility_check, Decision}; /// caller passes. For each iterated row we gate against that row's OWN rules /// (`visibility_check` at `"/"`), never re-resolving via `authorize_repo_read` /// — `get_repo`'s fuzzy match could otherwise authorize a different physical -/// row than the one read (KTD2a). When the row carries path-scoped rules -/// (KTD4) the served object must be either a non-blob (trees/commits are -/// structural; KTD3) OR a blob in the caller's *reachable* allowed-set -/// (`allowed_blob_set_for_caller`). The reachable allowed-set excludes -/// dangling blobs — a blob written via `git hash-object -w` and never -/// committed has no path to gate, so it is fail-closed 404'd under -/// path-scoped rules (#126). Denial and genuine not-found both fall through -/// to an opaque 404. +/// row than the one read (KTD2a). We check object existence via +/// `store::read_object` *before* the expensive reachability walk so random-CID +/// spray cannot trigger full-history git walks on repos that don't carry the +/// object. When the row carries path-scoped rules (KTD4) the served object +/// must be either a non-blob (trees/commits are structural; KTD3) OR a blob +/// in the caller's *reachable* allowed-set (`allowed_blob_set_for_caller`). +/// The reachable allowed-set excludes dangling blobs — a blob written via +/// `git hash-object -w` and never committed has no path to gate, so it is +/// fail-closed 404'd under path-scoped rules (#126). Denial and genuine +/// not-found both fall through to an opaque 404. /// /// Scope: this closes the direct unauthenticated scan, including the dangling /// case. A stale-public mirror row still serves withheld content (tracked @@ -117,76 +119,77 @@ pub async fn get_by_cid( Err(_) => continue, }; + // Check whether the object exists in this repo before any expensive + // reachability walk. This prevents random-CID spray from triggering + // full-history git walks on repos that don't carry the object. + let object = store::read_object(&repo_path, &sha256_hex); + let (obj_type, content) = match object { + Ok(Some(t)) => t, + Ok(None) => continue, + Err(e) => { + tracing::warn!(repo = %repo.name, err = %e, "error reading git object"); + continue; + } + }; + // Per-blob gating only applies when a path-scoped rule exists (KTD4). // Without any path-scoped rule, the "/" gate above is the whole story. + // Trees/commits are always served under path-scoped rules (KTD3). let path_scoped = has_path_scoped_rule(rules); - if path_scoped && !allowed_memo.contains_key(&repo.id) { - let rp = repo_path.clone(); - let r = rules.to_vec(); - let is_public = repo.is_public; - let owner = repo.owner_did.clone(); - let caller_for_walk = caller_owned.clone(); - // Full-history walk shells out to git — keep it off the async runtime. - let walk = tokio::task::spawn_blocking(move || { - allowed_blob_set_for_caller(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) - }) - .await; - // Fail closed on EITHER a task panic (JoinError) or a walk error: - // we cannot prove the caller may read here, so skip this repo and - // let a public copy (if any) serve. Never serve on an unproven gate. - let set = match walk { - Ok(Ok(set)) => set, - Ok(Err(e)) => { - tracing::warn!(repo = %repo.name, err = %e, "allowed-blob walk failed; skipping repo"); - continue; - } - Err(e) => { - tracing::warn!(repo = %repo.name, err = %e, "allowed-blob walk task panicked; skipping repo"); - continue; - } - }; - allowed_memo.insert(repo.id.clone(), set); - } - - match store::read_object(&repo_path, &sha256_hex) { - Ok(Some((obj_type, content))) => { - // Path-scoped rules: serve trees/commits unconditionally - // (structural; KTD3); a blob must be in the reachable - // allowed-set, which excludes dangling blobs (#126). - if path_scoped && obj_type == "blob" { - let in_allowed = allowed_memo - .get(&repo.id) - .is_some_and(|set| set.contains(&sha256_hex)); - if !in_allowed { + if path_scoped && obj_type == "blob" { + if !allowed_memo.contains_key(&repo.id) { + let rp = repo_path.clone(); + let r = rules.to_vec(); + let is_public = repo.is_public; + let owner = repo.owner_did.clone(); + let caller_for_walk = caller_owned.clone(); + // Full-history walk shells out to git — keep it off the async runtime. + let walk = tokio::task::spawn_blocking(move || { + allowed_blob_set_for_caller(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) + }) + .await; + // Fail closed on EITHER a task panic (JoinError) or a walk error: + // we cannot prove the caller may read here, so skip this repo and + // let a public copy (if any) serve. Never serve on an unproven gate. + let set = match walk { + Ok(Ok(set)) => set, + Ok(Err(e)) => { + tracing::warn!(repo = %repo.name, err = %e, "allowed-blob walk failed; skipping repo"); + continue; + } + Err(e) => { + tracing::warn!(repo = %repo.name, err = %e, "allowed-blob walk task panicked; skipping repo"); continue; } - } - - // 3. Return the content with IPFS-style headers - let mut headers = HeaderMap::new(); - headers.insert( - HeaderName::from_static("content-type"), - HeaderValue::from_static("application/octet-stream"), - ); - headers.insert( - HeaderName::from_static("x-content-cid"), - HeaderValue::from_str(&cid_str) - .unwrap_or_else(|_| HeaderValue::from_static("invalid")), - ); - headers.insert( - HeaderName::from_static("x-git-hash"), - HeaderValue::from_str(&sha256_hex) - .unwrap_or_else(|_| HeaderValue::from_static("invalid")), - ); - - return Ok((StatusCode::OK, headers, content).into_response()); + }; + allowed_memo.insert(repo.id.clone(), set); } - Ok(None) => continue, - Err(e) => { - tracing::warn!(repo = %repo.name, err = %e, "error reading git object"); + let in_allowed = allowed_memo + .get(&repo.id) + .is_some_and(|set| set.contains(&sha256_hex)); + if !in_allowed { continue; } } + + // 3. Return the content with IPFS-style headers + let mut headers = HeaderMap::new(); + headers.insert( + HeaderName::from_static("content-type"), + HeaderValue::from_static("application/octet-stream"), + ); + headers.insert( + HeaderName::from_static("x-content-cid"), + HeaderValue::from_str(&cid_str) + .unwrap_or_else(|_| HeaderValue::from_static("invalid")), + ); + headers.insert( + HeaderName::from_static("x-git-hash"), + HeaderValue::from_str(&sha256_hex) + .unwrap_or_else(|_| HeaderValue::from_static("invalid")), + ); + + return Ok((StatusCode::OK, headers, content).into_response()); } // Not found in any repo From 63580c730da890851324a45d872ede1c3c7d9a62 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Tue, 30 Jun 2026 23:10:27 +0600 Subject: [PATCH 03/25] refactor(ipfs): improve formatting and readability in get_by_cid function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ✓ P3 blocker fixed: cargo fmt applied — the format gate will pass. ✓ P3 cleanup resolved: withheld_blob_oids is still used by replication code in repos.rs, so it stays. • P2 follow-up: Tree/commit disclosure tracked in #135 — out of scope here. --- crates/gitlawb-node/src/api/ipfs.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index 2177350..d6405fa 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -145,7 +145,13 @@ pub async fn get_by_cid( let caller_for_walk = caller_owned.clone(); // Full-history walk shells out to git — keep it off the async runtime. let walk = tokio::task::spawn_blocking(move || { - allowed_blob_set_for_caller(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) + allowed_blob_set_for_caller( + &rp, + &r, + is_public, + &owner, + caller_for_walk.as_deref(), + ) }) .await; // Fail closed on EITHER a task panic (JoinError) or a walk error: @@ -180,8 +186,7 @@ pub async fn get_by_cid( ); headers.insert( HeaderName::from_static("x-content-cid"), - HeaderValue::from_str(&cid_str) - .unwrap_or_else(|_| HeaderValue::from_static("invalid")), + HeaderValue::from_str(&cid_str).unwrap_or_else(|_| HeaderValue::from_static("invalid")), ); headers.insert( HeaderName::from_static("x-git-hash"), From 002f35405874898dc538773f0dcda13bafb56f49 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Wed, 1 Jul 2026 02:25:29 +0600 Subject: [PATCH 04/25] refactor(ipfs): streamline object retrieval by separating type and content reading --- crates/gitlawb-node/src/api/ipfs.rs | 14 +++++++++--- crates/gitlawb-node/src/git/store.rs | 34 ++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index d6405fa..46cbf73 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -122,12 +122,11 @@ pub async fn get_by_cid( // Check whether the object exists in this repo before any expensive // reachability walk. This prevents random-CID spray from triggering // full-history git walks on repos that don't carry the object. - let object = store::read_object(&repo_path, &sha256_hex); - let (obj_type, content) = match object { + let obj_type = match store::object_type(&repo_path, &sha256_hex) { Ok(Some(t)) => t, Ok(None) => continue, Err(e) => { - tracing::warn!(repo = %repo.name, err = %e, "error reading git object"); + tracing::warn!(repo = %repo.name, err = %e, "error checking git object type"); continue; } }; @@ -178,6 +177,15 @@ pub async fn get_by_cid( } } + // Now that we've passed the gate, read the content. + let content = match store::read_object_content(&repo_path, &sha256_hex, &obj_type) { + Ok(c) => c, + Err(e) => { + tracing::warn!(repo = %repo.name, err = %e, "error reading git object content"); + continue; + } + }; + // 3. Return the content with IPFS-style headers let mut headers = HeaderMap::new(); headers.insert( diff --git a/crates/gitlawb-node/src/git/store.rs b/crates/gitlawb-node/src/git/store.rs index b975914..290da6c 100644 --- a/crates/gitlawb-node/src/git/store.rs +++ b/crates/gitlawb-node/src/git/store.rs @@ -271,9 +271,8 @@ pub struct TreeEntry { /// `/ipfs/` is computed from these same content bytes via /// `gitlawb_core::cid::Cid::from_git_object_bytes`. /// -/// Returns `None` if the object does not exist in this repo. -pub fn read_object(repo_path: &Path, sha256_hex: &str) -> Result)>> { - // First check if the object exists and get its type +/// Get just the object type. Returns `None` if the object doesn't exist. +pub fn object_type(repo_path: &Path, sha256_hex: &str) -> Result> { let type_output = Command::new("git") .args(["cat-file", "-t", sha256_hex]) .current_dir(repo_path) @@ -284,13 +283,13 @@ pub fn read_object(repo_path: &Path, sha256_hex: &str) -> Result Result> { let content_output = Command::new("git") - .args(["cat-file", &obj_type, sha256_hex]) + .args(["cat-file", obj_type, sha256_hex]) .current_dir(repo_path) .output() .context("failed to run git cat-file ")?; @@ -300,7 +299,24 @@ pub fn read_object(repo_path: &Path, sha256_hex: &str) -> Result` is computed from these same content bytes via +/// `gitlawb_core::cid::Cid::from_git_object_bytes`. +/// +/// Returns `None` if the object does not exist in this repo. +pub fn read_object(repo_path: &Path, sha256_hex: &str) -> Result)>> { + let obj_type = match object_type(repo_path, sha256_hex)? { + Some(t) => t, + None => return Ok(None), + }; + let content = read_object_content(repo_path, sha256_hex, &obj_type)?; + Ok(Some((obj_type, content))) } /// Get the diff between two branches: changes on source_branch not in target_branch. From f2c91a868afb57b7e2a2b949536015490492cef7 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Wed, 1 Jul 2026 07:22:24 +0600 Subject: [PATCH 05/25] docs(ipfs): update get_by_cid comment to reflect split object retrieval --- crates/gitlawb-node/src/api/ipfs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index 46cbf73..f3de757 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -41,7 +41,7 @@ use crate::visibility::{visibility_check, Decision}; /// (`visibility_check` at `"/"`), never re-resolving via `authorize_repo_read` /// — `get_repo`'s fuzzy match could otherwise authorize a different physical /// row than the one read (KTD2a). We check object existence via -/// `store::read_object` *before* the expensive reachability walk so random-CID +/// `store::object_type` *before* the expensive reachability walk so random-CID /// spray cannot trigger full-history git walks on repos that don't carry the /// object. When the row carries path-scoped rules (KTD4) the served object /// must be either a non-blob (trees/commits are structural; KTD3) OR a blob From 03ba7149fc248798df15f9fb26bf897fd2901b4d Mon Sep 17 00:00:00 2001 From: Gravirei Date: Wed, 1 Jul 2026 23:40:07 +0600 Subject: [PATCH 06/25] Run cargo fmt on store.rs --- crates/gitlawb-node/src/git/store.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/gitlawb-node/src/git/store.rs b/crates/gitlawb-node/src/git/store.rs index 290da6c..229ee69 100644 --- a/crates/gitlawb-node/src/git/store.rs +++ b/crates/gitlawb-node/src/git/store.rs @@ -283,7 +283,11 @@ pub fn object_type(repo_path: &Path, sha256_hex: &str) -> Result> return Ok(None); } - Ok(Some(String::from_utf8_lossy(&type_output.stdout).trim().to_string())) + Ok(Some( + String::from_utf8_lossy(&type_output.stdout) + .trim() + .to_string(), + )) } /// Read an object's content if its type is already known. From 1d7e046eae920fe6c12472ceb01ef1c422bedcce Mon Sep 17 00:00:00 2001 From: Gravirei Date: Tue, 30 Jun 2026 20:22:26 +0600 Subject: [PATCH 07/25] fix(node): gate /ipfs/pins and /arweave/anchors behind authentication (#121) - /api/v1/ipfs/pins: require authentication to stop anonymous node-wide CID enumeration - /api/v1/arweave/anchors: gate on caller's read visibility when ?repo= is provided (deny -> 404); require authentication for global listing --- crates/gitlawb-node/src/api/arweave.rs | 30 +++++++++++++++++++++++--- crates/gitlawb-node/src/api/ipfs.rs | 14 +++++++++++- crates/gitlawb-node/src/server.rs | 17 +++++++++------ 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index 0d728c7..b080741 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -2,11 +2,12 @@ use axum::{ extract::{Query, State}, - Json, + Extension, Json, }; use serde::Deserialize; -use crate::error::Result; +use crate::auth::AuthenticatedDid; +use crate::error::{AppError, Result}; use crate::state::AppState; #[derive(Debug, Deserialize)] @@ -21,16 +22,39 @@ fn default_limit() -> i64 { } /// GET /api/v1/arweave/anchors +/// +/// Returns Arweave ref-update anchors. When `?repo=/` is provided, +/// the response is gated on the caller's read visibility for that repo (deny -> +/// 404). Without a `?repo=` filter, authentication is required to prevent +/// anonymous node-wide anchor enumeration (#121). pub async fn list_anchors( State(state): State, + auth: Option>, Query(q): Query, ) -> Result> { + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + + if let Some(ref repo) = q.repo { + // Gate on per-repo visibility. + let parts: Vec<&str> = repo.splitn(2, '/').collect(); + if parts.len() != 2 { + return Err(AppError::NotFound("repo not found".into())); + } + let (owner, name) = (parts[0], parts[1]); + crate::api::authorize_repo_read(&state, owner, name, caller, "/").await?; + } else { + // Global listing (no ?repo=) requires authentication. + if caller.is_none() { + return Err(AppError::Unauthorized("authentication required".into())); + } + } + let limit = q.limit.min(200); let anchors = state .db .list_arweave_anchors(q.repo.as_deref(), limit) .await - .map_err(crate::error::AppError::Internal)?; + .map_err(AppError::Internal)?; Ok(Json(serde_json::json!({ "anchors": anchors, diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index f3de757..41d4b08 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -216,7 +216,19 @@ pub async fn get_by_cid( /// Returns all CIDs that have been pinned to the local IPFS node from git /// objects received via push. Each entry includes the git SHA-256 hex, the /// CIDv1 string, and the timestamp when it was pinned. -pub async fn list_pins(State(state): State) -> Result> { +/// +/// Authentication is required to prevent anonymous CID enumeration (#121). +/// Any authenticated caller may list pins — the node-wide index is gated on +/// the caller identity so it is not available anonymously. +pub async fn list_pins( + State(state): State, + auth: Option>, +) -> Result> { + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + if caller.is_none() { + return Err(AppError::Unauthorized("authentication required".into())); + } + let pins = state .db .list_pinned_cids() diff --git a/crates/gitlawb-node/src/server.rs b/crates/gitlawb-node/src/server.rs index d38d638..8f4e20a 100644 --- a/crates/gitlawb-node/src/server.rs +++ b/crates/gitlawb-node/src/server.rs @@ -187,17 +187,20 @@ pub fn build_router(state: AppState) -> Router { ); // ── IPFS content-addressed retrieval and pin listing ────────────────── - // `/ipfs/{cid}` carries `optional_signature` so `get_by_cid` sees the caller - // identity and can apply per-repo visibility (#110); anonymous callers stay - // anonymous and still read genuinely public content. `/api/v1/ipfs/pins` - // stays unsigned — gating the pin index is tracked separately (#121). + // Both routes carry `optional_signature` so `get_by_cid` and `list_pins` + // can apply per-repo visibility and authentication checks (#110, #121); + // anonymous callers stay anonymous and still read genuinely public content. let ipfs_routes = Router::new() .route("/ipfs/{cid}", get(ipfs::get_by_cid)) - .layer(middleware::from_fn(auth::optional_signature)) - .merge(Router::new().route("/api/v1/ipfs/pins", get(ipfs::list_pins))); + .route("/api/v1/ipfs/pins", get(ipfs::list_pins)) + .layer(middleware::from_fn(auth::optional_signature)); // ── Arweave permanent anchors ────────────────────────────────────────── - let arweave_routes = Router::new().route("/api/v1/arweave/anchors", get(arweave::list_anchors)); + // Carries `optional_signature` so `list_anchors` can check caller visibility + // when `?repo=` is provided and require authentication for global listing (#121). + let arweave_routes = Router::new() + .route("/api/v1/arweave/anchors", get(arweave::list_anchors)) + .layer(middleware::from_fn(auth::optional_signature)); // ── Bounty routes (write — require HTTP Signature) ───────────────── let bounty_write_routes = add_auth_layers( From abb0af0fcb792e09d4f0ae5cc826f8980adb0b5d Mon Sep 17 00:00:00 2001 From: Gravirei Date: Tue, 30 Jun 2026 20:27:16 +0600 Subject: [PATCH 08/25] test(node): add integration tests for /ipfs/pins and /arweave/anchors auth gates (#121) - pins_list_denies_anonymous: anonymous 401 - pins_list_allows_authenticated: signed caller 200 with pin data - anchors_global_denies_anonymous: global listing 401 without auth - anchors_global_allows_authenticated: signed caller 200 with anchors - anchors_repo_denies_anonymous_on_private: ?repo= + private repo -> 404 - anchors_repo_allows_owner: ?repo= + owner -> 200 - anchors_repo_denies_non_reader: ?repo= + stranger -> 404 --- Cargo.lock | 10 +- crates/gitlawb-node/src/test_support.rs | 197 ++++++++++++++++++++++++ 2 files changed, 202 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d42c370..dc6540e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3300,7 +3300,7 @@ dependencies = [ [[package]] name = "git-remote-gitlawb" -version = "0.3.9" +version = "0.4.0" dependencies = [ "anyhow", "gitlawb-core", @@ -3311,7 +3311,7 @@ dependencies = [ [[package]] name = "gitlawb-attest" -version = "0.3.9" +version = "0.4.0" dependencies = [ "base64", "ed25519-dalek", @@ -3328,7 +3328,7 @@ dependencies = [ [[package]] name = "gitlawb-core" -version = "0.3.9" +version = "0.4.0" dependencies = [ "anyhow", "base64", @@ -3355,7 +3355,7 @@ dependencies = [ [[package]] name = "gitlawb-node" -version = "0.3.9" +version = "0.4.0" dependencies = [ "alloy", "anyhow", @@ -3411,7 +3411,7 @@ dependencies = [ [[package]] name = "gl" -version = "0.3.9" +version = "0.4.0" dependencies = [ "alloy", "anyhow", diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index d84f23a..d9226d9 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -1953,5 +1953,202 @@ mod tests { "owner also 404s on dangling blobs under path-scoped rules (fail-closed default)" ); assert!(!body.contains("DANGLING SECRET")); + + fn pins_router(state: &AppState) -> Router { + Router::new() + .route( + "/api/v1/ipfs/pins", + axum::routing::get(crate::api::ipfs::list_pins), + ) + .layer(axum::middleware::from_fn(crate::auth::optional_signature)) + .with_state(state.clone()) + } + + fn signed_get(kp: &gitlawb_core::identity::Keypair, uri: &str) -> Request { + let s = gitlawb_core::http_sig::sign_request(kp, "GET", uri, b""); + Request::builder() + .method(Method::GET) + .uri(uri) + .header("content-digest", s.content_digest) + .header("signature-input", s.signature_input) + .header("signature", s.signature) + .body(Body::empty()) + .unwrap() + } + + /// #121: anonymous caller gets 401 from /api/v1/ipfs/pins. + #[sqlx::test] + async fn pins_list_denies_anonymous(pool: PgPool) { + let state = test_state(pool).await; + let resp = pins_router(&state) + .oneshot(anon_get("/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); + } + + /// #121: authenticated caller gets 200 from /api/v1/ipfs/pins. + #[sqlx::test] + async fn pins_list_allows_authenticated(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let kp = Keypair::generate(); + + // Seed a pinned CID so we can verify the response has content. + state + .db + .record_pinned_cid( + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "bafkreihipknrba7vz6ahh2l5qk6pxwtywn3u7worv6hmi6dkt6nv5phb4u", + ) + .await + .unwrap(); + + let resp = pins_router(&state) + .oneshot(signed_get(&kp, "/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!(body["count"], 1); + assert_eq!(body["pins"][0]["sha256_hex"], "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + } + + // ---- #121: GET /api/v1/arweave/anchors auth and visibility gate ---- + + fn anchors_router(state: &AppState) -> Router { + Router::new() + .route( + "/api/v1/arweave/anchors", + axum::routing::get(crate::api::arweave::list_anchors), + ) + .layer(axum::middleware::from_fn(crate::auth::optional_signature)) + .with_state(state.clone()) + } + + async fn seed_anchor(db: &crate::db::Db, repo: &str, owner_did: &str) { + use crate::db::RecordAnchorInput; + db.record_arweave_anchor(&RecordAnchorInput { + repo, + owner_did, + ref_name: "refs/heads/main", + old_sha: "0000000000000000000000000000000000000000000000000000000000000000", + new_sha: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + cid: Some("bafkreihipknrba7vz6ahh2l5qk6pxwtywn3u7worv6hmi6dkt6nv5phb4u"), + irys_tx_id: "tx-test", + arweave_url: "https://arweave.net/test", + node_did: "did:key:zNODE", + }) + .await + .unwrap(); + } + + /// #121: /api/v1/arweave/anchors without ?repo= denies anonymous. + #[sqlx::test] + async fn anchors_global_denies_anonymous(pool: PgPool) { + let state = test_state(pool).await; + let resp = anchors_router(&state) + .oneshot(anon_get("/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); + } + + /// #121: /api/v1/arweave/anchors without ?repo= allows authenticated. + #[sqlx::test] + async fn anchors_global_allows_authenticated(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let kp = Keypair::generate(); + + seed_anchor(&state.db, "some/repo", &kp.did().to_string()).await; + + let resp = anchors_router(&state) + .oneshot(signed_get(&kp, "/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!(body["count"], 1); + } + + /// #121: /api/v1/arweave/anchors with ?repo= denies anonymous on private repo. + #[sqlx::test] + async fn anchors_repo_denies_anonymous_on_private(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let repo_name = "private-repo"; + + state + .db + .create_repo(&seed_private_repo(&owner_did, repo_name)) + .await + .unwrap(); + seed_anchor(&state.db, &format!("{owner_did}/{repo_name}"), &owner_did).await; + + let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); + let resp = anchors_router(&state) + .oneshot(anon_get(&uri)) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } + + /// #121: /api/v1/arweave/anchors with ?repo= allows repo owner. + #[sqlx::test] + async fn anchors_repo_allows_owner(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let repo_name = "owners-repo"; + + state + .db + .create_repo(&seed_private_repo(&owner_did, repo_name)) + .await + .unwrap(); + seed_anchor(&state.db, &format!("{owner_did}/{repo_name}"), &owner_did).await; + + let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); + let resp = anchors_router(&state) + .oneshot(signed_get(&owner, &uri)) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!(body["count"], 1); + } + + /// #121: /api/v1/arweave/anchors with ?repo= denies authenticated non-reader on private repo. + #[sqlx::test] + async fn anchors_repo_denies_non_reader(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let stranger = Keypair::generate(); + let repo_name = "private-repo"; + + state + .db + .create_repo(&seed_private_repo(&owner_did, repo_name)) + .await + .unwrap(); + seed_anchor(&state.db, &format!("{owner_did}/{repo_name}"), &owner_did).await; + + let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); + let resp = anchors_router(&state) + .oneshot(signed_get(&stranger, &uri)) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); } } From 4aefeae437e929fa07f0fe3f60e1814901844ecd Mon Sep 17 00:00:00 2001 From: Gravirei Date: Tue, 30 Jun 2026 22:37:30 +0600 Subject: [PATCH 09/25] resolve findings --- crates/gitlawb-node/src/api/arweave.rs | 52 ++++++++++++--- crates/gitlawb-node/src/api/ipfs.rs | 89 +++++++++++++++++++++++-- crates/gitlawb-node/src/test_support.rs | 5 +- 3 files changed, 127 insertions(+), 19 deletions(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index b080741..e629228 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -25,8 +25,9 @@ fn default_limit() -> i64 { /// /// Returns Arweave ref-update anchors. When `?repo=/` is provided, /// the response is gated on the caller's read visibility for that repo (deny -> -/// 404). Without a `?repo=` filter, authentication is required to prevent -/// anonymous node-wide anchor enumeration (#121). +/// 404). Without a `?repo=` filter, the global listing filters each row on +/// current visibility to prevent metadata disclosure when repos are made private +/// after push (#136). pub async fn list_anchors( State(state): State, auth: Option>, @@ -34,28 +35,57 @@ pub async fn list_anchors( ) -> Result> { let caller = auth.as_ref().map(|e| e.0 .0.as_str()); - if let Some(ref repo) = q.repo { + let normalized_repo = if let Some(ref repo) = q.repo { // Gate on per-repo visibility. let parts: Vec<&str> = repo.splitn(2, '/').collect(); if parts.len() != 2 { return Err(AppError::NotFound("repo not found".into())); } let (owner, name) = (parts[0], parts[1]); - crate::api::authorize_repo_read(&state, owner, name, caller, "/").await?; + let (record, _rules) = + crate::api::authorize_repo_read(&state, owner, name, caller, "/").await?; + + // Normalize to short-form slug that matches what's written to the table. + let owner_short = record + .owner_did + .split(':') + .next_back() + .unwrap_or(&record.owner_did); + Some(format!("{}/{}", owner_short, record.name)) } else { - // Global listing (no ?repo=) requires authentication. - if caller.is_none() { - return Err(AppError::Unauthorized("authentication required".into())); - } - } + None + }; let limit = q.limit.min(200); - let anchors = state + let raw_anchors = state .db - .list_arweave_anchors(q.repo.as_deref(), limit) + .list_arweave_anchors(normalized_repo.as_deref(), limit) .await .map_err(AppError::Internal)?; + // For global listings (no ?repo=), filter each anchor on current visibility. + let anchors = if normalized_repo.is_none() { + let mut filtered = Vec::new(); + for anchor in raw_anchors { + // Parse repo slug to resolve current visibility. + let parts: Vec<&str> = anchor.repo.splitn(2, '/').collect(); + if parts.len() != 2 { + continue; + } + let (owner, name) = (parts[0], parts[1]); + // Skip anchors for repos the caller cannot currently read. + if crate::api::authorize_repo_read(&state, owner, name, caller, "/") + .await + .is_ok() + { + filtered.push(anchor); + } + } + filtered + } else { + raw_anchors + }; + Ok(Json(serde_json::json!({ "anchors": anchors, "count": anchors.len(), diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index 41d4b08..b7f09dc 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -26,6 +26,7 @@ use std::str::FromStr; use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; +use crate::git::push_delta; use crate::git::store; use crate::git::visibility_pack::{allowed_blob_set_for_caller, has_path_scoped_rule}; use crate::state::AppState; @@ -217,24 +218,98 @@ pub async fn get_by_cid( /// objects received via push. Each entry includes the git SHA-256 hex, the /// CIDv1 string, and the timestamp when it was pinned. /// -/// Authentication is required to prevent anonymous CID enumeration (#121). -/// Any authenticated caller may list pins — the node-wide index is gated on -/// the caller identity so it is not available anonymously. +/// The global listing filters each pinned object on current repo visibility +/// to prevent metadata disclosure when repos are made private after push (#136). +/// Only pins from repos the caller can currently read are returned. pub async fn list_pins( State(state): State, auth: Option>, ) -> Result> { let caller = auth.as_ref().map(|e| e.0 .0.as_str()); - if caller.is_none() { - return Err(AppError::Unauthorized("authentication required".into())); - } + let caller_owned = caller.map(|c| c.to_string()); - let pins = state + let raw_pins = state .db .list_pinned_cids() .await .map_err(AppError::Internal)?; + // Build a set of sha256_hex values from repos the caller can read. + let repos = state + .db + .list_all_repos() + .await + .map_err(AppError::Internal)?; + + let repo_ids: Vec = repos.iter().map(|r| r.id.clone()).collect(); + let rules_by_repo = state + .db + .list_visibility_rules_for_repos(&repo_ids) + .await + .map_err(AppError::Internal)?; + + let mut allowed_sha256s = std::collections::HashSet::new(); + + for repo in &repos { + let rules: &[crate::db::VisibilityRule] = rules_by_repo + .get(&repo.id) + .map(Vec::as_slice) + .unwrap_or(&[]); + + // Check repo-level visibility. + if visibility_check(rules, repo.is_public, &repo.owner_did, caller, "/") == Decision::Deny { + continue; + } + + let repo_path = match state.repo_store.acquire(&repo.owner_did, &repo.name).await { + Ok(p) => p, + Err(_) => continue, + }; + + // If path-scoped rules exist, we need to compute withheld blobs. + let withheld_set = if has_path_scoped_rule(rules) { + let rp = repo_path.clone(); + let r = rules.to_vec(); + let is_public = repo.is_public; + let owner = repo.owner_did.clone(); + let caller_for_walk = caller_owned.clone(); + + let walk = tokio::task::spawn_blocking(move || { + withheld_blob_oids(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) + }) + .await; + + match walk { + Ok(Ok(set)) => Some(set), + _ => { + // Fail closed: if we can't compute withheld set, skip this repo. + tracing::warn!(repo = %repo.name, "withheld walk failed; skipping repo for pins listing"); + continue; + } + } + } else { + None + }; + + // Read all objects in this repo and add non-withheld ones to allowed set. + if let Ok(objects) = push_delta::list_all_objects(&repo_path) { + for sha in objects { + if let Some(ref withheld) = withheld_set { + if withheld.contains(&sha) { + continue; + } + } + allowed_sha256s.insert(sha); + } + } + } + + // Filter pins to only those in allowed set. + let pins: Vec<_> = raw_pins + .into_iter() + .filter(|pin| allowed_sha256s.contains(&pin.sha256_hex)) + .collect(); + Ok(Json(serde_json::json!({ "pins": pins, "count": pins.len(), diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index d9226d9..1113ea8 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2012,7 +2012,10 @@ mod tests { assert_eq!(resp.status(), StatusCode::OK); let body = json_body(resp).await; assert_eq!(body["count"], 1); - assert_eq!(body["pins"][0]["sha256_hex"], "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + assert_eq!( + body["pins"][0]["sha256_hex"], + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + ); } // ---- #121: GET /api/v1/arweave/anchors auth and visibility gate ---- From 480349ebc203d9778872082a9c216238551d6a7d Mon Sep 17 00:00:00 2001 From: Gravirei Date: Wed, 1 Jul 2026 02:12:11 +0600 Subject: [PATCH 10/25] fix(node): require authentication for global anchor and pin listings --- crates/gitlawb-node/src/api/arweave.rs | 10 +++ crates/gitlawb-node/src/api/ipfs.rs | 23 ++++++ crates/gitlawb-node/src/test_support.rs | 95 +++++++++++++++++++++---- 3 files changed, 113 insertions(+), 15 deletions(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index e629228..d0e102a 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -35,6 +35,16 @@ pub async fn list_anchors( ) -> Result> { let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + // Global listings (no ?repo=) are restricted to authenticated callers: an + // anonymous request against the full-node index would disclose metadata for + // every repo ever pushed here. Per-repo requests are gated by + // authorize_repo_read which applies the per-repo visibility rules. + if q.repo.is_none() && caller.is_none() { + return Err(AppError::Unauthorized( + "authentication required for global anchor listing".into(), + )); + } + let normalized_repo = if let Some(ref repo) = q.repo { // Gate on per-repo visibility. let parts: Vec<&str> = repo.splitn(2, '/').collect(); diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index b7f09dc..467aced 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -218,6 +218,9 @@ pub async fn get_by_cid( /// objects received via push. Each entry includes the git SHA-256 hex, the /// CIDv1 string, and the timestamp when it was pinned. /// +/// Requires authentication: the global pin index would otherwise disclose +/// metadata for every object ever pushed to the node (#121). +/// /// The global listing filters each pinned object on current repo visibility /// to prevent metadata disclosure when repos are made private after push (#136). /// Only pins from repos the caller can currently read are returned. @@ -226,6 +229,14 @@ pub async fn list_pins( auth: Option>, ) -> Result> { let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + + // Reject anonymous callers: the pin index spans the entire node and would + // expose metadata for every object ever pushed here (#121). + if caller.is_none() { + return Err(AppError::Unauthorized( + "authentication required for pin listing".into(), + )); + } let caller_owned = caller.map(|c| c.to_string()); let raw_pins = state @@ -251,6 +262,18 @@ pub async fn list_pins( let mut allowed_sha256s = std::collections::HashSet::new(); for repo in &repos { + // Preserve the quarantine gate from authorize_repo_read: a quarantined + // mirror is treated as nonexistent on every read surface, so its objects + // must not contribute to the allowed SHA-256 set (#P2). + if state + .db + .is_repo_quarantined(&repo.id) + .await + .map_err(AppError::Internal)? + { + continue; + } + let rules: &[crate::db::VisibilityRule] = rules_by_repo .get(&repo.id) .map(Vec::as_slice) diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 1113ea8..ec61afd 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -1988,33 +1988,60 @@ mod tests { } /// #121: authenticated caller gets 200 from /api/v1/ipfs/pins. + /// + /// list_pins only returns pins whose SHA-256 appears in a repo the caller + /// can read. The test therefore: + /// 1. Creates a real SHA-256 bare git repo on disk (via seed_cid_repos) so + /// list_all_objects finds the object. + /// 2. Inserts a matching public repo row (owner_did = full DID from the keypair) + /// so list_all_repos/visibility_check passes. + /// 3. Records a pin for the real object OID. #[sqlx::test] async fn pins_list_allows_authenticated(pool: PgPool) { use gitlawb_core::identity::Keypair; + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + // RepoStore maps owner_did to a filesystem slug by replacing ':' and '/' with '_'. + let fs_slug = owner_did.replace([':', '/'], "_"); + // The short slug is the last ':'-separated segment of the DID (what push writes). + let short = owner_did.split(':').next_back().unwrap().to_string(); + let state = test_state(pool).await; - let kp = Keypair::generate(); - // Seed a pinned CID so we can verify the response has content. + // Build a real SHA-256 bare git repo under /tmp//pinrepo.git so + // list_all_objects can enumerate actual object SHA-256 IDs. + let fx = seed_cid_repos(&fs_slug, &short, &["pinrepo"]); + // seed_cid_repos uses sha256 object format; fx.public_oid is a real git OID. + let pinned_sha = fx.public_oid.clone(); + let pinned_cid = cid_for_oid(&pinned_sha); + + // Seed the repo DB record with the same owner_did so list_all_repos returns + // it and visibility_check passes (public repo, no rules). state .db - .record_pinned_cid( - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - "bafkreihipknrba7vz6ahh2l5qk6pxwtywn3u7worv6hmi6dkt6nv5phb4u", - ) + .create_repo(&seed_repo(&owner_did, "pinrepo")) + .await + .unwrap(); + + // Record the pin for the real git object SHA. + state + .db + .record_pinned_cid(&pinned_sha, &pinned_cid) .await .unwrap(); let resp = pins_router(&state) - .oneshot(signed_get(&kp, "/api/v1/ipfs/pins")) + .oneshot(signed_get(&owner, "/api/v1/ipfs/pins")) .await .unwrap(); assert_eq!(resp.status(), StatusCode::OK); let body = json_body(resp).await; - assert_eq!(body["count"], 1); + assert_eq!(body["count"], 1, "pin for the real git object must be returned"); assert_eq!( body["pins"][0]["sha256_hex"], - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + pinned_sha, + "returned pin must match the seeded object OID" ); } @@ -2059,14 +2086,34 @@ mod tests { } /// #121: /api/v1/arweave/anchors without ?repo= allows authenticated. + /// + /// The global listing filters each anchor through authorize_repo_read, which + /// splits anchor.repo on '/' to get (owner, name) and calls get_repo. The + /// anchor row must therefore carry the same short owner slug that the push + /// path writes (last ':'-separated segment of the DID) so get_repo's fuzzy + /// LIKE match finds the repo record. #[sqlx::test] async fn anchors_global_allows_authenticated(pool: PgPool) { use gitlawb_core::identity::Keypair; let state = test_state(pool).await; let kp = Keypair::generate(); + let owner_did = kp.did().to_string(); + let repo_name = "globalrepo"; + // Short slug: the last ':'-separated segment of the DID, matching what + // repos.rs writes to the arweave_anchors table on push. + let owner_short = owner_did.split(':').next_back().unwrap().to_string(); + let short_slug = format!("{owner_short}/{repo_name}"); + + // Seed a public repo so authorize_repo_read succeeds for this anchor. + state + .db + .create_repo(&seed_repo(&owner_did, repo_name)) + .await + .unwrap(); - seed_anchor(&state.db, "some/repo", &kp.did().to_string()).await; + // Record the anchor with the short-form slug that production writes. + seed_anchor(&state.db, &short_slug, &owner_did).await; let resp = anchors_router(&state) .oneshot(signed_get(&kp, "/api/v1/arweave/anchors")) @@ -2074,10 +2121,13 @@ mod tests { .unwrap(); assert_eq!(resp.status(), StatusCode::OK); let body = json_body(resp).await; - assert_eq!(body["count"], 1); + assert_eq!(body["count"], 1, "authenticated caller sees the seeded anchor"); } /// #121: /api/v1/arweave/anchors with ?repo= denies anonymous on private repo. + /// + /// The ?repo= path gate is purely visibility-based (authorize_repo_read returns + /// 404 for anon on private) — no anonymous-rejection guard is needed here. #[sqlx::test] async fn anchors_repo_denies_anonymous_on_private(pool: PgPool) { use gitlawb_core::identity::Keypair; @@ -2086,14 +2136,19 @@ mod tests { let owner = Keypair::generate(); let owner_did = owner.did().to_string(); let repo_name = "private-repo"; + // Short slug written by the push path. + let owner_short = owner_did.split(':').next_back().unwrap().to_string(); + let short_slug = format!("{owner_short}/{repo_name}"); state .db .create_repo(&seed_private_repo(&owner_did, repo_name)) .await .unwrap(); - seed_anchor(&state.db, &format!("{owner_did}/{repo_name}"), &owner_did).await; + // Store anchor with the short slug that production writes. + seed_anchor(&state.db, &short_slug, &owner_did).await; + // ?repo= accepts the full DID form; list_anchors normalises it before querying. let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); let resp = anchors_router(&state) .oneshot(anon_get(&uri)) @@ -2103,6 +2158,10 @@ mod tests { } /// #121: /api/v1/arweave/anchors with ?repo= allows repo owner. + /// + /// Anchor is stored with the short owner slug that push writes; the ?repo= + /// query carries the full DID which list_anchors normalises to the same short + /// slug before issuing the DB query. #[sqlx::test] async fn anchors_repo_allows_owner(pool: PgPool) { use gitlawb_core::identity::Keypair; @@ -2111,13 +2170,17 @@ mod tests { let owner = Keypair::generate(); let owner_did = owner.did().to_string(); let repo_name = "owners-repo"; + // Short slug matching what repos.rs writes on push. + let owner_short = owner_did.split(':').next_back().unwrap().to_string(); + let short_slug = format!("{owner_short}/{repo_name}"); state .db .create_repo(&seed_private_repo(&owner_did, repo_name)) .await .unwrap(); - seed_anchor(&state.db, &format!("{owner_did}/{repo_name}"), &owner_did).await; + // Store anchor with the short slug that production writes. + seed_anchor(&state.db, &short_slug, &owner_did).await; let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); let resp = anchors_router(&state) @@ -2126,7 +2189,7 @@ mod tests { .unwrap(); assert_eq!(resp.status(), StatusCode::OK); let body = json_body(resp).await; - assert_eq!(body["count"], 1); + assert_eq!(body["count"], 1, "repo owner sees their anchor"); } /// #121: /api/v1/arweave/anchors with ?repo= denies authenticated non-reader on private repo. @@ -2139,13 +2202,15 @@ mod tests { let owner_did = owner.did().to_string(); let stranger = Keypair::generate(); let repo_name = "private-repo"; + let owner_short = owner_did.split(':').next_back().unwrap().to_string(); + let short_slug = format!("{owner_short}/{repo_name}"); state .db .create_repo(&seed_private_repo(&owner_did, repo_name)) .await .unwrap(); - seed_anchor(&state.db, &format!("{owner_did}/{repo_name}"), &owner_did).await; + seed_anchor(&state.db, &short_slug, &owner_did).await; let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); let resp = anchors_router(&state) From 417c0c24cb6ff60262dfcc3736783fe94df39ee9 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Wed, 1 Jul 2026 07:33:21 +0600 Subject: [PATCH 11/25] test: format test_support.rs after CI failure --- crates/gitlawb-node/src/test_support.rs | 136 +++++++++++++++++++++++- 1 file changed, 132 insertions(+), 4 deletions(-) diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index ec61afd..9c32433 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2037,14 +2037,107 @@ mod tests { .unwrap(); assert_eq!(resp.status(), StatusCode::OK); let body = json_body(resp).await; - assert_eq!(body["count"], 1, "pin for the real git object must be returned"); assert_eq!( - body["pins"][0]["sha256_hex"], - pinned_sha, + body["count"], 1, + "pin for the real git object must be returned" + ); + assert_eq!( + body["pins"][0]["sha256_hex"], pinned_sha, "returned pin must match the seeded object OID" ); } + #[sqlx::test] + async fn pins_list_excludes_quarantined_repos(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let fs_slug = owner_did.replace([':', '/'], "_"); + let short = owner_did.split(':').next_back().unwrap().to_string(); + + let state = test_state(pool).await; + + let fx = seed_cid_repos(&fs_slug, &short, &["pinrepo"]); + let pinned_sha = fx.public_oid.clone(); + let pinned_cid = cid_for_oid(&pinned_sha); + + let repo = seed_repo(&owner_did, "pinrepo"); + state.db.create_repo(&repo).await.unwrap(); + state.db.set_repo_quarantine(&repo.id, true).await.unwrap(); + + state + .db + .record_pinned_cid(&pinned_sha, &pinned_cid) + .await + .unwrap(); + + let resp = pins_router(&state) + .oneshot(signed_get(&owner, "/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "quarantined repo objects must be withheld from pin index" + ); + } + + #[sqlx::test] + async fn pins_list_withholds_path_scoped_blobs(pool: PgPool) { + use crate::db::VisibilityMode; + use gitlawb_core::identity::Keypair; + + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let fs_slug = owner_did.replace([':', '/'], "_"); + let short = owner_did.split(':').next_back().unwrap().to_string(); + + let stranger = Keypair::generate(); + + let state = test_state(pool).await; + + let fx = seed_cid_repos(&fs_slug, &short, &["pinrepo"]); + let public_sha = fx.public_oid.clone(); + let public_cid = cid_for_oid(&public_sha); + let secret_sha = fx.secret_oid.clone(); + let secret_cid = cid_for_oid(&secret_sha); + + let repo = seed_repo(&owner_did, "pinrepo"); + state.db.create_repo(&repo).await.unwrap(); + + state + .db + .set_visibility_rule(&repo.id, "/secret/**", VisibilityMode::B, &[], &owner_did) + .await + .unwrap(); + + state + .db + .record_pinned_cid(&public_sha, &public_cid) + .await + .unwrap(); + state + .db + .record_pinned_cid(&secret_sha, &secret_cid) + .await + .unwrap(); + + let resp = pins_router(&state) + .oneshot(signed_get(&stranger, "/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + + assert_eq!(body["count"], 1, "stranger sees only the public pin"); + assert_eq!( + body["pins"][0]["sha256_hex"], public_sha, + "stranger sees the public pin" + ); + } + // ---- #121: GET /api/v1/arweave/anchors auth and visibility gate ---- fn anchors_router(state: &AppState) -> Router { @@ -2121,7 +2214,42 @@ mod tests { .unwrap(); assert_eq!(resp.status(), StatusCode::OK); let body = json_body(resp).await; - assert_eq!(body["count"], 1, "authenticated caller sees the seeded anchor"); + assert_eq!( + body["count"], 1, + "authenticated caller sees the seeded anchor" + ); + } + + #[sqlx::test] + async fn anchors_global_denies_non_reader(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let stranger = Keypair::generate(); + let repo_name = "privaterepo"; + let owner_short = owner_did.split(':').next_back().unwrap().to_string(); + let short_slug = format!("{owner_short}/{repo_name}"); + + state + .db + .create_repo(&seed_private_repo(&owner_did, repo_name)) + .await + .unwrap(); + + seed_anchor(&state.db, &short_slug, &owner_did).await; + + let resp = anchors_router(&state) + .oneshot(signed_get(&stranger, "/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "non-reader is denied the private anchor in global listing" + ); } /// #121: /api/v1/arweave/anchors with ?repo= denies anonymous on private repo. From 66329a146789d3b5b4fcb454685b8bd2281ca9a2 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Wed, 1 Jul 2026 07:43:15 +0600 Subject: [PATCH 12/25] test: extract test helpers and add anchors anonymous coverage --- crates/gitlawb-node/src/test_support.rs | 250 +++++++++++------------- 1 file changed, 119 insertions(+), 131 deletions(-) diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 9c32433..0bfeeb5 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -1987,6 +1987,33 @@ mod tests { assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); } + struct PinTestFixture { + owner: gitlawb_core::identity::Keypair, + owner_did: String, + fx: CidFixture, + repo: crate::db::RepoRecord, + } + + async fn setup_pin_test(state: &AppState, repo_name: &str) -> PinTestFixture { + use gitlawb_core::identity::Keypair; + + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let fs_slug = owner_did.replace([':', '/'], "_"); + let short = owner_did.split(':').next_back().unwrap().to_string(); + + let fx = seed_cid_repos(&fs_slug, &short, &[repo_name]); + let repo = seed_repo(&owner_did, repo_name); + state.db.create_repo(&repo).await.unwrap(); + + PinTestFixture { + owner, + owner_did, + fx, + repo, + } + } + /// #121: authenticated caller gets 200 from /api/v1/ipfs/pins. /// /// list_pins only returns pins whose SHA-256 appears in a repo the caller @@ -1998,33 +2025,12 @@ mod tests { /// 3. Records a pin for the real object OID. #[sqlx::test] async fn pins_list_allows_authenticated(pool: PgPool) { - use gitlawb_core::identity::Keypair; - - let owner = Keypair::generate(); - let owner_did = owner.did().to_string(); - // RepoStore maps owner_did to a filesystem slug by replacing ':' and '/' with '_'. - let fs_slug = owner_did.replace([':', '/'], "_"); - // The short slug is the last ':'-separated segment of the DID (what push writes). - let short = owner_did.split(':').next_back().unwrap().to_string(); - let state = test_state(pool).await; + let setup = setup_pin_test(&state, "pinrepo").await; - // Build a real SHA-256 bare git repo under /tmp//pinrepo.git so - // list_all_objects can enumerate actual object SHA-256 IDs. - let fx = seed_cid_repos(&fs_slug, &short, &["pinrepo"]); - // seed_cid_repos uses sha256 object format; fx.public_oid is a real git OID. - let pinned_sha = fx.public_oid.clone(); + let pinned_sha = setup.fx.public_oid.clone(); let pinned_cid = cid_for_oid(&pinned_sha); - // Seed the repo DB record with the same owner_did so list_all_repos returns - // it and visibility_check passes (public repo, no rules). - state - .db - .create_repo(&seed_repo(&owner_did, "pinrepo")) - .await - .unwrap(); - - // Record the pin for the real git object SHA. state .db .record_pinned_cid(&pinned_sha, &pinned_cid) @@ -2032,7 +2038,7 @@ mod tests { .unwrap(); let resp = pins_router(&state) - .oneshot(signed_get(&owner, "/api/v1/ipfs/pins")) + .oneshot(signed_get(&setup.owner, "/api/v1/ipfs/pins")) .await .unwrap(); assert_eq!(resp.status(), StatusCode::OK); @@ -2049,22 +2055,17 @@ mod tests { #[sqlx::test] async fn pins_list_excludes_quarantined_repos(pool: PgPool) { - use gitlawb_core::identity::Keypair; - - let owner = Keypair::generate(); - let owner_did = owner.did().to_string(); - let fs_slug = owner_did.replace([':', '/'], "_"); - let short = owner_did.split(':').next_back().unwrap().to_string(); - let state = test_state(pool).await; + let setup = setup_pin_test(&state, "pinrepo").await; - let fx = seed_cid_repos(&fs_slug, &short, &["pinrepo"]); - let pinned_sha = fx.public_oid.clone(); + let pinned_sha = setup.fx.public_oid.clone(); let pinned_cid = cid_for_oid(&pinned_sha); - let repo = seed_repo(&owner_did, "pinrepo"); - state.db.create_repo(&repo).await.unwrap(); - state.db.set_repo_quarantine(&repo.id, true).await.unwrap(); + state + .db + .set_repo_quarantine(&setup.repo.id, true) + .await + .unwrap(); state .db @@ -2073,7 +2074,7 @@ mod tests { .unwrap(); let resp = pins_router(&state) - .oneshot(signed_get(&owner, "/api/v1/ipfs/pins")) + .oneshot(signed_get(&setup.owner, "/api/v1/ipfs/pins")) .await .unwrap(); assert_eq!(resp.status(), StatusCode::OK); @@ -2089,27 +2090,24 @@ mod tests { use crate::db::VisibilityMode; use gitlawb_core::identity::Keypair; - let owner = Keypair::generate(); - let owner_did = owner.did().to_string(); - let fs_slug = owner_did.replace([':', '/'], "_"); - let short = owner_did.split(':').next_back().unwrap().to_string(); - - let stranger = Keypair::generate(); - let state = test_state(pool).await; + let setup = setup_pin_test(&state, "pinrepo").await; + let stranger = Keypair::generate(); - let fx = seed_cid_repos(&fs_slug, &short, &["pinrepo"]); - let public_sha = fx.public_oid.clone(); + let public_sha = setup.fx.public_oid.clone(); let public_cid = cid_for_oid(&public_sha); - let secret_sha = fx.secret_oid.clone(); + let secret_sha = setup.fx.secret_oid.clone(); let secret_cid = cid_for_oid(&secret_sha); - let repo = seed_repo(&owner_did, "pinrepo"); - state.db.create_repo(&repo).await.unwrap(); - state .db - .set_visibility_rule(&repo.id, "/secret/**", VisibilityMode::B, &[], &owner_did) + .set_visibility_rule( + &setup.repo.id, + "/secret/**", + VisibilityMode::B, + &[], + &setup.owner_did, + ) .await .unwrap(); @@ -2178,6 +2176,35 @@ mod tests { assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); } + struct AnchorTestFixture { + owner: gitlawb_core::identity::Keypair, + owner_did: String, + } + + async fn setup_anchor_test( + state: &AppState, + repo_name: &str, + is_public: bool, + ) -> AnchorTestFixture { + use gitlawb_core::identity::Keypair; + + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let owner_short = owner_did.split(':').next_back().unwrap().to_string(); + let short_slug = format!("{owner_short}/{repo_name}"); + + let repo = if is_public { + seed_repo(&owner_did, repo_name) + } else { + seed_private_repo(&owner_did, repo_name) + }; + + state.db.create_repo(&repo).await.unwrap(); + seed_anchor(&state.db, &short_slug, &owner_did).await; + + AnchorTestFixture { owner, owner_did } + } + /// #121: /api/v1/arweave/anchors without ?repo= allows authenticated. /// /// The global listing filters each anchor through authorize_repo_read, which @@ -2187,29 +2214,11 @@ mod tests { /// LIKE match finds the repo record. #[sqlx::test] async fn anchors_global_allows_authenticated(pool: PgPool) { - use gitlawb_core::identity::Keypair; - let state = test_state(pool).await; - let kp = Keypair::generate(); - let owner_did = kp.did().to_string(); - let repo_name = "globalrepo"; - // Short slug: the last ':'-separated segment of the DID, matching what - // repos.rs writes to the arweave_anchors table on push. - let owner_short = owner_did.split(':').next_back().unwrap().to_string(); - let short_slug = format!("{owner_short}/{repo_name}"); - - // Seed a public repo so authorize_repo_read succeeds for this anchor. - state - .db - .create_repo(&seed_repo(&owner_did, repo_name)) - .await - .unwrap(); - - // Record the anchor with the short-form slug that production writes. - seed_anchor(&state.db, &short_slug, &owner_did).await; + let fx = setup_anchor_test(&state, "globalrepo", true).await; let resp = anchors_router(&state) - .oneshot(signed_get(&kp, "/api/v1/arweave/anchors")) + .oneshot(signed_get(&fx.owner, "/api/v1/arweave/anchors")) .await .unwrap(); assert_eq!(resp.status(), StatusCode::OK); @@ -2223,22 +2232,9 @@ mod tests { #[sqlx::test] async fn anchors_global_denies_non_reader(pool: PgPool) { use gitlawb_core::identity::Keypair; - let state = test_state(pool).await; - let owner = Keypair::generate(); - let owner_did = owner.did().to_string(); + let _fx = setup_anchor_test(&state, "privaterepo", false).await; let stranger = Keypair::generate(); - let repo_name = "privaterepo"; - let owner_short = owner_did.split(':').next_back().unwrap().to_string(); - let short_slug = format!("{owner_short}/{repo_name}"); - - state - .db - .create_repo(&seed_private_repo(&owner_did, repo_name)) - .await - .unwrap(); - - seed_anchor(&state.db, &short_slug, &owner_did).await; let resp = anchors_router(&state) .oneshot(signed_get(&stranger, "/api/v1/arweave/anchors")) @@ -2258,31 +2254,42 @@ mod tests { /// 404 for anon on private) — no anonymous-rejection guard is needed here. #[sqlx::test] async fn anchors_repo_denies_anonymous_on_private(pool: PgPool) { - use gitlawb_core::identity::Keypair; - let state = test_state(pool).await; - let owner = Keypair::generate(); - let owner_did = owner.did().to_string(); let repo_name = "private-repo"; - // Short slug written by the push path. - let owner_short = owner_did.split(':').next_back().unwrap().to_string(); - let short_slug = format!("{owner_short}/{repo_name}"); + let fx = setup_anchor_test(&state, repo_name, false).await; - state - .db - .create_repo(&seed_private_repo(&owner_did, repo_name)) + let uri = format!( + "/api/v1/arweave/anchors?repo={}/{}", + fx.owner_did, repo_name + ); + let resp = anchors_router(&state) + .oneshot(anon_get(&uri)) .await .unwrap(); - // Store anchor with the short slug that production writes. - seed_anchor(&state.db, &short_slug, &owner_did).await; + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } + + /// #121: /api/v1/arweave/anchors with ?repo= allows anonymous on public repo. + #[sqlx::test] + async fn anchors_repo_allows_anonymous_on_public(pool: PgPool) { + let state = test_state(pool).await; + let repo_name = "public-repo"; + let fx = setup_anchor_test(&state, repo_name, true).await; - // ?repo= accepts the full DID form; list_anchors normalises it before querying. - let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); + let uri = format!( + "/api/v1/arweave/anchors?repo={}/{}", + fx.owner_did, repo_name + ); let resp = anchors_router(&state) .oneshot(anon_get(&uri)) .await .unwrap(); - assert_eq!(resp.status(), StatusCode::NOT_FOUND); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 1, + "anonymous caller sees the seeded public anchor" + ); } /// #121: /api/v1/arweave/anchors with ?repo= allows repo owner. @@ -2292,27 +2299,16 @@ mod tests { /// slug before issuing the DB query. #[sqlx::test] async fn anchors_repo_allows_owner(pool: PgPool) { - use gitlawb_core::identity::Keypair; - let state = test_state(pool).await; - let owner = Keypair::generate(); - let owner_did = owner.did().to_string(); let repo_name = "owners-repo"; - // Short slug matching what repos.rs writes on push. - let owner_short = owner_did.split(':').next_back().unwrap().to_string(); - let short_slug = format!("{owner_short}/{repo_name}"); - - state - .db - .create_repo(&seed_private_repo(&owner_did, repo_name)) - .await - .unwrap(); - // Store anchor with the short slug that production writes. - seed_anchor(&state.db, &short_slug, &owner_did).await; + let fx = setup_anchor_test(&state, repo_name, false).await; - let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); + let uri = format!( + "/api/v1/arweave/anchors?repo={}/{}", + fx.owner_did, repo_name + ); let resp = anchors_router(&state) - .oneshot(signed_get(&owner, &uri)) + .oneshot(signed_get(&fx.owner, &uri)) .await .unwrap(); assert_eq!(resp.status(), StatusCode::OK); @@ -2324,23 +2320,15 @@ mod tests { #[sqlx::test] async fn anchors_repo_denies_non_reader(pool: PgPool) { use gitlawb_core::identity::Keypair; - let state = test_state(pool).await; - let owner = Keypair::generate(); - let owner_did = owner.did().to_string(); - let stranger = Keypair::generate(); let repo_name = "private-repo"; - let owner_short = owner_did.split(':').next_back().unwrap().to_string(); - let short_slug = format!("{owner_short}/{repo_name}"); - - state - .db - .create_repo(&seed_private_repo(&owner_did, repo_name)) - .await - .unwrap(); - seed_anchor(&state.db, &short_slug, &owner_did).await; + let fx = setup_anchor_test(&state, repo_name, false).await; + let stranger = Keypair::generate(); - let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); + let uri = format!( + "/api/v1/arweave/anchors?repo={}/{}", + fx.owner_did, repo_name + ); let resp = anchors_router(&state) .oneshot(signed_get(&stranger, &uri)) .await From 128d6336d9933174ff9b077893ac6c48fa928c19 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Wed, 1 Jul 2026 23:48:33 +0600 Subject: [PATCH 13/25] fix(node): clamp negative anchor limit and add build_router pin integration test --- crates/gitlawb-node/src/api/arweave.rs | 2 +- crates/gitlawb-node/src/test_support.rs | 37 +++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index d0e102a..ca4041e 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -66,7 +66,7 @@ pub async fn list_anchors( None }; - let limit = q.limit.min(200); + let limit = q.limit.clamp(0, 200); let raw_anchors = state .db .list_arweave_anchors(normalized_repo.as_deref(), limit) diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 0bfeeb5..f29a25c 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2053,6 +2053,43 @@ mod tests { ); } + /// #121: authenticated caller gets 200 through the production build_router. + /// + /// Unlike pins_list_allows_authenticated (which uses a mini pins_router with + /// only the one route), this test exercises server::build_router to verify + /// that /api/v1/ipfs/pins is wired through optional_signature in the real + /// route table and that a signed request reaches list_pins successfully. + #[sqlx::test] + async fn pins_list_allows_authenticated_through_build_router(pool: PgPool) { + let state = test_state(pool).await; + let setup = setup_pin_test(&state, "build-router-pins").await; + + let pinned_sha = setup.fx.public_oid.clone(); + let pinned_cid = cid_for_oid(&pinned_sha); + + state + .db + .record_pinned_cid(&pinned_sha, &pinned_cid) + .await + .unwrap(); + + let router = crate::server::build_router(state); + let resp = router + .oneshot(signed_get(&setup.owner, "/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 1, + "build_router wiring: pin for the real git object must be returned" + ); + assert_eq!( + body["pins"][0]["sha256_hex"], pinned_sha, + "build_router wiring: returned pin must match the seeded object OID" + ); + } + #[sqlx::test] async fn pins_list_excludes_quarantined_repos(pool: PgPool) { let state = test_state(pool).await; From 004d9fd869b52b57f06b367fea5bf53ba321cfac Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 07:54:33 +0600 Subject: [PATCH 14/25] fix(node): filter global anchors before limit and use deduped repo visibility --- crates/gitlawb-node/src/api/arweave.rs | 72 ++++++++++++++++--------- crates/gitlawb-node/src/api/ipfs.rs | 16 ++---- crates/gitlawb-node/src/test_support.rs | 18 +++++++ 3 files changed, 68 insertions(+), 38 deletions(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index ca4041e..ed90c5e 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -9,6 +9,7 @@ use serde::Deserialize; use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; use crate::state::AppState; +use crate::visibility::{visibility_check, Decision}; #[derive(Debug, Deserialize)] pub struct ListAnchorsQuery { @@ -67,33 +68,54 @@ pub async fn list_anchors( }; let limit = q.limit.clamp(0, 200); - let raw_anchors = state - .db - .list_arweave_anchors(normalized_repo.as_deref(), limit) - .await - .map_err(AppError::Internal)?; - // For global listings (no ?repo=), filter each anchor on current visibility. - let anchors = if normalized_repo.is_none() { - let mut filtered = Vec::new(); - for anchor in raw_anchors { - // Parse repo slug to resolve current visibility. - let parts: Vec<&str> = anchor.repo.splitn(2, '/').collect(); - if parts.len() != 2 { - continue; - } - let (owner, name) = (parts[0], parts[1]); - // Skip anchors for repos the caller cannot currently read. - if crate::api::authorize_repo_read(&state, owner, name, caller, "/") - .await - .is_ok() - { - filtered.push(anchor); - } - } - filtered + let anchors = if let Some(ref slug) = normalized_repo { + // Per-repo: filter at the DB level and return directly. + state + .db + .list_arweave_anchors(Some(slug), limit) + .await + .map_err(AppError::Internal)? } else { - raw_anchors + // Global listing: fetch all anchors, filter by current visibility + // against the deduped repo view (so mirror rows never bypass canonical + // visibility), then take limit. + let all_anchors = state + .db + .list_arweave_anchors(None, i64::MAX) + .await + .map_err(AppError::Internal)?; + + // Build a set of readable repo short-form slugs from the deduped list. + let repos = state + .db + .list_all_repos_deduped() + .await + .map_err(AppError::Internal)?; + let repo_ids: Vec = repos.iter().map(|r| r.id.clone()).collect(); + let rules_by_repo = state + .db + .list_visibility_rules_for_repos(&repo_ids) + .await + .map_err(AppError::Internal)?; + + let readable: std::collections::HashSet = repos + .iter() + .filter(|r| { + let rules = rules_by_repo.get(&r.id).map(Vec::as_slice).unwrap_or(&[]); + visibility_check(rules, r.is_public, &r.owner_did, caller, "/") != Decision::Deny + }) + .map(|r| { + let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); + format!("{}/{}", short, r.name) + }) + .collect(); + + all_anchors + .into_iter() + .filter(|a| readable.contains(&a.repo)) + .take(limit as usize) + .collect() }; Ok(Json(serde_json::json!({ diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index 467aced..0331ad9 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -246,9 +246,11 @@ pub async fn list_pins( .map_err(AppError::Internal)?; // Build a set of sha256_hex values from repos the caller can read. + // Use the deduped repo list so mirror rows never bypass the canonical + // repo's visibility rules (#136). let repos = state .db - .list_all_repos() + .list_all_repos_deduped() .await .map_err(AppError::Internal)?; @@ -262,18 +264,6 @@ pub async fn list_pins( let mut allowed_sha256s = std::collections::HashSet::new(); for repo in &repos { - // Preserve the quarantine gate from authorize_repo_read: a quarantined - // mirror is treated as nonexistent on every read surface, so its objects - // must not contribute to the allowed SHA-256 set (#P2). - if state - .db - .is_repo_quarantined(&repo.id) - .await - .map_err(AppError::Internal)? - { - continue; - } - let rules: &[crate::db::VisibilityRule] = rules_by_repo .get(&repo.id) .map(Vec::as_slice) diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index f29a25c..704f64a 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2372,4 +2372,22 @@ mod tests { .unwrap(); assert_eq!(resp.status(), StatusCode::NOT_FOUND); } + + /// #121: negative limit is clamped to 0 and returns a bounded response (no 500). + #[sqlx::test] + async fn anchors_global_negative_limit_is_clamped(pool: PgPool) { + let state = test_state(pool).await; + let fx = setup_anchor_test(&state, "neg-limit-repo", true).await; + + let resp = anchors_router(&state) + .oneshot(signed_get(&fx.owner, "/api/v1/arweave/anchors?limit=-1")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "negative limit clamps to 0, returning empty result" + ); + } } From 888a3c8500c47cac5af64c4f22e45c0d4d2072d8 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 08:04:57 +0600 Subject: [PATCH 15/25] fix: resolve rebase conflicts and update pins path to use allowed_blob_set_for_caller --- crates/gitlawb-node/src/api/ipfs.rs | 38 +++++++++++-------------- crates/gitlawb-node/src/test_support.rs | 1 + 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index 0331ad9..a33c5ec 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -279,41 +279,37 @@ pub async fn list_pins( Err(_) => continue, }; - // If path-scoped rules exist, we need to compute withheld blobs. - let withheld_set = if has_path_scoped_rule(rules) { + // If path-scoped rules exist, compute the set of blobs the caller is + // allowed to read. Otherwise every blob in the repo is allowed. + let allowed_blobs = if has_path_scoped_rule(rules) { let rp = repo_path.clone(); let r = rules.to_vec(); let is_public = repo.is_public; let owner = repo.owner_did.clone(); let caller_for_walk = caller_owned.clone(); - let walk = tokio::task::spawn_blocking(move || { - withheld_blob_oids(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) + match tokio::task::spawn_blocking(move || { + allowed_blob_set_for_caller(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) }) - .await; - - match walk { - Ok(Ok(set)) => Some(set), + .await + { + Ok(Ok(set)) => set, _ => { - // Fail closed: if we can't compute withheld set, skip this repo. - tracing::warn!(repo = %repo.name, "withheld walk failed; skipping repo for pins listing"); + tracing::warn!(repo = %repo.name, "allowed-blob walk failed; skipping repo for pins listing"); continue; } } } else { - None + // No path-scoped rules: all objects reachable in this repo are allowed. + match push_delta::list_all_objects(&repo_path) { + Ok(objects) => objects.into_iter().collect(), + Err(_) => continue, + } }; - // Read all objects in this repo and add non-withheld ones to allowed set. - if let Ok(objects) = push_delta::list_all_objects(&repo_path) { - for sha in objects { - if let Some(ref withheld) = withheld_set { - if withheld.contains(&sha) { - continue; - } - } - allowed_sha256s.insert(sha); - } + // Add the allowed blobs to the global allowed set. + for sha in allowed_blobs { + allowed_sha256s.insert(sha); } } diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 704f64a..076834c 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -1953,6 +1953,7 @@ mod tests { "owner also 404s on dangling blobs under path-scoped rules (fail-closed default)" ); assert!(!body.contains("DANGLING SECRET")); + } fn pins_router(state: &AppState) -> Router { Router::new() From 85f8f94a139d6eea4a2c0ca1fdc64decc1030888 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 09:59:43 +0600 Subject: [PATCH 16/25] fix(node): resolve mirror rows in anchor per-repo path and bound global anchor SQL --- crates/gitlawb-node/src/api/arweave.rs | 86 +++++++++++++++---------- crates/gitlawb-node/src/db/mod.rs | 34 ++++++++++ crates/gitlawb-node/src/test_support.rs | 52 +++++++++++++++ 3 files changed, 138 insertions(+), 34 deletions(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index ed90c5e..bd92d30 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -29,64 +29,78 @@ fn default_limit() -> i64 { /// 404). Without a `?repo=` filter, the global listing filters each row on /// current visibility to prevent metadata disclosure when repos are made private /// after push (#136). +/// +/// Both paths resolve visibility against the deduped repo view so mirror rows +/// never bypass the canonical repo's rules. pub async fn list_anchors( State(state): State, auth: Option>, Query(q): Query, ) -> Result> { let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let limit = q.limit.clamp(0, 200); - // Global listings (no ?repo=) are restricted to authenticated callers: an - // anonymous request against the full-node index would disclose metadata for - // every repo ever pushed here. Per-repo requests are gated by - // authorize_repo_read which applies the per-repo visibility rules. + // Global listings (no ?repo=) are restricted to authenticated callers. if q.repo.is_none() && caller.is_none() { return Err(AppError::Unauthorized( "authentication required for global anchor listing".into(), )); } - let normalized_repo = if let Some(ref repo) = q.repo { - // Gate on per-repo visibility. + let anchors = if let Some(ref repo) = q.repo { + // ── Per-repo path ── + // Resolve against the deduped repo view so mirror rows never bypass + // the canonical repo's visibility rules. Use did_matches to handle + // both full DID and bare short-form owner in the URL. let parts: Vec<&str> = repo.splitn(2, '/').collect(); if parts.len() != 2 { return Err(AppError::NotFound("repo not found".into())); } let (owner, name) = (parts[0], parts[1]); - let (record, _rules) = - crate::api::authorize_repo_read(&state, owner, name, caller, "/").await?; - // Normalize to short-form slug that matches what's written to the table. + // Fetch the deduped list (mirror rows collapsed, quarantined excluded). + let repos = state + .db + .list_all_repos_deduped() + .await + .map_err(AppError::Internal)?; + + let record = repos + .into_iter() + .find(|r| crate::api::did_matches(owner, &r.owner_did) && r.name == name) + .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + + // Quarantine gate (belt-and-suspenders — deduped already filters). + if state.db.is_repo_quarantined(&record.id).await? { + return Err(AppError::RepoNotFound(format!("{owner}/{name}"))); + } + + // Visibility gate against the canonical survivor's rules. + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") + == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{name}"))); + } + + // Normalize to short-form slug matching the anchor table. let owner_short = record .owner_did .split(':') .next_back() .unwrap_or(&record.owner_did); - Some(format!("{}/{}", owner_short, record.name)) - } else { - None - }; - - let limit = q.limit.clamp(0, 200); + let slug = Some(format!("{}/{}", owner_short, record.name)); - let anchors = if let Some(ref slug) = normalized_repo { - // Per-repo: filter at the DB level and return directly. state .db - .list_arweave_anchors(Some(slug), limit) + .list_arweave_anchors(slug.as_deref(), limit) .await .map_err(AppError::Internal)? } else { - // Global listing: fetch all anchors, filter by current visibility - // against the deduped repo view (so mirror rows never bypass canonical - // visibility), then take limit. - let all_anchors = state - .db - .list_arweave_anchors(None, i64::MAX) - .await - .map_err(AppError::Internal)?; - - // Build a set of readable repo short-form slugs from the deduped list. + // ── Global listing ── + // Build the set of readable repo slugs from the deduped repo view + // (mirror rows already collapsed, quarantined excluded), then query + // anchors bounded in SQL via WHERE repo = ANY(...). let repos = state .db .list_all_repos_deduped() @@ -99,7 +113,7 @@ pub async fn list_anchors( .await .map_err(AppError::Internal)?; - let readable: std::collections::HashSet = repos + let readable: Vec = repos .iter() .filter(|r| { let rules = rules_by_repo.get(&r.id).map(Vec::as_slice).unwrap_or(&[]); @@ -111,11 +125,15 @@ pub async fn list_anchors( }) .collect(); - all_anchors - .into_iter() - .filter(|a| readable.contains(&a.repo)) - .take(limit as usize) - .collect() + if readable.is_empty() { + Vec::new() + } else { + state + .db + .list_arweave_anchors_for_repos(&readable, limit) + .await + .map_err(AppError::Internal)? + } }; Ok(Json(serde_json::json!({ diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 31ff72f..73ecaba 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -2382,6 +2382,40 @@ impl Db { }) .collect()) } + + /// Bounded global anchor query: returns anchors for any of the given repo + /// slugs, ordered by anchored_at DESC, capped at `limit`. + pub async fn list_arweave_anchors_for_repos( + &self, + repos: &[String], + limit: i64, + ) -> Result> { + let rows = sqlx::query( + "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at + FROM arweave_anchors WHERE repo = ANY($1) ORDER BY anchored_at DESC LIMIT $2", + ) + .bind(repos) + .bind(limit) + .fetch_all(&self.pool) + .await?; + + Ok(rows + .into_iter() + .map(|r| ArweaveAnchor { + id: r.get("id"), + repo: r.get("repo"), + owner_did: r.get("owner_did"), + ref_name: r.get("ref_name"), + old_sha: r.get("old_sha"), + new_sha: r.get("new_sha"), + cid: r.get("cid"), + irys_tx_id: r.get("irys_tx_id"), + arweave_url: r.get("arweave_url"), + node_did: r.get("node_did"), + anchored_at: r.get("anchored_at"), + }) + .collect()) + } } // ── Row helpers ─────────────────────────────────────────────────────────────── diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 076834c..92a7618 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2374,6 +2374,58 @@ mod tests { assert_eq!(resp.status(), StatusCode::NOT_FOUND); } + /// #136: ?repo= resolves against the deduped canonical repo, not a public mirror. + /// + /// When a repo has both a private canonical row and a public mirror row, the + /// ?repo= path must gate on the canonical survivor's visibility (denying a + /// stranger) rather than the public mirror's (which would allow). + #[sqlx::test] + async fn anchors_repo_denies_stranger_when_canonical_is_private_even_with_public_mirror( + pool: PgPool, + ) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let short = owner_did.split(':').next_back().unwrap().to_string(); + let repo_name = "mirror-canonical"; + + // Create a private canonical repo. + let canonical = seed_private_repo(&owner_did, repo_name); + state.db.create_repo(&canonical).await.unwrap(); + + // Create a public mirror for the same repo. + state + .db + .upsert_mirror_repo(&short, repo_name, "/tmp/mirror", None, false) + .await + .unwrap(); + + // Seed an anchor with the short slug (matching both rows). + let short_slug = format!("{short}/{repo_name}"); + seed_anchor(&state.db, &short_slug, &owner_did).await; + + let stranger = Keypair::generate(); + let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); + + // Stranger must be denied (404), not served anchor via the public mirror. + let resp = anchors_router(&state) + .oneshot(signed_get(&stranger, &uri)) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + + // Owner still gets the anchor. + let resp = anchors_router(&state) + .oneshot(signed_get(&owner, &uri)) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!(body["count"], 1, "owner sees their anchor through ?repo="); + } + /// #121: negative limit is clamped to 0 and returns a bounded response (no 500). #[sqlx::test] async fn anchors_global_negative_limit_is_clamped(pool: PgPool) { From 3fe4e9522868c7d32ce1fab6f95170e92227ca45 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 21:19:57 +0600 Subject: [PATCH 17/25] fix(node): constrain anchor queries by owner_did to prevent cross-DID slug collision --- crates/gitlawb-node/src/api/arweave.rs | 39 ++++++--- crates/gitlawb-node/src/db/mod.rs | 49 ++++++++---- crates/gitlawb-node/src/test_support.rs | 100 ++++++++++++++++++++++-- 3 files changed, 157 insertions(+), 31 deletions(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index bd92d30..1729ff5 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -6,6 +6,8 @@ use axum::{ }; use serde::Deserialize; +use std::collections::HashMap; + use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; use crate::state::AppState; @@ -54,7 +56,7 @@ pub async fn list_anchors( // both full DID and bare short-form owner in the URL. let parts: Vec<&str> = repo.splitn(2, '/').collect(); if parts.len() != 2 { - return Err(AppError::NotFound("repo not found".into())); + return Err(AppError::RepoNotFound(repo.clone())); } let (owner, name) = (parts[0], parts[1]); @@ -93,7 +95,7 @@ pub async fn list_anchors( state .db - .list_arweave_anchors(slug.as_deref(), limit) + .list_arweave_anchors(slug.as_deref(), Some(&record.owner_did), limit) .await .map_err(AppError::Internal)? } else { @@ -113,26 +115,45 @@ pub async fn list_anchors( .await .map_err(AppError::Internal)?; + // Build both the readable slug set and a map: slug → readable owner DIDs. + // Post-filtering by owner DID prevents leakage between two DIDs whose + // short-form slug collides (e.g. did:key:z6Same and did:gitlawb:z6Same). + let mut readable_owners: HashMap> = HashMap::new(); let readable: Vec = repos .iter() - .filter(|r| { + .filter_map(|r| { let rules = rules_by_repo.get(&r.id).map(Vec::as_slice).unwrap_or(&[]); - visibility_check(rules, r.is_public, &r.owner_did, caller, "/") != Decision::Deny - }) - .map(|r| { + if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny + { + return None; + } let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); - format!("{}/{}", short, r.name) + let slug = format!("{}/{}", short, r.name); + readable_owners + .entry(slug.clone()) + .or_default() + .push(r.owner_did.clone()); + Some(slug) }) .collect(); if readable.is_empty() { Vec::new() } else { - state + let anchors = state .db .list_arweave_anchors_for_repos(&readable, limit) .await - .map_err(AppError::Internal)? + .map_err(AppError::Internal)?; + + anchors + .into_iter() + .filter(|a| { + readable_owners + .get(&a.repo) + .map_or(false, |owners| owners.contains(&a.owner_did)) + }) + .collect() } }; diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 73ecaba..64a6474 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -2344,25 +2344,40 @@ impl Db { pub async fn list_arweave_anchors( &self, repo: Option<&str>, + owner_did: Option<&str>, limit: i64, ) -> Result> { - let rows = if let Some(repo) = repo { - sqlx::query( - "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at - FROM arweave_anchors WHERE repo=$1 ORDER BY anchored_at DESC LIMIT $2", - ) - .bind(repo) - .bind(limit) - .fetch_all(&self.pool) - .await? - } else { - sqlx::query( - "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at - FROM arweave_anchors ORDER BY anchored_at DESC LIMIT $1", - ) - .bind(limit) - .fetch_all(&self.pool) - .await? + let rows = match (repo, owner_did) { + (Some(repo), Some(owner_did)) => { + sqlx::query( + "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at + FROM arweave_anchors WHERE repo=$1 AND owner_did=$2 ORDER BY anchored_at DESC LIMIT $3", + ) + .bind(repo) + .bind(owner_did) + .bind(limit) + .fetch_all(&self.pool) + .await? + } + (Some(repo), None) => { + sqlx::query( + "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at + FROM arweave_anchors WHERE repo=$1 ORDER BY anchored_at DESC LIMIT $2", + ) + .bind(repo) + .bind(limit) + .fetch_all(&self.pool) + .await? + } + (None, _) => { + sqlx::query( + "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at + FROM arweave_anchors ORDER BY anchored_at DESC LIMIT $1", + ) + .bind(limit) + .fetch_all(&self.pool) + .await? + } }; Ok(rows diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 92a7618..6fb6578 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2245,11 +2245,11 @@ mod tests { /// #121: /api/v1/arweave/anchors without ?repo= allows authenticated. /// - /// The global listing filters each anchor through authorize_repo_read, which - /// splits anchor.repo on '/' to get (owner, name) and calls get_repo. The - /// anchor row must therefore carry the same short owner slug that the push - /// path writes (last ':'-separated segment of the DID) so get_repo's fuzzy - /// LIKE match finds the repo record. + /// The global listing resolves visibility against the deduped repo view + /// (list_all_repos_deduped + visibility_check), queries anchors bounded in + /// SQL via WHERE repo = ANY(...), and post-filters by owner_did to prevent + /// cross-DID slug collision. The anchor row must carry the same short owner + /// slug that the push path writes (last ':'-separated segment of the DID). #[sqlx::test] async fn anchors_global_allows_authenticated(pool: PgPool) { let state = test_state(pool).await; @@ -2426,6 +2426,96 @@ mod tests { assert_eq!(body["count"], 1, "owner sees their anchor through ?repo="); } + /// Cross-owner slug-collision regression: two DIDs sharing a last segment + /// (e.g. did:key:z6Same and did:web:evil:z6Same) produce the same anchor + /// slug z6Same/name. The global listing must not leak the private canonical's + /// anchor rows under the public mirror's slug after the owner_did post-filter. + #[sqlx::test] + async fn anchors_global_denies_cross_owner_slug_collision(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let victim = Keypair::generate(); + let victim_did = victim.did().to_string(); + let victim_short = victim_did.split(':').next_back().unwrap().to_string(); + let repo_name = "collision"; + + // Private canonical repo for the victim (did:key:z6Victim). + let canonical = seed_private_repo(&victim_did, repo_name); + state.db.create_repo(&canonical).await.unwrap(); + let slug = format!("{}/{}", victim_short, repo_name); + seed_anchor(&state.db, &slug, &victim_did).await; + + // Public repo sharing the same last segment via a different DID method + // (did:web:evil:z6Victim). The distinct owner_did prevents dedup collapse, + // so both rows appear in list_all_repos_deduped. A different disk_path + // avoids the unique repos_disk_path_key constraint. + let attacker_did = format!("did:web:evil:{victim_short}"); + let mirror = RepoRecord { + disk_path: format!("/tmp/attacker-{}/{}", victim_short, repo_name), + ..seed_repo(&attacker_did, repo_name) + }; + state.db.create_repo(&mirror).await.unwrap(); + + // Stranger must not see the victim's anchor under the colliding slug. + let stranger = Keypair::generate(); + let resp = anchors_router(&state) + .oneshot(signed_get(&stranger, "/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "stranger must not see victim's private anchor under colliding slug" + ); + + // Victim still sees their own anchor. + let resp = anchors_router(&state) + .oneshot(signed_get(&victim, "/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 1, + "victim sees their own anchor through global listing" + ); + } + + /// Quarantined repos are excluded from anchor listings. A quarantined + /// mirror row (no canonical counterpart) is filtered by DEDUP_CTE's + /// `WHERE quarantined = FALSE`, so its slug never enters the readable set. + #[sqlx::test] + async fn anchors_global_excludes_quarantined_repo(pool: PgPool) { + let state = test_state(pool).await; + let short = "zQREPOTEST"; + let repo_name = "q-repo"; + + // Only a quarantined mirror row — no canonical counterpart. + state + .db + .upsert_mirror_repo(short, repo_name, "/tmp/q", None, true) + .await + .unwrap(); + + // Directly seed an anchor for the quarantined repo's slug. + let slug = format!("{short}/{repo_name}"); + seed_anchor(&state.db, &slug, "did:key:zDummyOwner").await; + + let owner = gitlawb_core::identity::Keypair::generate(); + let resp = anchors_router(&state) + .oneshot(signed_get(&owner, "/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "quarantined repo's anchors are excluded from global listing" + ); + } + /// #121: negative limit is clamped to 0 and returns a bounded response (no 500). #[sqlx::test] async fn anchors_global_negative_limit_is_clamped(pool: PgPool) { From 77f42f0dd0d607fcc867d342f9ea95b5e1f4c5df Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 21:24:18 +0600 Subject: [PATCH 18/25] fix(clippy): use is_some_and instead of map_or(false, ...) --- crates/gitlawb-node/src/api/arweave.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index 1729ff5..4500683 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -151,7 +151,7 @@ pub async fn list_anchors( .filter(|a| { readable_owners .get(&a.repo) - .map_or(false, |owners| owners.contains(&a.owner_did)) + .is_some_and(|owners| owners.contains(&a.owner_did)) }) .collect() } From a98e544056e8010eb7d75cf0d11d66d4feeb496b Mon Sep 17 00:00:00 2001 From: Gravirei Date: Fri, 3 Jul 2026 00:23:18 +0600 Subject: [PATCH 19/25] fix(db,cli): filter arweave anchors in SQL and sign IPFS pin requests --- crates/gitlawb-node/src/api/arweave.rs | 61 +++++++++---------------- crates/gitlawb-node/src/db/mod.rs | 12 +++-- crates/gitlawb-node/src/test_support.rs | 8 ++-- crates/gl/src/http.rs | 4 ++ crates/gl/src/ipfs_cmd.rs | 18 +++++--- crates/gl/src/node.rs | 17 ++++++- 6 files changed, 65 insertions(+), 55 deletions(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index 4500683..a7a8783 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -6,8 +6,6 @@ use axum::{ }; use serde::Deserialize; -use std::collections::HashMap; - use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; use crate::state::AppState; @@ -100,9 +98,9 @@ pub async fn list_anchors( .map_err(AppError::Internal)? } else { // ── Global listing ── - // Build the set of readable repo slugs from the deduped repo view + // Build the set of readable repo slugs and owner DIDs from the deduped repo view // (mirror rows already collapsed, quarantined excluded), then query - // anchors bounded in SQL via WHERE repo = ANY(...). + // anchors bounded in SQL. let repos = state .db .list_all_repos_deduped() @@ -115,45 +113,30 @@ pub async fn list_anchors( .await .map_err(AppError::Internal)?; - // Build both the readable slug set and a map: slug → readable owner DIDs. - // Post-filtering by owner DID prevents leakage between two DIDs whose - // short-form slug collides (e.g. did:key:z6Same and did:gitlawb:z6Same). - let mut readable_owners: HashMap> = HashMap::new(); - let readable: Vec = repos - .iter() - .filter_map(|r| { - let rules = rules_by_repo.get(&r.id).map(Vec::as_slice).unwrap_or(&[]); - if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny - { - return None; - } - let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); - let slug = format!("{}/{}", short, r.name); - readable_owners - .entry(slug.clone()) - .or_default() - .push(r.owner_did.clone()); - Some(slug) - }) - .collect(); - - if readable.is_empty() { + // Build parallel vectors of readable (slug, owner_did) pairs to query in SQL. + // This avoids filter-before-limit leaks or loss of pages. + let mut query_repos = Vec::new(); + let mut query_owner_dids = Vec::new(); + + for r in &repos { + let rules = rules_by_repo.get(&r.id).map(Vec::as_slice).unwrap_or(&[]); + if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny { + continue; + } + let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); + let slug = format!("{}/{}", short, r.name); + query_repos.push(slug); + query_owner_dids.push(r.owner_did.clone()); + } + + if query_repos.is_empty() { Vec::new() } else { - let anchors = state + state .db - .list_arweave_anchors_for_repos(&readable, limit) + .list_arweave_anchors_for_repos(&query_repos, &query_owner_dids, limit) .await - .map_err(AppError::Internal)?; - - anchors - .into_iter() - .filter(|a| { - readable_owners - .get(&a.repo) - .is_some_and(|owners| owners.contains(&a.owner_did)) - }) - .collect() + .map_err(AppError::Internal)? } }; diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 64a6474..53a6621 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -2398,18 +2398,24 @@ impl Db { .collect()) } - /// Bounded global anchor query: returns anchors for any of the given repo - /// slugs, ordered by anchored_at DESC, capped at `limit`. + /// Bounded global anchor query: returns anchors for any of the given (repo, owner_did) + /// pairs, ordered by anchored_at DESC, capped at `limit`. pub async fn list_arweave_anchors_for_repos( &self, repos: &[String], + owner_dids: &[String], limit: i64, ) -> Result> { let rows = sqlx::query( "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at - FROM arweave_anchors WHERE repo = ANY($1) ORDER BY anchored_at DESC LIMIT $2", + FROM arweave_anchors + WHERE (repo, owner_did) IN ( + SELECT * FROM UNNEST($1::text[], $2::text[]) + ) + ORDER BY anchored_at DESC LIMIT $3", ) .bind(repos) + .bind(owner_dids) .bind(limit) .fetch_all(&self.pool) .await?; diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 6fb6578..6c818ec 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2246,10 +2246,10 @@ mod tests { /// #121: /api/v1/arweave/anchors without ?repo= allows authenticated. /// /// The global listing resolves visibility against the deduped repo view - /// (list_all_repos_deduped + visibility_check), queries anchors bounded in - /// SQL via WHERE repo = ANY(...), and post-filters by owner_did to prevent - /// cross-DID slug collision. The anchor row must carry the same short owner - /// slug that the push path writes (last ':'-separated segment of the DID). + /// (list_all_repos_deduped + visibility_check) and queries anchors bounded in + /// SQL filtering by (repo, owner_did) pairs to prevent cross-DID slug collision. + /// The anchor row must carry the same short owner slug that the push path writes + /// (last ':'-separated segment of the DID). #[sqlx::test] async fn anchors_global_allows_authenticated(pool: PgPool) { let state = test_state(pool).await; diff --git a/crates/gl/src/http.rs b/crates/gl/src/http.rs index 32e90a1..1159e2c 100644 --- a/crates/gl/src/http.rs +++ b/crates/gl/src/http.rs @@ -24,6 +24,10 @@ impl NodeClient { } } + pub fn has_keypair(&self) -> bool { + self.keypair.is_some() + } + /// GET request — no auth (public read endpoints). pub async fn get(&self, path: &str) -> Result { let url = format!("{}{}", self.node_url, path); diff --git a/crates/gl/src/ipfs_cmd.rs b/crates/gl/src/ipfs_cmd.rs index b1b12f0..d1b632d 100644 --- a/crates/gl/src/ipfs_cmd.rs +++ b/crates/gl/src/ipfs_cmd.rs @@ -39,9 +39,10 @@ pub async fn run(args: IpfsArgs) -> Result<()> { } async fn cmd_list(node: String) -> Result<()> { - let client = NodeClient::new(&node, None); + let kp = crate::identity::load_keypair_from_dir(None)?; + let client = NodeClient::new(&node, Some(kp)); let resp: Value = client - .get("/api/v1/ipfs/pins") + .get_signed("/api/v1/ipfs/pins") .await? .json() .await @@ -77,12 +78,15 @@ async fn cmd_list(node: String) -> Result<()> { } async fn cmd_get(cid: String, node: String) -> Result<()> { - let client = NodeClient::new(&node, None); + let kp = crate::identity::load_keypair_from_dir(None).ok(); + let client = NodeClient::new(&node, kp); let path = format!("/ipfs/{cid}"); - let resp = client - .get(&path) - .await - .with_context(|| format!("failed to fetch CID {cid} from {node}"))?; + let resp = if client.has_keypair() { + client.get_signed(&path).await + } else { + client.get(&path).await + } + .with_context(|| format!("failed to fetch CID {cid} from {node}"))?; let status = resp.status(); if !status.is_success() { diff --git a/crates/gl/src/node.rs b/crates/gl/src/node.rs index 9b3a6e1..fc5a04f 100644 --- a/crates/gl/src/node.rs +++ b/crates/gl/src/node.rs @@ -177,8 +177,21 @@ async fn try_get_json(client: &NodeClient, path: &str) -> Option { resp.json::().await.ok() } +/// Attempt a signed GET and parse JSON; returns None on any error or non-2xx status or if keypair is absent. +async fn try_get_json_signed(client: &NodeClient, path: &str) -> Option { + if !client.has_keypair() { + return None; + } + let resp = client.get_signed(path).await.ok()?; + if !resp.status().is_success() { + return None; + } + resp.json::().await.ok() +} + async fn cmd_status(node: String) -> Result<()> { - let client = NodeClient::new(&node, None); + let kp = crate::identity::load_keypair_from_dir(None).ok(); + let client = NodeClient::new(&node, kp); // ── Fetch node info (required — bail if unreachable) ────────────────── let info_resp = client @@ -200,7 +213,7 @@ async fn cmd_status(node: String) -> Result<()> { try_get_json(&client, "/api/v1/repos"), try_get_json(&client, "/api/v1/p2p/info"), try_get_json(&client, "/api/v1/events/ref-updates?limit=5"), - try_get_json(&client, "/api/v1/ipfs/pins"), + try_get_json_signed(&client, "/api/v1/ipfs/pins"), ); // ── Render dashboard ────────────────────────────────────────────────── From 70acb1637cd54f1264ad0cc9dbaf21d4d28f2836 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Fri, 3 Jul 2026 00:29:19 +0600 Subject: [PATCH 20/25] test: add slug-collision regression test for global anchor query --- crates/gitlawb-node/src/test_support.rs | 56 +++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 6c818ec..3a23730 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2533,4 +2533,60 @@ mod tests { "negative limit clamps to 0, returning empty result" ); } + + /// Regression test for slug-collision and global anchor query filter-before-limit: + /// Seed a readable and an unreadable repo sharing owner_short/name, give the + /// unreadable one the newer rows, and assert the caller sees their own anchor + /// and never the other owner's. + #[sqlx::test] + async fn anchors_global_slug_collision_regression(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let caller = Keypair::generate(); + let caller_did = caller.did().to_string(); + let owner_short = caller_did.split(':').next_back().unwrap().to_string(); + let unreadable_did = format!("did:gitlawb:{owner_short}"); + let repo_name = "collision-repo"; + let slug = format!("{owner_short}/{repo_name}"); + + // Create the readable (public) repo owned by caller + let mut repo_readable = seed_repo(&caller_did, repo_name); + repo_readable.disk_path = format!("/tmp/{repo_name}-readable-{}", uuid::Uuid::new_v4()); + state.db.create_repo(&repo_readable).await.unwrap(); + + // Create the unreadable (private) repo owned by the collision DID + let mut repo_unreadable = seed_private_repo(&unreadable_did, repo_name); + repo_unreadable.disk_path = format!("/tmp/{repo_name}-unreadable-{}", uuid::Uuid::new_v4()); + state.db.create_repo(&repo_unreadable).await.unwrap(); + + // Seed readable anchor (older) + seed_anchor(&state.db, &slug, &caller_did).await; + + // Sleep to ensure a distinct timestamp for the newer anchor + tokio::time::sleep(std::time::Duration::from_millis(5)).await; + + // Seed unreadable anchor (newer) + seed_anchor(&state.db, &slug, &unreadable_did).await; + + // Request global listing with limit = 1 + let resp = anchors_router(&state) + .oneshot(signed_get(&caller, "/api/v1/arweave/anchors?limit=1")) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + + // The count should be 1 because the database query is correctly constrained + // to the readable (repo, owner_did) pair, thus skipping the newer unreadable anchor. + assert_eq!( + body["count"], 1, + "caller should see their own anchor despite the newer colliding unreadable anchor" + ); + assert_eq!( + body["anchors"][0]["owner_did"], caller_did, + "returned anchor must belong to the caller" + ); + } } From dbce2445434ce11d7e9487be5347590f8ebdce2e Mon Sep 17 00:00:00 2001 From: Gravirei Date: Fri, 3 Jul 2026 06:30:12 +0600 Subject: [PATCH 21/25] fix: optimize global pin listing and verify CLI pin signatures --- crates/gitlawb-node/src/api/ipfs.rs | 158 +++++++++++++++--------- crates/gitlawb-node/src/api/repos.rs | 10 ++ crates/gitlawb-node/src/db/mod.rs | 136 +++++++++++++++----- crates/gitlawb-node/src/ipfs_pin.rs | 22 +++- crates/gitlawb-node/src/pinata.rs | 21 +++- crates/gitlawb-node/src/test_support.rs | 18 ++- crates/gl/src/ipfs_cmd.rs | 89 ++++++++++--- crates/gl/src/node.rs | 117 +++++++++++++++--- 8 files changed, 441 insertions(+), 130 deletions(-) diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index a33c5ec..91d0638 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -15,7 +15,7 @@ //! see `get_by_cid`). use axum::{ - extract::{Path, State}, + extract::{Path, Query, State}, http::{HeaderMap, HeaderName, HeaderValue, StatusCode}, response::{IntoResponse, Response}, Extension, Json, @@ -26,11 +26,11 @@ use std::str::FromStr; use crate::auth::AuthenticatedDid; use crate::error::{AppError, Result}; -use crate::git::push_delta; use crate::git::store; use crate::git::visibility_pack::{allowed_blob_set_for_caller, has_path_scoped_rule}; use crate::state::AppState; use crate::visibility::{visibility_check, Decision}; +use serde::Deserialize; /// GET /ipfs/{cid} /// @@ -212,6 +212,28 @@ pub async fn get_by_cid( ))) } +/// GET /api/v1/ipfs/pins +/// +/// Returns all CIDs that have been pinned to the local IPFS node from git +/// objects received via push. Each entry includes the git SHA-256 hex, the +/// CIDv1 string, and the timestamp when it was pinned. +/// +/// Requires authentication: the global pin index would otherwise disclose +/// metadata for every object ever pushed to the node (#121). +/// +/// The global listing filters each pinned object on current repo visibility +/// to prevent metadata disclosure when repos are made private after push (#136). +/// Only pins from repos the caller can currently read are returned. +#[derive(Debug, Deserialize)] +pub struct ListPinsQuery { + #[serde(default = "default_limit")] + pub limit: i64, +} + +fn default_limit() -> i64 { + 1000 +} + /// GET /api/v1/ipfs/pins /// /// Returns all CIDs that have been pinned to the local IPFS node from git @@ -226,6 +248,7 @@ pub async fn get_by_cid( /// Only pins from repos the caller can currently read are returned. pub async fn list_pins( State(state): State, + Query(query): Query, auth: Option>, ) -> Result> { let caller = auth.as_ref().map(|e| e.0 .0.as_str()); @@ -239,21 +262,14 @@ pub async fn list_pins( } let caller_owned = caller.map(|c| c.to_string()); - let raw_pins = state - .db - .list_pinned_cids() - .await - .map_err(AppError::Internal)?; - - // Build a set of sha256_hex values from repos the caller can read. - // Use the deduped repo list so mirror rows never bypass the canonical - // repo's visibility rules (#136). + // Build the set of readable repo slugs and owner DIDs from the deduped repo view + // (mirror rows already collapsed, quarantined excluded), then query + // anchors bounded in SQL. let repos = state .db .list_all_repos_deduped() .await .map_err(AppError::Internal)?; - let repo_ids: Vec = repos.iter().map(|r| r.id.clone()).collect(); let rules_by_repo = state .db @@ -261,63 +277,91 @@ pub async fn list_pins( .await .map_err(AppError::Internal)?; - let mut allowed_sha256s = std::collections::HashSet::new(); - - for repo in &repos { - let rules: &[crate::db::VisibilityRule] = rules_by_repo - .get(&repo.id) - .map(Vec::as_slice) - .unwrap_or(&[]); + // Build parallel vectors of readable (slug, owner_did) pairs to query in SQL. + // This avoids filter-before-limit leaks or loss of pages. + let mut query_repos = Vec::new(); + let mut query_owner_dids = Vec::new(); - // Check repo-level visibility. - if visibility_check(rules, repo.is_public, &repo.owner_did, caller, "/") == Decision::Deny { + for r in &repos { + let rules = rules_by_repo.get(&r.id).map(Vec::as_slice).unwrap_or(&[]); + if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny { continue; } + let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); + let slug = format!("{}/{}", short, r.name); + query_repos.push(slug); + query_owner_dids.push(r.owner_did.clone()); + } - let repo_path = match state.repo_store.acquire(&repo.owner_did, &repo.name).await { - Ok(p) => p, - Err(_) => continue, + let raw_pins = if query_repos.is_empty() { + Vec::new() + } else { + state + .db + .list_pinned_cids_for_repos(&query_repos, &query_owner_dids, query.limit) + .await + .map_err(AppError::Internal)? + }; + + let mut repos_by_slug = HashMap::new(); + for r in repos { + let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); + let slug = format!("{}/{}", short, r.name); + let rules = rules_by_repo.get(&r.id).cloned().unwrap_or_default(); + repos_by_slug.insert(slug, (r, rules)); + } + + let mut filtered_pins = Vec::new(); + let mut allowed_blobs_by_repo: HashMap> = HashMap::new(); + + for pin in raw_pins { + let Some((repo, rules)) = repos_by_slug.get(&pin.repo) else { + continue; }; - // If path-scoped rules exist, compute the set of blobs the caller is - // allowed to read. Otherwise every blob in the repo is allowed. - let allowed_blobs = if has_path_scoped_rule(rules) { - let rp = repo_path.clone(); - let r = rules.to_vec(); - let is_public = repo.is_public; - let owner = repo.owner_did.clone(); - let caller_for_walk = caller_owned.clone(); - - match tokio::task::spawn_blocking(move || { - allowed_blob_set_for_caller(&rp, &r, is_public, &owner, caller_for_walk.as_deref()) - }) - .await - { - Ok(Ok(set)) => set, - _ => { - tracing::warn!(repo = %repo.name, "allowed-blob walk failed; skipping repo for pins listing"); - continue; - } - } + if !has_path_scoped_rule(rules) { + filtered_pins.push(pin); + continue; + } + + let allowed_set = if let Some(set) = allowed_blobs_by_repo.get(&repo.id) { + set } else { - // No path-scoped rules: all objects reachable in this repo are allowed. - match push_delta::list_all_objects(&repo_path) { - Ok(objects) => objects.into_iter().collect(), - Err(_) => continue, - } + let set = match state.repo_store.acquire(&repo.owner_did, &repo.name).await { + Ok(rp) => { + let rp_clone = rp.clone(); + let r_clone = rules.clone(); + let is_public = repo.is_public; + let owner = repo.owner_did.clone(); + let caller_for_walk = caller_owned.clone(); + + match tokio::task::spawn_blocking(move || { + allowed_blob_set_for_caller( + &rp_clone, + &r_clone, + is_public, + &owner, + caller_for_walk.as_deref(), + ) + }) + .await + { + Ok(Ok(s)) => s, + _ => HashSet::new(), + } + } + Err(_) => HashSet::new(), + }; + allowed_blobs_by_repo.insert(repo.id.clone(), set); + allowed_blobs_by_repo.get(&repo.id).unwrap() }; - // Add the allowed blobs to the global allowed set. - for sha in allowed_blobs { - allowed_sha256s.insert(sha); + if allowed_set.contains(&pin.sha256_hex) { + filtered_pins.push(pin); } } - // Filter pins to only those in allowed set. - let pins: Vec<_> = raw_pins - .into_iter() - .filter(|pin| allowed_sha256s.contains(&pin.sha256_hex)) - .collect(); + let pins = filtered_pins; Ok(Json(serde_json::json!({ "pins": pins, diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index b74f3f6..cc36b31 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -1051,12 +1051,20 @@ pub async fn git_receive_pack( let node_did_str = state.node_did.to_string(); let node_seed = state.node_keypair.to_seed(); let repo_name = record.name.clone(); + let owner_short = owner_did + .split(':') + .next_back() + .unwrap_or(&owner_did) + .to_string(); + let slug = format!("{}/{}", owner_short, repo_name); tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( &ipfs_api, &repo_path_clone, object_list_ipfs, &db_clone, + &slug, + &owner_did, ) .await; if !pinned.is_empty() { @@ -1171,6 +1179,8 @@ pub async fn git_receive_pack( &repo_path_clone, object_list_pinata, &db_clone, + &repo_slug, + &owner_did_for_arweave, ) .await } else { diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 53a6621..8547c08 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -159,6 +159,10 @@ pub struct PinnedCidRecord { pub cid: String, pub pinned_at: String, pub pinata_cid: Option, + #[serde(default)] + pub repo: String, + #[serde(default)] + pub owner_did: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -822,6 +826,21 @@ const MIGRATIONS: &[Migration] = &[ "ALTER TABLE repos ADD COLUMN IF NOT EXISTS quarantined BOOLEAN NOT NULL DEFAULT FALSE", ], }, + Migration { + version: 10, + name: "pinned_cids_repo_owner", + stmts: &[ + "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS repo TEXT", + "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS owner_did TEXT", + "UPDATE pinned_cids SET repo = '' WHERE repo IS NULL", + "UPDATE pinned_cids SET owner_did = '' WHERE owner_did IS NULL", + "ALTER TABLE pinned_cids ALTER COLUMN repo SET NOT NULL", + "ALTER TABLE pinned_cids ALTER COLUMN owner_did SET NOT NULL", + "ALTER TABLE pinned_cids DROP CONSTRAINT IF EXISTS pinned_cids_pkey", + "ALTER TABLE pinned_cids ADD PRIMARY KEY (repo, sha256_hex)", + "CREATE INDEX IF NOT EXISTS idx_pinned_cids_repo_owner ON pinned_cids (repo, owner_did)", + ], + }, ]; // ── Repos ───────────────────────────────────────────────────────────────────── @@ -1943,23 +1962,42 @@ impl Db { // ── Pinned CIDs ─────────────────────────────────────────────────────────────── impl Db { - pub async fn is_pinned(&self, sha256_hex: &str) -> Result { - let row = sqlx::query("SELECT COUNT(*) as cnt FROM pinned_cids WHERE sha256_hex = $1") + pub async fn is_pinned_for_repo(&self, sha256_hex: &str, repo: &str) -> Result { + let row = sqlx::query( + "SELECT COUNT(*) as cnt FROM pinned_cids WHERE sha256_hex = $1 AND repo = $2", + ) + .bind(sha256_hex) + .bind(repo) + .fetch_one(&self.pool) + .await?; + Ok(row.get::("cnt") > 0) + } + + pub async fn get_pinned_cid(&self, sha256_hex: &str) -> Result> { + let row = sqlx::query("SELECT cid FROM pinned_cids WHERE sha256_hex = $1 LIMIT 1") .bind(sha256_hex) - .fetch_one(&self.pool) + .fetch_optional(&self.pool) .await?; - Ok(row.get::("cnt") > 0) + Ok(row.map(|r| r.get("cid"))) } - pub async fn record_pinned_cid(&self, sha256_hex: &str, cid: &str) -> Result<()> { + pub async fn record_pinned_cid( + &self, + sha256_hex: &str, + cid: &str, + repo: &str, + owner_did: &str, + ) -> Result<()> { sqlx::query( - "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at) - VALUES ($1, $2, $3) - ON CONFLICT(sha256_hex) DO NOTHING", + "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at, repo, owner_did) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT(repo, sha256_hex) DO NOTHING", ) .bind(sha256_hex) .bind(cid) .bind(Utc::now().to_rfc3339()) + .bind(repo) + .bind(owner_did) .execute(&self.pool) .await?; Ok(()) @@ -2035,51 +2073,85 @@ impl Db { Ok(row.map(|r| r.get("recipients_tag"))) } - pub async fn list_pinned_cids(&self) -> Result> { - let rows = sqlx::query( - "SELECT sha256_hex, cid, pinned_at, pinata_cid FROM pinned_cids ORDER BY pinned_at DESC", - ) - .fetch_all(&self.pool) - .await?; - Ok(rows - .into_iter() - .map(|r| PinnedCidRecord { - sha256_hex: r.get("sha256_hex"), - cid: r.get("cid"), - pinned_at: r.get("pinned_at"), - pinata_cid: r.get("pinata_cid"), - }) - .collect()) - } - - /// Returns true if this object already has a Pinata CID recorded. - pub async fn has_pinata_cid(&self, sha256_hex: &str) -> Result { + pub async fn has_pinata_cid_for_repo(&self, sha256_hex: &str, repo: &str) -> Result { let row = sqlx::query( - "SELECT COUNT(*) as cnt FROM pinned_cids WHERE sha256_hex = $1 AND pinata_cid IS NOT NULL", + "SELECT COUNT(*) as cnt FROM pinned_cids WHERE sha256_hex = $1 AND repo = $2 AND pinata_cid IS NOT NULL", ) .bind(sha256_hex) + .bind(repo) .fetch_one(&self.pool) .await?; Ok(row.get::("cnt") > 0) } + pub async fn get_pinata_cid(&self, sha256_hex: &str) -> Result> { + let row = sqlx::query("SELECT pinata_cid FROM pinned_cids WHERE sha256_hex = $1 AND pinata_cid IS NOT NULL LIMIT 1") + .bind(sha256_hex) + .fetch_optional(&self.pool) + .await?; + Ok(row.map(|r| r.get("pinata_cid"))) + } + /// Record the Pinata CID for a git object. /// Inserts the row if it doesn't exist (objects pinned directly to Pinata /// without a prior local IPFS pin get cid = pinata_cid). - pub async fn record_pinata_cid(&self, sha256_hex: &str, pinata_cid: &str) -> Result<()> { + pub async fn record_pinata_cid( + &self, + sha256_hex: &str, + pinata_cid: &str, + repo: &str, + owner_did: &str, + ) -> Result<()> { sqlx::query( - "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at, pinata_cid) - VALUES ($1, $2, $3, $4) - ON CONFLICT(sha256_hex) DO UPDATE SET pinata_cid = EXCLUDED.pinata_cid", + "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at, pinata_cid, repo, owner_did) + VALUES ($1, $2, $3, $4, $5, $6) + ON CONFLICT(repo, sha256_hex) DO UPDATE SET pinata_cid = EXCLUDED.pinata_cid", ) .bind(sha256_hex) .bind(pinata_cid) // fallback local cid if row is new .bind(Utc::now().to_rfc3339()) .bind(pinata_cid) + .bind(repo) + .bind(owner_did) .execute(&self.pool) .await?; Ok(()) } + + /// Bounded global pin query: returns pins for any of the given (repo, owner_did) + /// pairs, ordered by pinned_at DESC, capped at `limit`. + pub async fn list_pinned_cids_for_repos( + &self, + repos: &[String], + owner_dids: &[String], + limit: i64, + ) -> Result> { + let rows = sqlx::query( + "SELECT sha256_hex, cid, pinned_at, pinata_cid, repo, owner_did + FROM pinned_cids + WHERE (repo, owner_did) IN ( + SELECT * FROM UNNEST($1::text[], $2::text[]) + ) + ORDER BY pinned_at DESC LIMIT $3", + ) + .bind(repos) + .bind(owner_dids) + .bind(limit) + .fetch_all(&self.pool) + .await?; + + Ok(rows + .into_iter() + .map(|r| PinnedCidRecord { + sha256_hex: r.get("sha256_hex"), + cid: r.get("cid"), + pinned_at: r.get("pinned_at"), + pinata_cid: r.get("pinata_cid"), + repo: r.get("repo"), + owner_did: r.get("owner_did"), + }) + .collect()) + } } // ── Received Ref Updates ────────────────────────────────────────────────────── diff --git a/crates/gitlawb-node/src/ipfs_pin.rs b/crates/gitlawb-node/src/ipfs_pin.rs index 3b34619..754c99b 100644 --- a/crates/gitlawb-node/src/ipfs_pin.rs +++ b/crates/gitlawb-node/src/ipfs_pin.rs @@ -99,6 +99,8 @@ pub async fn pin_new_objects( repo_path: &std::path::Path, object_list: Vec, db: &crate::db::Db, + repo: &str, + owner_did: &str, ) -> Vec<(String, String)> { if ipfs_api.is_empty() { return vec![]; @@ -107,16 +109,28 @@ pub async fn pin_new_objects( let mut pinned = Vec::new(); for sha in object_list { - // Skip if already pinned - match db.is_pinned(&sha).await { + // Skip if already pinned for this repo + match db.is_pinned_for_repo(&sha, repo).await { Ok(true) => continue, Ok(false) => {} Err(e) => { - tracing::warn!(sha = %sha, err = %e, "DB error checking pinned status"); + tracing::warn!(sha = %sha, err = %e, "DB error checking pinned status for repo"); continue; } } + // Reuse globally pinned CID if available to avoid duplicate uploads + if let Ok(Some(existing_cid)) = db.get_pinned_cid(&sha).await { + if let Err(e) = db + .record_pinned_cid(&sha, &existing_cid, repo, owner_did) + .await + { + tracing::warn!(sha = %sha, err = %e, "failed to record pinned CID in DB"); + } + pinned.push((sha, existing_cid)); + continue; + } + // Read raw object content let data = match crate::git::store::read_object(repo_path, &sha) { Ok(Some((_obj_type, bytes))) => bytes, @@ -130,7 +144,7 @@ pub async fn pin_new_objects( // Pin to IPFS match pin_git_object(ipfs_api, &sha, &data).await { Ok(cid) if !cid.is_empty() => { - if let Err(e) = db.record_pinned_cid(&sha, &cid).await { + if let Err(e) = db.record_pinned_cid(&sha, &cid, repo, owner_did).await { tracing::warn!(sha = %sha, err = %e, "failed to record pinned CID in DB"); } pinned.push((sha, cid)); diff --git a/crates/gitlawb-node/src/pinata.rs b/crates/gitlawb-node/src/pinata.rs index 6c9c0bf..4a32745 100644 --- a/crates/gitlawb-node/src/pinata.rs +++ b/crates/gitlawb-node/src/pinata.rs @@ -83,6 +83,8 @@ pub async fn pin_new_objects( repo_path: &std::path::Path, object_list: Vec, db: &crate::db::Db, + repo: &str, + owner_did: &str, ) -> Vec<(String, String)> { if jwt.is_empty() { return vec![]; @@ -91,15 +93,28 @@ pub async fn pin_new_objects( let mut pinned = Vec::new(); for sha in object_list { - match db.has_pinata_cid(&sha).await { + // Skip if already has Pinata CID for this repo + match db.has_pinata_cid_for_repo(&sha, repo).await { Ok(true) => continue, Ok(false) => {} Err(e) => { - tracing::warn!(sha = %sha, err = %e, "DB error checking pinata_cid"); + tracing::warn!(sha = %sha, err = %e, "DB error checking pinata_cid for repo"); continue; } } + // Reuse globally pinned Pinata CID if available to avoid duplicate uploads + if let Ok(Some(existing_pinata_cid)) = db.get_pinata_cid(&sha).await { + if let Err(e) = db + .record_pinata_cid(&sha, &existing_pinata_cid, repo, owner_did) + .await + { + tracing::warn!(sha = %sha, err = %e, "failed to record pinata_cid in DB"); + } + pinned.push((sha, existing_pinata_cid)); + continue; + } + let data = match crate::git::store::read_object(repo_path, &sha) { Ok(Some((_kind, bytes))) => bytes, Ok(None) => continue, @@ -111,7 +126,7 @@ pub async fn pin_new_objects( match pin_object(client, upload_url, jwt, &sha, &data).await { Ok(cid) if !cid.is_empty() => { - if let Err(e) = db.record_pinata_cid(&sha, &cid).await { + if let Err(e) = db.record_pinata_cid(&sha, &cid, repo, owner_did).await { tracing::warn!(sha = %sha, err = %e, "failed to record pinata_cid in DB"); } pinned.push((sha, cid)); diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 3a23730..7158c77 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2032,9 +2032,11 @@ mod tests { let pinned_sha = setup.fx.public_oid.clone(); let pinned_cid = cid_for_oid(&pinned_sha); + let short = setup.owner_did.split(':').next_back().unwrap().to_string(); + let slug = format!("{}/{}", short, setup.repo.name); state .db - .record_pinned_cid(&pinned_sha, &pinned_cid) + .record_pinned_cid(&pinned_sha, &pinned_cid, &slug, &setup.owner_did) .await .unwrap(); @@ -2068,9 +2070,11 @@ mod tests { let pinned_sha = setup.fx.public_oid.clone(); let pinned_cid = cid_for_oid(&pinned_sha); + let short = setup.owner_did.split(':').next_back().unwrap().to_string(); + let slug = format!("{}/{}", short, setup.repo.name); state .db - .record_pinned_cid(&pinned_sha, &pinned_cid) + .record_pinned_cid(&pinned_sha, &pinned_cid, &slug, &setup.owner_did) .await .unwrap(); @@ -2105,9 +2109,11 @@ mod tests { .await .unwrap(); + let short = setup.owner_did.split(':').next_back().unwrap().to_string(); + let slug = format!("{}/{}", short, setup.repo.name); state .db - .record_pinned_cid(&pinned_sha, &pinned_cid) + .record_pinned_cid(&pinned_sha, &pinned_cid, &slug, &setup.owner_did) .await .unwrap(); @@ -2149,14 +2155,16 @@ mod tests { .await .unwrap(); + let short = setup.owner_did.split(':').next_back().unwrap().to_string(); + let slug = format!("{}/{}", short, setup.repo.name); state .db - .record_pinned_cid(&public_sha, &public_cid) + .record_pinned_cid(&public_sha, &public_cid, &slug, &setup.owner_did) .await .unwrap(); state .db - .record_pinned_cid(&secret_sha, &secret_cid) + .record_pinned_cid(&secret_sha, &secret_cid, &slug, &setup.owner_did) .await .unwrap(); diff --git a/crates/gl/src/ipfs_cmd.rs b/crates/gl/src/ipfs_cmd.rs index d1b632d..1b37a04 100644 --- a/crates/gl/src/ipfs_cmd.rs +++ b/crates/gl/src/ipfs_cmd.rs @@ -31,25 +31,30 @@ pub enum IpfsCmd { }, } +use std::path::PathBuf; + pub async fn run(args: IpfsArgs) -> Result<()> { match args.cmd { - IpfsCmd::List { node } => cmd_list(node).await, - IpfsCmd::Get { cid, node } => cmd_get(cid, node).await, + IpfsCmd::List { node } => cmd_list(node, None).await, + IpfsCmd::Get { cid, node } => cmd_get(cid, node, None).await, } } -async fn cmd_list(node: String) -> Result<()> { - let kp = crate::identity::load_keypair_from_dir(None)?; +async fn cmd_list(node: String, dir: Option) -> Result<()> { + let kp = crate::identity::load_keypair_from_dir(dir.as_deref())?; let client = NodeClient::new(&node, Some(kp)); - let resp: Value = client - .get_signed("/api/v1/ipfs/pins") - .await? - .json() - .await - .context("failed to parse pins response")?; + let resp = client.get_signed("/api/v1/ipfs/pins").await?; + + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + anyhow::bail!("node returned {status}: {body}"); + } + + let resp_val: Value = resp.json().await.context("failed to parse pins response")?; - let pins = resp["pins"].as_array().cloned().unwrap_or_default(); - let count = resp["count"].as_u64().unwrap_or(pins.len() as u64); + let pins = resp_val["pins"].as_array().cloned().unwrap_or_default(); + let count = resp_val["count"].as_u64().unwrap_or(pins.len() as u64); if pins.is_empty() { println!("No IPFS pins recorded on {node}"); @@ -77,8 +82,8 @@ async fn cmd_list(node: String) -> Result<()> { Ok(()) } -async fn cmd_get(cid: String, node: String) -> Result<()> { - let kp = crate::identity::load_keypair_from_dir(None).ok(); +async fn cmd_get(cid: String, node: String, dir: Option) -> Result<()> { + let kp = crate::identity::load_keypair_from_dir(dir.as_deref()).ok(); let client = NodeClient::new(&node, kp); let path = format!("/ipfs/{cid}"); let resp = if client.has_keypair() { @@ -112,3 +117,59 @@ async fn cmd_get(cid: String, node: String) -> Result<()> { Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn setup_test_keypair(dir: &TempDir) -> gitlawb_core::identity::Keypair { + let kp = gitlawb_core::identity::Keypair::generate(); + std::fs::write( + dir.path().join("identity.pem"), + kp.to_pem().unwrap().as_bytes(), + ) + .unwrap(); + kp + } + + #[tokio::test] + async fn test_cmd_list_success() { + let mut server = mockito::Server::new_async().await; + let dir = TempDir::new().unwrap(); + let _kp = setup_test_keypair(&dir); + + let _m = server + .mock("GET", "/api/v1/ipfs/pins") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"count":1,"pins":[{"sha256_hex":"abc","cid":"bafkrei","pinned_at":"2026-07-03T00:00:00Z"}]}"#) + .create_async() + .await; + + cmd_list(server.url(), Some(dir.path().to_path_buf())) + .await + .unwrap(); + } + + #[tokio::test] + async fn test_cmd_list_unauthorized() { + let mut server = mockito::Server::new_async().await; + let dir = TempDir::new().unwrap(); + let _kp = setup_test_keypair(&dir); + + let _m = server + .mock("GET", "/api/v1/ipfs/pins") + .with_status(401) + .with_body("authentication required") + .create_async() + .await; + + let err = cmd_list(server.url(), Some(dir.path().to_path_buf())) + .await + .unwrap_err(); + assert!(err + .to_string() + .contains("node returned 401 Unauthorized: authentication required")); + } +} diff --git a/crates/gl/src/node.rs b/crates/gl/src/node.rs index fc5a04f..2509c2e 100644 --- a/crates/gl/src/node.rs +++ b/crates/gl/src/node.rs @@ -177,16 +177,36 @@ async fn try_get_json(client: &NodeClient, path: &str) -> Option { resp.json::().await.ok() } -/// Attempt a signed GET and parse JSON; returns None on any error or non-2xx status or if keypair is absent. -async fn try_get_json_signed(client: &NodeClient, path: &str) -> Option { +#[derive(Debug)] +enum PinsError { + NoIdentity, + Unauthorized, + Other(String), +} + +async fn get_pins_status(client: &NodeClient) -> Result { if !client.has_keypair() { - return None; + return Err(PinsError::NoIdentity); } - let resp = client.get_signed(path).await.ok()?; - if !resp.status().is_success() { - return None; + let resp = match client.get_signed("/api/v1/ipfs/pins").await { + Ok(r) => r, + Err(e) => return Err(PinsError::Other(e.to_string())), + }; + let status = resp.status(); + if status == reqwest::StatusCode::UNAUTHORIZED || status == reqwest::StatusCode::FORBIDDEN { + return Err(PinsError::Unauthorized); } - resp.json::().await.ok() + if !status.is_success() { + return Err(PinsError::Other(format!("HTTP {status}"))); + } + let val: Value = match resp.json().await { + Ok(v) => v, + Err(e) => return Err(PinsError::Other(e.to_string())), + }; + let count = val["count"] + .as_u64() + .unwrap_or_else(|| val["pins"].as_array().map(|a| a.len() as u64).unwrap_or(0)); + Ok(count) } async fn cmd_status(node: String) -> Result<()> { @@ -213,7 +233,7 @@ async fn cmd_status(node: String) -> Result<()> { try_get_json(&client, "/api/v1/repos"), try_get_json(&client, "/api/v1/p2p/info"), try_get_json(&client, "/api/v1/events/ref-updates?limit=5"), - try_get_json_signed(&client, "/api/v1/ipfs/pins"), + get_pins_status(&client), ); // ── Render dashboard ────────────────────────────────────────────────── @@ -320,13 +340,19 @@ async fn cmd_status(node: String) -> Result<()> { // Pins println!("Pins"); - if let Some(ref pins) = pins_val { - let count = pins["count"] - .as_u64() - .unwrap_or_else(|| pins["pins"].as_array().map(|a| a.len() as u64).unwrap_or(0)); - println!(" Pinned CIDs: {count}"); - } else { - println!(" IPFS not configured"); + match pins_val { + Ok(count) => { + println!(" Pinned CIDs: {count}"); + } + Err(PinsError::NoIdentity) => { + println!(" (identity required to view pins)"); + } + Err(PinsError::Unauthorized) => { + println!(" (unauthorized to view pins)"); + } + Err(PinsError::Other(err)) => { + println!(" (pins unavailable: {err})"); + } } println!(); @@ -422,3 +448,64 @@ async fn cmd_resolve(did: String, node: String) -> Result<()> { Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn setup_test_keypair(dir: &TempDir) -> gitlawb_core::identity::Keypair { + let kp = gitlawb_core::identity::Keypair::generate(); + std::fs::write( + dir.path().join("identity.pem"), + kp.to_pem().unwrap().as_bytes(), + ) + .unwrap(); + kp + } + + #[tokio::test] + async fn test_get_pins_status_success() { + let mut server = mockito::Server::new_async().await; + let dir = TempDir::new().unwrap(); + let kp = setup_test_keypair(&dir); + let client = NodeClient::new(&server.url(), Some(kp)); + + let _m = server + .mock("GET", "/api/v1/ipfs/pins") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(r#"{"count":5,"pins":[]}"#) + .create_async() + .await; + + let count = get_pins_status(&client).await.unwrap(); + assert_eq!(count, 5); + } + + #[tokio::test] + async fn test_get_pins_status_no_identity() { + let server = mockito::Server::new_async().await; + let client = NodeClient::new(&server.url(), None); + + let err = get_pins_status(&client).await.unwrap_err(); + assert!(matches!(err, PinsError::NoIdentity)); + } + + #[tokio::test] + async fn test_get_pins_status_unauthorized() { + let mut server = mockito::Server::new_async().await; + let dir = TempDir::new().unwrap(); + let kp = setup_test_keypair(&dir); + let client = NodeClient::new(&server.url(), Some(kp)); + + let _m = server + .mock("GET", "/api/v1/ipfs/pins") + .with_status(401) + .create_async() + .await; + + let err = get_pins_status(&client).await.unwrap_err(); + assert!(matches!(err, PinsError::Unauthorized)); + } +} From 459a29ddcf269fdb57a6992bc5f55a2979dbfad6 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Fri, 3 Jul 2026 06:34:18 +0600 Subject: [PATCH 22/25] fix: address cargo clippy warnings --- crates/gitlawb-node/src/pinata.rs | 1 + crates/gl/src/node.rs | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/gitlawb-node/src/pinata.rs b/crates/gitlawb-node/src/pinata.rs index 4a32745..953bf59 100644 --- a/crates/gitlawb-node/src/pinata.rs +++ b/crates/gitlawb-node/src/pinata.rs @@ -76,6 +76,7 @@ pub async fn pin_object( /// this shape — change both in lockstep. Objects already recorded with a /// `pinata_cid` are skipped. Returns `(sha_hex, cid)` pairs for each newly /// pinned object. +#[allow(clippy::too_many_arguments)] pub async fn pin_new_objects( client: &reqwest::Client, upload_url: &str, diff --git a/crates/gl/src/node.rs b/crates/gl/src/node.rs index 2509c2e..78abed0 100644 --- a/crates/gl/src/node.rs +++ b/crates/gl/src/node.rs @@ -469,7 +469,7 @@ mod tests { let mut server = mockito::Server::new_async().await; let dir = TempDir::new().unwrap(); let kp = setup_test_keypair(&dir); - let client = NodeClient::new(&server.url(), Some(kp)); + let client = NodeClient::new(server.url(), Some(kp)); let _m = server .mock("GET", "/api/v1/ipfs/pins") @@ -486,7 +486,7 @@ mod tests { #[tokio::test] async fn test_get_pins_status_no_identity() { let server = mockito::Server::new_async().await; - let client = NodeClient::new(&server.url(), None); + let client = NodeClient::new(server.url(), None); let err = get_pins_status(&client).await.unwrap_err(); assert!(matches!(err, PinsError::NoIdentity)); @@ -497,7 +497,7 @@ mod tests { let mut server = mockito::Server::new_async().await; let dir = TempDir::new().unwrap(); let kp = setup_test_keypair(&dir); - let client = NodeClient::new(&server.url(), Some(kp)); + let client = NodeClient::new(server.url(), Some(kp)); let _m = server .mock("GET", "/api/v1/ipfs/pins") From b3dcc6c978e2f59dcc1af244f94eae4607f8df51 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Fri, 3 Jul 2026 12:39:57 +0600 Subject: [PATCH 23/25] fix: clamp pin limits, normalize owner DIDs, and add migration v10 upgrade tests --- crates/gitlawb-node/src/api/ipfs.rs | 7 +- crates/gitlawb-node/src/api/repos.rs | 12 +- crates/gitlawb-node/src/db/mod.rs | 146 +++++++++++++++++++++++- crates/gitlawb-node/src/test_support.rs | 18 +++ 4 files changed, 169 insertions(+), 14 deletions(-) diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index 91d0638..dee7bfb 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -287,25 +287,26 @@ pub async fn list_pins( if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny { continue; } - let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); + let short = crate::db::normalize_owner_did(&r.owner_did); let slug = format!("{}/{}", short, r.name); query_repos.push(slug); query_owner_dids.push(r.owner_did.clone()); } + let limit = query.limit.clamp(0, 200); let raw_pins = if query_repos.is_empty() { Vec::new() } else { state .db - .list_pinned_cids_for_repos(&query_repos, &query_owner_dids, query.limit) + .list_pinned_cids_for_repos(&query_repos, &query_owner_dids, limit) .await .map_err(AppError::Internal)? }; let mut repos_by_slug = HashMap::new(); for r in repos { - let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); + let short = crate::db::normalize_owner_did(&r.owner_did); let slug = format!("{}/{}", short, r.name); let rules = rules_by_repo.get(&r.id).cloned().unwrap_or_default(); repos_by_slug.insert(slug, (r, rules)); diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index cc36b31..1033d34 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -1051,11 +1051,7 @@ pub async fn git_receive_pack( let node_did_str = state.node_did.to_string(); let node_seed = state.node_keypair.to_seed(); let repo_name = record.name.clone(); - let owner_short = owner_did - .split(':') - .next_back() - .unwrap_or(&owner_did) - .to_string(); + let owner_short = crate::db::normalize_owner_did(&owner_did); let slug = format!("{}/{}", owner_short, repo_name); tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( @@ -1149,11 +1145,7 @@ pub async fn git_receive_pack( let node_did_str = state.node_did.to_string(); let repo_slug = format!( "{}/{}", - record - .owner_did - .split(':') - .next_back() - .unwrap_or(&record.owner_did), + crate::db::normalize_owner_did(&record.owner_did), record.name ); let ref_updates_clone = ref_updates diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 8547c08..42900b3 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -832,6 +832,10 @@ const MIGRATIONS: &[Migration] = &[ stmts: &[ "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS repo TEXT", "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS owner_did TEXT", + // Backfill repo and owner_did from branch_cids where possible + "UPDATE pinned_cids SET repo = branch_cids.repo FROM branch_cids WHERE pinned_cids.cid = branch_cids.cid", + "UPDATE pinned_cids SET owner_did = repos.owner_did FROM branch_cids JOIN repos ON repos.name = split_part(branch_cids.repo, '/', 2) AND repos.owner_did LIKE '%' || split_part(branch_cids.repo, '/', 1) WHERE pinned_cids.cid = branch_cids.cid", + // Fallback for remaining rows "UPDATE pinned_cids SET repo = '' WHERE repo IS NULL", "UPDATE pinned_cids SET owner_did = '' WHERE owner_did IS NULL", "ALTER TABLE pinned_cids ALTER COLUMN repo SET NOT NULL", @@ -3218,6 +3222,16 @@ impl Db { } } +pub fn normalize_owner_did(owner_did: &str) -> &str { + if owner_did.starts_with("did:key:") { + let remainder = &owner_did[8..]; + if !remainder.contains(':') { + return remainder; + } + } + owner_did +} + // ── Tests ───────────────────────────────────────────────────────────────────── // // These tests don't require a live Postgres connection. They validate the @@ -3228,7 +3242,8 @@ impl Db { #[cfg(test)] mod migration_tests { - use super::{MIGRATIONS, MIGRATION_V1_NAME}; + use super::{Db, MIGRATIONS, MIGRATION_V1_NAME}; + use sqlx::{PgPool, Row}; #[test] fn migrations_are_non_empty() { @@ -3306,6 +3321,135 @@ mod migration_tests { // it, you must also update the backfill. assert_eq!(MIGRATIONS[0].name, MIGRATION_V1_NAME); } + + #[sqlx::test] + async fn test_migration_v10_upgrade_path(pool: PgPool) { + let db = Db::for_testing(pool); + + // Run migrations up to version 9 + async fn run_migrations_up_to(db: &Db, version: i64) { + sqlx::query( + r#"CREATE TABLE IF NOT EXISTS schema_migrations ( + version BIGINT NOT NULL PRIMARY KEY, + name TEXT NOT NULL, + applied_at TEXT NOT NULL + )"#, + ) + .execute(&db.pool) + .await + .unwrap(); + + for m in super::MIGRATIONS { + if m.version > version { + break; + } + let already: bool = sqlx::query( + "SELECT EXISTS(SELECT 1 FROM schema_migrations WHERE version = $1) AS applied", + ) + .bind(m.version) + .fetch_one(&db.pool) + .await + .unwrap() + .get::("applied"); + + if already { + continue; + } + + let mut tx = db.pool.begin().await.unwrap(); + for stmt in m.stmts { + sqlx::query(stmt).execute(&mut *tx).await.unwrap(); + } + sqlx::query( + "INSERT INTO schema_migrations (version, name, applied_at) VALUES ($1, $2, $3)", + ) + .bind(m.version) + .bind(m.name) + .bind(chrono::Utc::now().to_rfc3339()) + .execute(&mut *tx) + .await + .unwrap(); + tx.commit().await.unwrap(); + } + } + + run_migrations_up_to(&db, 9).await; + + // Seed a repo, branch_cids, and pinned_cids under v9 schema + sqlx::query( + "INSERT INTO repos (id, name, owner_did, description, is_public, default_branch, created_at, updated_at, disk_path) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)" + ) + .bind("repo-123") + .bind("myrepo") + .bind("did:key:z6Mkwowner") + .bind("desc") + .bind(true) + .bind("main") + .bind("2026-07-03T00:00:00Z") + .bind("2026-07-03T00:00:00Z") + .bind("/srv/repo-123") + .execute(&db.pool) + .await + .unwrap(); + + sqlx::query( + "INSERT INTO branch_cids (repo, ref_name, sha, cid, node_did, updated_at) + VALUES ($1, $2, $3, $4, $5, $6)", + ) + .bind("z6Mkwowner/myrepo") + .bind("refs/heads/main") + .bind("old-sha") + .bind("old-cid") + .bind("node-did") + .bind("2026-07-03T00:00:00Z") + .execute(&db.pool) + .await + .unwrap(); + + sqlx::query( + "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at) + VALUES ($1, $2, $3)", + ) + .bind("old-sha") + .bind("old-cid") + .bind("2026-07-03T00:00:00Z") + .execute(&db.pool) + .await + .unwrap(); + + // Run remaining migrations (version 10) + db.run_migrations().await.unwrap(); + + // Verify backfilling of repo and owner_did columns + let row = sqlx::query( + "SELECT sha256_hex, cid, repo, owner_did FROM pinned_cids WHERE sha256_hex = 'old-sha'", + ) + .fetch_one(&db.pool) + .await + .unwrap(); + + assert_eq!(row.get::("repo"), "z6Mkwowner/myrepo"); + assert_eq!(row.get::("owner_did"), "did:key:z6Mkwowner"); + + // Verify the compound primary key (repo, sha256_hex) allows duplicate sha256_hex with different repo + let res = sqlx::query( + "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at, repo, owner_did) + VALUES ($1, $2, $3, $4, $5)", + ) + .bind("old-sha") + .bind("old-cid") + .bind("2026-07-03T00:00:00Z") + .bind("other-repo") + .bind("other-owner") + .execute(&db.pool) + .await; + + assert!( + res.is_ok(), + "Compound PK must allow same SHA in different repos" + ); + } } #[cfg(test)] diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index 7158c77..e83129d 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -2542,6 +2542,24 @@ mod tests { ); } + /// #121: pins listing negative limit is clamped to 0 and returns a bounded response (no 500). + #[sqlx::test] + async fn pins_global_negative_limit_is_clamped(pool: PgPool) { + let state = test_state(pool).await; + let setup = setup_pin_test(&state, "neg-limit-repo").await; + + let resp = pins_router(&state) + .oneshot(signed_get(&setup.owner, "/api/v1/ipfs/pins?limit=-1")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "negative limit clamps to 0, returning empty result" + ); + } + /// Regression test for slug-collision and global anchor query filter-before-limit: /// Seed a readable and an unreadable repo sharing owner_short/name, give the /// unreadable one the newer rows, and assert the caller sees their own anchor From 2759539135f20a0d8af8d746148ca1ac1c47ce05 Mon Sep 17 00:00:00 2001 From: Gravirei <147187533+Gravirei@users.noreply.github.com> Date: Fri, 3 Jul 2026 14:02:33 +0600 Subject: [PATCH 24/25] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- crates/gitlawb-node/src/db/mod.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 1ada21b..65e8174 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -833,9 +833,29 @@ const MIGRATIONS: &[Migration] = &[ stmts: &[ "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS repo TEXT", "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS owner_did TEXT", - // Backfill repo and owner_did from branch_cids where possible - "UPDATE pinned_cids SET repo = branch_cids.repo FROM branch_cids WHERE pinned_cids.cid = branch_cids.cid", - "UPDATE pinned_cids SET owner_did = repos.owner_did FROM branch_cids JOIN repos ON repos.name = split_part(branch_cids.repo, '/', 2) AND repos.owner_did LIKE '%' || split_part(branch_cids.repo, '/', 1) WHERE pinned_cids.cid = branch_cids.cid", + // Backfill repo and owner_did from branch_cids where the mapping is unambiguous + r#"UPDATE pinned_cids p + SET repo = m.repo + FROM ( + SELECT cid, MIN(repo) AS repo + FROM branch_cids + GROUP BY cid + HAVING COUNT(DISTINCT repo) = 1 + ) m + WHERE p.cid = m.cid"#, + r#"UPDATE pinned_cids p + SET owner_did = m.owner_did + FROM ( + SELECT bc.cid, MIN(r.owner_did) AS owner_did + FROM branch_cids bc + JOIN repos r + ON r.name = split_part(bc.repo, '/', 2) + AND (CASE WHEN r.owner_did LIKE 'did:key:%' AND position(':' in substr(r.owner_did, 9)) = 0 THEN substr(r.owner_did, 9) ELSE r.owner_did END) + = split_part(bc.repo, '/', 1) + GROUP BY bc.cid + HAVING COUNT(DISTINCT r.owner_did) = 1 + ) m + WHERE p.cid = m.cid"#, // Fallback for remaining rows "UPDATE pinned_cids SET repo = '' WHERE repo IS NULL", "UPDATE pinned_cids SET owner_did = '' WHERE owner_did IS NULL", From 4d1dab23570d1f9788fb1614518fb16423b0ac3a Mon Sep 17 00:00:00 2001 From: Gravirei Date: Fri, 3 Jul 2026 14:07:12 +0600 Subject: [PATCH 25/25] fix(node): apply Copilot review suggestions on PR 134 Use normalize_owner_key consistently for arweave/ipfs/repo slugs, remove duplicate normalize_owner_did helper, and make pinned_cids migration backfill deterministic with did:key-aware owner matching. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- crates/gitlawb-node/src/api/arweave.rs | 10 +++----- crates/gitlawb-node/src/api/ipfs.rs | 6 ++--- crates/gitlawb-node/src/api/repos.rs | 2 +- crates/gitlawb-node/src/db/mod.rs | 33 ++++++++------------------ 4 files changed, 17 insertions(+), 34 deletions(-) diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index a7a8783..36a1ece 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -83,12 +83,8 @@ pub async fn list_anchors( return Err(AppError::RepoNotFound(format!("{owner}/{name}"))); } - // Normalize to short-form slug matching the anchor table. - let owner_short = record - .owner_did - .split(':') - .next_back() - .unwrap_or(&record.owner_did); + // Normalize owner exactly like anchor writes do. + let owner_short = crate::db::normalize_owner_key(&record.owner_did); let slug = Some(format!("{}/{}", owner_short, record.name)); state @@ -123,7 +119,7 @@ pub async fn list_anchors( if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny { continue; } - let short = r.owner_did.split(':').next_back().unwrap_or(&r.owner_did); + let short = crate::db::normalize_owner_key(&r.owner_did); let slug = format!("{}/{}", short, r.name); query_repos.push(slug); query_owner_dids.push(r.owner_did.clone()); diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index dee7bfb..45b2601 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -264,7 +264,7 @@ pub async fn list_pins( // Build the set of readable repo slugs and owner DIDs from the deduped repo view // (mirror rows already collapsed, quarantined excluded), then query - // anchors bounded in SQL. + // pins bounded in SQL. let repos = state .db .list_all_repos_deduped() @@ -287,7 +287,7 @@ pub async fn list_pins( if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny { continue; } - let short = crate::db::normalize_owner_did(&r.owner_did); + let short = crate::db::normalize_owner_key(&r.owner_did); let slug = format!("{}/{}", short, r.name); query_repos.push(slug); query_owner_dids.push(r.owner_did.clone()); @@ -306,7 +306,7 @@ pub async fn list_pins( let mut repos_by_slug = HashMap::new(); for r in repos { - let short = crate::db::normalize_owner_did(&r.owner_did); + let short = crate::db::normalize_owner_key(&r.owner_did); let slug = format!("{}/{}", short, r.name); let rules = rules_by_repo.get(&r.id).cloned().unwrap_or_default(); repos_by_slug.insert(slug, (r, rules)); diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 79ea858..4fc0c0e 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -1047,7 +1047,7 @@ pub async fn git_receive_pack( let node_did_str = state.node_did.to_string(); let node_seed = state.node_keypair.to_seed(); let repo_name = record.name.clone(); - let owner_short = crate::db::normalize_owner_did(&owner_did); + let owner_short = crate::db::normalize_owner_key(&owner_did); let slug = format!("{}/{}", owner_short, repo_name); tokio::spawn(async move { let pinned = crate::ipfs_pin::pin_new_objects( diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 65e8174..5515056 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -833,27 +833,24 @@ const MIGRATIONS: &[Migration] = &[ stmts: &[ "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS repo TEXT", "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS owner_did TEXT", - // Backfill repo and owner_did from branch_cids where the mapping is unambiguous + // Backfill repo/owner only when the cid maps to exactly one + // unambiguous (repo, owner_did) pair under did:key-aware owner key + // normalization. This avoids nondeterministic assignments. r#"UPDATE pinned_cids p - SET repo = m.repo + SET repo = m.repo, + owner_did = m.owner_did FROM ( - SELECT cid, MIN(repo) AS repo - FROM branch_cids - GROUP BY cid - HAVING COUNT(DISTINCT repo) = 1 - ) m - WHERE p.cid = m.cid"#, - r#"UPDATE pinned_cids p - SET owner_did = m.owner_did - FROM ( - SELECT bc.cid, MIN(r.owner_did) AS owner_did + SELECT + bc.cid, + MIN(bc.repo) AS repo, + MIN(r.owner_did) AS owner_did FROM branch_cids bc JOIN repos r ON r.name = split_part(bc.repo, '/', 2) AND (CASE WHEN r.owner_did LIKE 'did:key:%' AND position(':' in substr(r.owner_did, 9)) = 0 THEN substr(r.owner_did, 9) ELSE r.owner_did END) = split_part(bc.repo, '/', 1) GROUP BY bc.cid - HAVING COUNT(DISTINCT r.owner_did) = 1 + HAVING COUNT(DISTINCT (bc.repo || '|' || r.owner_did)) = 1 ) m WHERE p.cid = m.cid"#, // Fallback for remaining rows @@ -3340,16 +3337,6 @@ impl Db { } } -pub fn normalize_owner_did(owner_did: &str) -> &str { - if owner_did.starts_with("did:key:") { - let remainder = &owner_did[8..]; - if !remainder.contains(':') { - return remainder; - } - } - owner_did -} - // ── Tests ───────────────────────────────────────────────────────────────────── // // These tests don't require a live Postgres connection. They validate the