diff --git a/crates/gitlawb-node/src/api/arweave.rs b/crates/gitlawb-node/src/api/arweave.rs index 0d728c7..36a1ece 100644 --- a/crates/gitlawb-node/src/api/arweave.rs +++ b/crates/gitlawb-node/src/api/arweave.rs @@ -2,12 +2,14 @@ use axum::{ extract::{Query, State}, - Json, + Extension, Json, }; use serde::Deserialize; -use crate::error::Result; +use crate::auth::AuthenticatedDid; +use crate::error::{AppError, Result}; use crate::state::AppState; +use crate::visibility::{visibility_check, Decision}; #[derive(Debug, Deserialize)] pub struct ListAnchorsQuery { @@ -21,16 +23,118 @@ fn default_limit() -> i64 { } /// GET /api/v1/arweave/anchors +/// +/// Returns Arweave ref-update anchors. When `?repo=<owner>/<name>` is provided, +/// the response is gated on the caller's read visibility for that repo (deny -> +/// 404). Without a `?repo=` filter, the caller must be authenticated (`Unauthorized` +/// otherwise) and the listing is limited to repos the caller can currently read, +/// which prevents metadata disclosure when repos are made private after push (#136). +/// +/// Both paths resolve visibility against the deduped repo view so mirror rows +/// never bypass the canonical repo's rules. pub async fn list_anchors( State(state): State, + auth: Option>, Query(q): Query, ) -> Result> { - let limit = q.limit.min(200); - let anchors = state - .db - .list_arweave_anchors(q.repo.as_deref(), limit) - .await - .map_err(crate::error::AppError::Internal)?; + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + let limit = q.limit.clamp(0, 200); + + // Global listings (no ?repo=) are restricted to authenticated callers. + if q.repo.is_none() && caller.is_none() { + return Err(AppError::Unauthorized( + "authentication required for global anchor listing".into(), + )); + } + + let anchors = if let Some(ref repo) = q.repo { + // ── Per-repo path ── + // Resolve against the deduped repo view so mirror rows never bypass + // the canonical repo's visibility rules. Use did_matches to handle + // both full DID and bare short-form owner in the URL. 
+ let parts: Vec<&str> = repo.splitn(2, '/').collect(); + if parts.len() != 2 { + return Err(AppError::RepoNotFound(repo.clone())); + } + let (owner, name) = (parts[0], parts[1]); + + // Fetch the deduped list (mirror rows collapsed, quarantined excluded). + let repos = state + .db + .list_all_repos_deduped() + .await + .map_err(AppError::Internal)?; + + let record = repos + .into_iter() + .find(|r| crate::api::did_matches(owner, &r.owner_did) && r.name == name) + .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{name}")))?; + + // Quarantine gate (belt-and-suspenders — deduped already filters). + if state.db.is_repo_quarantined(&record.id).await? { + return Err(AppError::RepoNotFound(format!("{owner}/{name}"))); + } + + // Visibility gate against the canonical survivor's rules. + let rules = state.db.list_visibility_rules(&record.id).await?; + if visibility_check(&rules, record.is_public, &record.owner_did, caller, "/") + == Decision::Deny + { + return Err(AppError::RepoNotFound(format!("{owner}/{name}"))); + } + + // Normalize owner exactly like anchor writes do. + let owner_short = crate::db::normalize_owner_key(&record.owner_did); + let slug = Some(format!("{}/{}", owner_short, record.name)); + + state + .db + .list_arweave_anchors(slug.as_deref(), Some(&record.owner_did), limit) + .await + .map_err(AppError::Internal)? + } else { + // ── Global listing ── + // Build the set of readable repo slugs and owner DIDs from the deduped repo view + // (mirror rows already collapsed, quarantined excluded), then query + // anchors bounded in SQL. + let repos = state + .db + .list_all_repos_deduped() + .await + .map_err(AppError::Internal)?; + let repo_ids: Vec = repos.iter().map(|r| r.id.clone()).collect(); + let rules_by_repo = state + .db + .list_visibility_rules_for_repos(&repo_ids) + .await + .map_err(AppError::Internal)?; + + // Build parallel vectors of readable (slug, owner_did) pairs to query in SQL. 
+ // This avoids filter-before-limit leaks or loss of pages. + let mut query_repos = Vec::new(); + let mut query_owner_dids = Vec::new(); + + for r in &repos { + let rules = rules_by_repo.get(&r.id).map(Vec::as_slice).unwrap_or(&[]); + if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny { + continue; + } + let short = crate::db::normalize_owner_key(&r.owner_did); + let slug = format!("{}/{}", short, r.name); + query_repos.push(slug); + query_owner_dids.push(r.owner_did.clone()); + } + + if query_repos.is_empty() { + Vec::new() + } else { + state + .db + .list_arweave_anchors_for_repos(&query_repos, &query_owner_dids, limit) + .await + .map_err(AppError::Internal)? + } + }; Ok(Json(serde_json::json!({ "anchors": anchors, diff --git a/crates/gitlawb-node/src/api/ipfs.rs b/crates/gitlawb-node/src/api/ipfs.rs index f3de757..45b2601 100644 --- a/crates/gitlawb-node/src/api/ipfs.rs +++ b/crates/gitlawb-node/src/api/ipfs.rs @@ -15,7 +15,7 @@ //! see `get_by_cid`). use axum::{ - extract::{Path, State}, + extract::{Path, Query, State}, http::{HeaderMap, HeaderName, HeaderValue, StatusCode}, response::{IntoResponse, Response}, Extension, Json, @@ -30,6 +30,7 @@ use crate::git::store; use crate::git::visibility_pack::{allowed_blob_set_for_caller, has_path_scoped_rule}; use crate::state::AppState; use crate::visibility::{visibility_check, Decision}; +use serde::Deserialize; /// GET /ipfs/{cid} /// @@ -216,12 +217,152 @@ pub async fn get_by_cid( /// Returns all CIDs that have been pinned to the local IPFS node from git /// objects received via push. Each entry includes the git SHA-256 hex, the /// CIDv1 string, and the timestamp when it was pinned. -pub async fn list_pins(State(state): State) -> Result> { - let pins = state +/// +/// Requires authentication: the global pin index would otherwise disclose +/// metadata for every object ever pushed to the node (#121). 
+/// +/// `ListPinsQuery` carries that endpoint's query string: `limit` is the requested +/// page size. When omitted it is `default_limit()` (1000), but `list_pins` clamps +/// the value to 0..=200, so the effective default and maximum are both 200. +#[derive(Debug, Deserialize)] +pub struct ListPinsQuery { + #[serde(default = "default_limit")] + pub limit: i64, +} + +fn default_limit() -> i64 { + 1000 +} + +/// GET /api/v1/ipfs/pins +/// +/// Returns up to `limit` (clamped to 200) most recent CIDs pinned to the local IPFS node from git +/// objects received via push. Each entry includes the git SHA-256 hex, the +/// CIDv1 string, and the timestamp when it was pinned. +/// +/// Requires authentication: the global pin index would otherwise disclose +/// metadata for every object ever pushed to the node (#121). +/// +/// The global listing filters each pinned object on current repo visibility +/// to prevent metadata disclosure when repos are made private after push (#136). +/// Only pins from repos the caller can currently read are returned. +pub async fn list_pins( + State(state): State, + Query(query): Query, + auth: Option>, +) -> Result> { + let caller = auth.as_ref().map(|e| e.0 .0.as_str()); + + // Reject anonymous callers: the pin index spans the entire node and would + // expose metadata for every object ever pushed here (#121). + if caller.is_none() { + return Err(AppError::Unauthorized( + "authentication required for pin listing".into(), + )); + } + let caller_owned = caller.map(|c| c.to_string()); + + // Build the set of readable repo slugs and owner DIDs from the deduped repo view + // (mirror rows already collapsed, quarantined excluded), then query + // pins bounded in SQL. 
+ let repos = state .db - .list_pinned_cids() + .list_all_repos_deduped() .await .map_err(AppError::Internal)?; + let repo_ids: Vec = repos.iter().map(|r| r.id.clone()).collect(); + let rules_by_repo = state + .db + .list_visibility_rules_for_repos(&repo_ids) + .await + .map_err(AppError::Internal)?; + + // Build parallel vectors of readable (slug, owner_did) pairs to query in SQL. + // Repo-level visibility is thus applied before LIMIT; path-scoped blobs are dropped after it, so a page may be short. + let mut query_repos = Vec::new(); + let mut query_owner_dids = Vec::new(); + + for r in &repos { + let rules = rules_by_repo.get(&r.id).map(Vec::as_slice).unwrap_or(&[]); + if visibility_check(rules, r.is_public, &r.owner_did, caller, "/") == Decision::Deny { + continue; + } + let short = crate::db::normalize_owner_key(&r.owner_did); + let slug = format!("{}/{}", short, r.name); + query_repos.push(slug); + query_owner_dids.push(r.owner_did.clone()); + } + + let limit = query.limit.clamp(0, 200); + let raw_pins = if query_repos.is_empty() { + Vec::new() + } else { + state + .db + .list_pinned_cids_for_repos(&query_repos, &query_owner_dids, limit) + .await + .map_err(AppError::Internal)? 
+ }; + + let mut repos_by_slug = HashMap::new(); + for r in repos { + let short = crate::db::normalize_owner_key(&r.owner_did); + let slug = format!("{}/{}", short, r.name); + let rules = rules_by_repo.get(&r.id).cloned().unwrap_or_default(); + repos_by_slug.insert(slug, (r, rules)); + } + + let mut filtered_pins = Vec::new(); + let mut allowed_blobs_by_repo: HashMap> = HashMap::new(); + + for pin in raw_pins { + let Some((repo, rules)) = repos_by_slug.get(&pin.repo) else { + continue; + }; + + if !has_path_scoped_rule(rules) { + filtered_pins.push(pin); + continue; + } + + let allowed_set = if let Some(set) = allowed_blobs_by_repo.get(&repo.id) { + set + } else { + let set = match state.repo_store.acquire(&repo.owner_did, &repo.name).await { + Ok(rp) => { + let rp_clone = rp.clone(); + let r_clone = rules.clone(); + let is_public = repo.is_public; + let owner = repo.owner_did.clone(); + let caller_for_walk = caller_owned.clone(); + + match tokio::task::spawn_blocking(move || { + allowed_blob_set_for_caller( + &rp_clone, + &r_clone, + is_public, + &owner, + caller_for_walk.as_deref(), + ) + }) + .await + { + Ok(Ok(s)) => s, + _ => HashSet::new(), + } + } + Err(_) => HashSet::new(), + }; + allowed_blobs_by_repo.insert(repo.id.clone(), set); + allowed_blobs_by_repo.get(&repo.id).unwrap() + }; + + if allowed_set.contains(&pin.sha256_hex) { + filtered_pins.push(pin); + } + } + + let pins = filtered_pins; Ok(Json(serde_json::json!({ "pins": pins, diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 4742998..4fc0c0e 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -1047,12 +1047,16 @@ pub async fn git_receive_pack( let node_did_str = state.node_did.to_string(); let node_seed = state.node_keypair.to_seed(); let repo_name = record.name.clone(); + let owner_short = crate::db::normalize_owner_key(&owner_did); + let slug = format!("{}/{}", owner_short, repo_name); tokio::spawn(async move 
{ let pinned = crate::ipfs_pin::pin_new_objects( &ipfs_api, &repo_path_clone, object_list_ipfs, &db_clone, + &slug, + &owner_did, ) .await; if !pinned.is_empty() { @@ -1163,6 +1167,8 @@ pub async fn git_receive_pack( &repo_path_clone, object_list_pinata, &db_clone, + &repo_slug, + &owner_did_for_arweave, ) .await } else { diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 7c35b67..5515056 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -159,6 +159,10 @@ pub struct PinnedCidRecord { pub cid: String, pub pinned_at: String, pub pinata_cid: Option, + #[serde(default)] + pub repo: String, + #[serde(default)] + pub owner_did: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -823,6 +827,42 @@ const MIGRATIONS: &[Migration] = &[ "ALTER TABLE repos ADD COLUMN IF NOT EXISTS quarantined BOOLEAN NOT NULL DEFAULT FALSE", ], }, + Migration { + version: 10, + name: "pinned_cids_repo_owner", + stmts: &[ + "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS repo TEXT", + "ALTER TABLE pinned_cids ADD COLUMN IF NOT EXISTS owner_did TEXT", + // Backfill repo/owner only when the cid maps to exactly one + // unambiguous (repo, owner_did) pair under did:key-aware owner key + // normalization. This avoids nondeterministic assignments. 
+ r#"UPDATE pinned_cids p + SET repo = m.repo, + owner_did = m.owner_did + FROM ( + SELECT + bc.cid, + MIN(bc.repo) AS repo, + MIN(r.owner_did) AS owner_did + FROM branch_cids bc + JOIN repos r + ON r.name = split_part(bc.repo, '/', 2) + AND (CASE WHEN r.owner_did LIKE 'did:key:%' AND position(':' in substr(r.owner_did, 9)) = 0 THEN substr(r.owner_did, 9) ELSE r.owner_did END) + = split_part(bc.repo, '/', 1) + GROUP BY bc.cid + HAVING COUNT(DISTINCT (bc.repo || '|' || r.owner_did)) = 1 + ) m + WHERE p.cid = m.cid"#, + // Fallback for remaining rows + "UPDATE pinned_cids SET repo = '' WHERE repo IS NULL", + "UPDATE pinned_cids SET owner_did = '' WHERE owner_did IS NULL", + "ALTER TABLE pinned_cids ALTER COLUMN repo SET NOT NULL", + "ALTER TABLE pinned_cids ALTER COLUMN owner_did SET NOT NULL", + "ALTER TABLE pinned_cids DROP CONSTRAINT IF EXISTS pinned_cids_pkey", + "ALTER TABLE pinned_cids ADD PRIMARY KEY (repo, sha256_hex)", + "CREATE INDEX IF NOT EXISTS idx_pinned_cids_repo_owner ON pinned_cids (repo, owner_did)", + ], + }, ]; // ── Repos ───────────────────────────────────────────────────────────────────── @@ -2041,23 +2081,42 @@ impl Db { // ── Pinned CIDs ─────────────────────────────────────────────────────────────── impl Db { - pub async fn is_pinned(&self, sha256_hex: &str) -> Result { - let row = sqlx::query("SELECT COUNT(*) as cnt FROM pinned_cids WHERE sha256_hex = $1") + pub async fn is_pinned_for_repo(&self, sha256_hex: &str, repo: &str) -> Result { + let row = sqlx::query( + "SELECT COUNT(*) as cnt FROM pinned_cids WHERE sha256_hex = $1 AND repo = $2", + ) + .bind(sha256_hex) + .bind(repo) + .fetch_one(&self.pool) + .await?; + Ok(row.get::("cnt") > 0) + } + + pub async fn get_pinned_cid(&self, sha256_hex: &str) -> Result> { + let row = sqlx::query("SELECT cid FROM pinned_cids WHERE sha256_hex = $1 LIMIT 1") .bind(sha256_hex) - .fetch_one(&self.pool) + .fetch_optional(&self.pool) .await?; - Ok(row.get::("cnt") > 0) + Ok(row.map(|r| r.get("cid"))) } - 
pub async fn record_pinned_cid(&self, sha256_hex: &str, cid: &str) -> Result<()> { + pub async fn record_pinned_cid( + &self, + sha256_hex: &str, + cid: &str, + repo: &str, + owner_did: &str, + ) -> Result<()> { sqlx::query( - "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at) - VALUES ($1, $2, $3) - ON CONFLICT(sha256_hex) DO NOTHING", + "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at, repo, owner_did) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT(repo, sha256_hex) DO NOTHING", ) .bind(sha256_hex) .bind(cid) .bind(Utc::now().to_rfc3339()) + .bind(repo) + .bind(owner_did) .execute(&self.pool) .await?; Ok(()) @@ -2133,51 +2192,85 @@ impl Db { Ok(row.map(|r| r.get("recipients_tag"))) } - pub async fn list_pinned_cids(&self) -> Result> { - let rows = sqlx::query( - "SELECT sha256_hex, cid, pinned_at, pinata_cid FROM pinned_cids ORDER BY pinned_at DESC", - ) - .fetch_all(&self.pool) - .await?; - Ok(rows - .into_iter() - .map(|r| PinnedCidRecord { - sha256_hex: r.get("sha256_hex"), - cid: r.get("cid"), - pinned_at: r.get("pinned_at"), - pinata_cid: r.get("pinata_cid"), - }) - .collect()) - } - - /// Returns true if this object already has a Pinata CID recorded. - pub async fn has_pinata_cid(&self, sha256_hex: &str) -> Result { + pub async fn has_pinata_cid_for_repo(&self, sha256_hex: &str, repo: &str) -> Result { let row = sqlx::query( - "SELECT COUNT(*) as cnt FROM pinned_cids WHERE sha256_hex = $1 AND pinata_cid IS NOT NULL", + "SELECT COUNT(*) as cnt FROM pinned_cids WHERE sha256_hex = $1 AND repo = $2 AND pinata_cid IS NOT NULL", ) .bind(sha256_hex) + .bind(repo) .fetch_one(&self.pool) .await?; Ok(row.get::("cnt") > 0) } + pub async fn get_pinata_cid(&self, sha256_hex: &str) -> Result> { + let row = sqlx::query("SELECT pinata_cid FROM pinned_cids WHERE sha256_hex = $1 AND pinata_cid IS NOT NULL LIMIT 1") + .bind(sha256_hex) + .fetch_optional(&self.pool) + .await?; + Ok(row.map(|r| r.get("pinata_cid"))) + } + /// Record the Pinata CID for a git object. 
/// Inserts the row if it doesn't exist (objects pinned directly to Pinata /// without a prior local IPFS pin get cid = pinata_cid). - pub async fn record_pinata_cid(&self, sha256_hex: &str, pinata_cid: &str) -> Result<()> { + pub async fn record_pinata_cid( + &self, + sha256_hex: &str, + pinata_cid: &str, + repo: &str, + owner_did: &str, + ) -> Result<()> { sqlx::query( - "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at, pinata_cid) - VALUES ($1, $2, $3, $4) - ON CONFLICT(sha256_hex) DO UPDATE SET pinata_cid = EXCLUDED.pinata_cid", + "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at, pinata_cid, repo, owner_did) + VALUES ($1, $2, $3, $4, $5, $6) + ON CONFLICT(repo, sha256_hex) DO UPDATE SET pinata_cid = EXCLUDED.pinata_cid", ) .bind(sha256_hex) .bind(pinata_cid) // fallback local cid if row is new .bind(Utc::now().to_rfc3339()) .bind(pinata_cid) + .bind(repo) + .bind(owner_did) .execute(&self.pool) .await?; Ok(()) } + + /// Bounded global pin query: returns pins for any of the given (repo, owner_did) + /// pairs, ordered by pinned_at DESC, capped at `limit`. 
+ pub async fn list_pinned_cids_for_repos( + &self, + repos: &[String], + owner_dids: &[String], + limit: i64, + ) -> Result> { + let rows = sqlx::query( + "SELECT sha256_hex, cid, pinned_at, pinata_cid, repo, owner_did + FROM pinned_cids + WHERE (repo, owner_did) IN ( + SELECT * FROM UNNEST($1::text[], $2::text[]) + ) + ORDER BY pinned_at DESC LIMIT $3", + ) + .bind(repos) + .bind(owner_dids) + .bind(limit) + .fetch_all(&self.pool) + .await?; + + Ok(rows + .into_iter() + .map(|r| PinnedCidRecord { + sha256_hex: r.get("sha256_hex"), + cid: r.get("cid"), + pinned_at: r.get("pinned_at"), + pinata_cid: r.get("pinata_cid"), + repo: r.get("repo"), + owner_did: r.get("owner_did"), + }) + .collect()) + } } // ── Received Ref Updates ────────────────────────────────────────────────────── @@ -2442,25 +2535,40 @@ impl Db { pub async fn list_arweave_anchors( &self, repo: Option<&str>, + owner_did: Option<&str>, limit: i64, ) -> Result> { - let rows = if let Some(repo) = repo { - sqlx::query( - "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at - FROM arweave_anchors WHERE repo=$1 ORDER BY anchored_at DESC LIMIT $2", - ) - .bind(repo) - .bind(limit) - .fetch_all(&self.pool) - .await? - } else { - sqlx::query( - "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at - FROM arweave_anchors ORDER BY anchored_at DESC LIMIT $1", - ) - .bind(limit) - .fetch_all(&self.pool) - .await? + let rows = match (repo, owner_did) { + (Some(repo), Some(owner_did)) => { + sqlx::query( + "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at + FROM arweave_anchors WHERE repo=$1 AND owner_did=$2 ORDER BY anchored_at DESC LIMIT $3", + ) + .bind(repo) + .bind(owner_did) + .bind(limit) + .fetch_all(&self.pool) + .await? 
+ } + (Some(repo), None) => { + sqlx::query( + "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at + FROM arweave_anchors WHERE repo=$1 ORDER BY anchored_at DESC LIMIT $2", + ) + .bind(repo) + .bind(limit) + .fetch_all(&self.pool) + .await? + } + (None, _) => { + sqlx::query( + "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at + FROM arweave_anchors ORDER BY anchored_at DESC LIMIT $1", + ) + .bind(limit) + .fetch_all(&self.pool) + .await? + } }; Ok(rows @@ -2480,6 +2588,46 @@ impl Db { }) .collect()) } + + /// Bounded global anchor query: returns anchors for any of the given (repo, owner_did) + /// pairs, ordered by anchored_at DESC, capped at `limit`. + pub async fn list_arweave_anchors_for_repos( + &self, + repos: &[String], + owner_dids: &[String], + limit: i64, + ) -> Result> { + let rows = sqlx::query( + "SELECT id, repo, owner_did, ref_name, old_sha, new_sha, cid, irys_tx_id, arweave_url, node_did, anchored_at + FROM arweave_anchors + WHERE (repo, owner_did) IN ( + SELECT * FROM UNNEST($1::text[], $2::text[]) + ) + ORDER BY anchored_at DESC LIMIT $3", + ) + .bind(repos) + .bind(owner_dids) + .bind(limit) + .fetch_all(&self.pool) + .await?; + + Ok(rows + .into_iter() + .map(|r| ArweaveAnchor { + id: r.get("id"), + repo: r.get("repo"), + owner_did: r.get("owner_did"), + ref_name: r.get("ref_name"), + old_sha: r.get("old_sha"), + new_sha: r.get("new_sha"), + cid: r.get("cid"), + irys_tx_id: r.get("irys_tx_id"), + arweave_url: r.get("arweave_url"), + node_did: r.get("node_did"), + anchored_at: r.get("anchored_at"), + }) + .collect()) + } } // ── Row helpers ─────────────────────────────────────────────────────────────── @@ -3199,7 +3347,8 @@ impl Db { #[cfg(test)] mod migration_tests { - use super::{MIGRATIONS, MIGRATION_V1_NAME}; + use super::{Db, MIGRATIONS, MIGRATION_V1_NAME}; + use sqlx::{PgPool, Row}; #[test] fn migrations_are_non_empty() { @@ 
-3277,6 +3426,135 @@ mod migration_tests { // it, you must also update the backfill. assert_eq!(MIGRATIONS[0].name, MIGRATION_V1_NAME); } + + #[sqlx::test] + async fn test_migration_v10_upgrade_path(pool: PgPool) { + let db = Db::for_testing(pool); + + // Run migrations up to version 9 + async fn run_migrations_up_to(db: &Db, version: i64) { + sqlx::query( + r#"CREATE TABLE IF NOT EXISTS schema_migrations ( + version BIGINT NOT NULL PRIMARY KEY, + name TEXT NOT NULL, + applied_at TEXT NOT NULL + )"#, + ) + .execute(&db.pool) + .await + .unwrap(); + + for m in super::MIGRATIONS { + if m.version > version { + break; + } + let already: bool = sqlx::query( + "SELECT EXISTS(SELECT 1 FROM schema_migrations WHERE version = $1) AS applied", + ) + .bind(m.version) + .fetch_one(&db.pool) + .await + .unwrap() + .get::("applied"); + + if already { + continue; + } + + let mut tx = db.pool.begin().await.unwrap(); + for stmt in m.stmts { + sqlx::query(stmt).execute(&mut *tx).await.unwrap(); + } + sqlx::query( + "INSERT INTO schema_migrations (version, name, applied_at) VALUES ($1, $2, $3)", + ) + .bind(m.version) + .bind(m.name) + .bind(chrono::Utc::now().to_rfc3339()) + .execute(&mut *tx) + .await + .unwrap(); + tx.commit().await.unwrap(); + } + } + + run_migrations_up_to(&db, 9).await; + + // Seed a repo, branch_cids, and pinned_cids under v9 schema + sqlx::query( + "INSERT INTO repos (id, name, owner_did, description, is_public, default_branch, created_at, updated_at, disk_path) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)" + ) + .bind("repo-123") + .bind("myrepo") + .bind("did:key:z6Mkwowner") + .bind("desc") + .bind(true) + .bind("main") + .bind("2026-07-03T00:00:00Z") + .bind("2026-07-03T00:00:00Z") + .bind("/srv/repo-123") + .execute(&db.pool) + .await + .unwrap(); + + sqlx::query( + "INSERT INTO branch_cids (repo, ref_name, sha, cid, node_did, updated_at) + VALUES ($1, $2, $3, $4, $5, $6)", + ) + .bind("z6Mkwowner/myrepo") + .bind("refs/heads/main") + .bind("old-sha") + 
.bind("old-cid") + .bind("node-did") + .bind("2026-07-03T00:00:00Z") + .execute(&db.pool) + .await + .unwrap(); + + sqlx::query( + "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at) + VALUES ($1, $2, $3)", + ) + .bind("old-sha") + .bind("old-cid") + .bind("2026-07-03T00:00:00Z") + .execute(&db.pool) + .await + .unwrap(); + + // Run remaining migrations (version 10) + db.run_migrations().await.unwrap(); + + // Verify backfilling of repo and owner_did columns + let row = sqlx::query( + "SELECT sha256_hex, cid, repo, owner_did FROM pinned_cids WHERE sha256_hex = 'old-sha'", + ) + .fetch_one(&db.pool) + .await + .unwrap(); + + assert_eq!(row.get::("repo"), "z6Mkwowner/myrepo"); + assert_eq!(row.get::("owner_did"), "did:key:z6Mkwowner"); + + // Verify the compound primary key (repo, sha256_hex) allows duplicate sha256_hex with different repo + let res = sqlx::query( + "INSERT INTO pinned_cids (sha256_hex, cid, pinned_at, repo, owner_did) + VALUES ($1, $2, $3, $4, $5)", + ) + .bind("old-sha") + .bind("old-cid") + .bind("2026-07-03T00:00:00Z") + .bind("other-repo") + .bind("other-owner") + .execute(&db.pool) + .await; + + assert!( + res.is_ok(), + "Compound PK must allow same SHA in different repos" + ); + } } #[cfg(test)] diff --git a/crates/gitlawb-node/src/ipfs_pin.rs b/crates/gitlawb-node/src/ipfs_pin.rs index 3b34619..754c99b 100644 --- a/crates/gitlawb-node/src/ipfs_pin.rs +++ b/crates/gitlawb-node/src/ipfs_pin.rs @@ -99,6 +99,8 @@ pub async fn pin_new_objects( repo_path: &std::path::Path, object_list: Vec, db: &crate::db::Db, + repo: &str, + owner_did: &str, ) -> Vec<(String, String)> { if ipfs_api.is_empty() { return vec![]; @@ -107,16 +109,28 @@ pub async fn pin_new_objects( let mut pinned = Vec::new(); for sha in object_list { - // Skip if already pinned - match db.is_pinned(&sha).await { + // Skip if already pinned for this repo + match db.is_pinned_for_repo(&sha, repo).await { Ok(true) => continue, Ok(false) => {} Err(e) => { - tracing::warn!(sha = %sha, 
err = %e, "DB error checking pinned status"); + tracing::warn!(sha = %sha, err = %e, "DB error checking pinned status for repo"); continue; } } + // Reuse globally pinned CID if available to avoid duplicate uploads + if let Ok(Some(existing_cid)) = db.get_pinned_cid(&sha).await { + if let Err(e) = db + .record_pinned_cid(&sha, &existing_cid, repo, owner_did) + .await + { + tracing::warn!(sha = %sha, err = %e, "failed to record pinned CID in DB"); + } + pinned.push((sha, existing_cid)); + continue; + } + // Read raw object content let data = match crate::git::store::read_object(repo_path, &sha) { Ok(Some((_obj_type, bytes))) => bytes, @@ -130,7 +144,7 @@ pub async fn pin_new_objects( // Pin to IPFS match pin_git_object(ipfs_api, &sha, &data).await { Ok(cid) if !cid.is_empty() => { - if let Err(e) = db.record_pinned_cid(&sha, &cid).await { + if let Err(e) = db.record_pinned_cid(&sha, &cid, repo, owner_did).await { tracing::warn!(sha = %sha, err = %e, "failed to record pinned CID in DB"); } pinned.push((sha, cid)); diff --git a/crates/gitlawb-node/src/pinata.rs b/crates/gitlawb-node/src/pinata.rs index 6c9c0bf..953bf59 100644 --- a/crates/gitlawb-node/src/pinata.rs +++ b/crates/gitlawb-node/src/pinata.rs @@ -76,6 +76,7 @@ pub async fn pin_object( /// this shape — change both in lockstep. Objects already recorded with a /// `pinata_cid` are skipped. Returns `(sha_hex, cid)` pairs for each newly /// pinned object. 
+#[allow(clippy::too_many_arguments)] pub async fn pin_new_objects( client: &reqwest::Client, upload_url: &str, @@ -83,6 +84,8 @@ pub async fn pin_new_objects( repo_path: &std::path::Path, object_list: Vec, db: &crate::db::Db, + repo: &str, + owner_did: &str, ) -> Vec<(String, String)> { if jwt.is_empty() { return vec![]; @@ -91,15 +94,28 @@ pub async fn pin_new_objects( let mut pinned = Vec::new(); for sha in object_list { - match db.has_pinata_cid(&sha).await { + // Skip if already has Pinata CID for this repo + match db.has_pinata_cid_for_repo(&sha, repo).await { Ok(true) => continue, Ok(false) => {} Err(e) => { - tracing::warn!(sha = %sha, err = %e, "DB error checking pinata_cid"); + tracing::warn!(sha = %sha, err = %e, "DB error checking pinata_cid for repo"); continue; } } + // Reuse globally pinned Pinata CID if available to avoid duplicate uploads + if let Ok(Some(existing_pinata_cid)) = db.get_pinata_cid(&sha).await { + if let Err(e) = db + .record_pinata_cid(&sha, &existing_pinata_cid, repo, owner_did) + .await + { + tracing::warn!(sha = %sha, err = %e, "failed to record pinata_cid in DB"); + } + pinned.push((sha, existing_pinata_cid)); + continue; + } + let data = match crate::git::store::read_object(repo_path, &sha) { Ok(Some((_kind, bytes))) => bytes, Ok(None) => continue, @@ -111,7 +127,7 @@ pub async fn pin_new_objects( match pin_object(client, upload_url, jwt, &sha, &data).await { Ok(cid) if !cid.is_empty() => { - if let Err(e) = db.record_pinata_cid(&sha, &cid).await { + if let Err(e) = db.record_pinata_cid(&sha, &cid, repo, owner_did).await { tracing::warn!(sha = %sha, err = %e, "failed to record pinata_cid in DB"); } pinned.push((sha, cid)); diff --git a/crates/gitlawb-node/src/server.rs b/crates/gitlawb-node/src/server.rs index d38d638..8f4e20a 100644 --- a/crates/gitlawb-node/src/server.rs +++ b/crates/gitlawb-node/src/server.rs @@ -187,17 +187,20 @@ pub fn build_router(state: AppState) -> Router { ); // ── IPFS content-addressed retrieval and 
pin listing ────────────────── - // `/ipfs/{cid}` carries `optional_signature` so `get_by_cid` sees the caller - // identity and can apply per-repo visibility (#110); anonymous callers stay - // anonymous and still read genuinely public content. `/api/v1/ipfs/pins` - // stays unsigned — gating the pin index is tracked separately (#121). + // Both routes carry `optional_signature` so `get_by_cid` and `list_pins` + // can apply per-repo visibility and authentication checks (#110, #121); + // anonymous callers can still read public `/ipfs/{cid}` content, but `list_pins` rejects them as unauthorized. let ipfs_routes = Router::new() .route("/ipfs/{cid}", get(ipfs::get_by_cid)) - .layer(middleware::from_fn(auth::optional_signature)) - .merge(Router::new().route("/api/v1/ipfs/pins", get(ipfs::list_pins))); + .route("/api/v1/ipfs/pins", get(ipfs::list_pins)) + .layer(middleware::from_fn(auth::optional_signature)); // ── Arweave permanent anchors ────────────────────────────────────────── - let arweave_routes = Router::new().route("/api/v1/arweave/anchors", get(arweave::list_anchors)); + // Carries `optional_signature` so `list_anchors` can check caller visibility + // when `?repo=` is provided and require authentication for global listing (#121). 
+ let arweave_routes = Router::new() + .route("/api/v1/arweave/anchors", get(arweave::list_anchors)) + .layer(middleware::from_fn(auth::optional_signature)); // ── Bounty routes (write — require HTTP Signature) ───────────────── let bounty_write_routes = add_auth_layers( diff --git a/crates/gitlawb-node/src/test_support.rs b/crates/gitlawb-node/src/test_support.rs index d84f23a..e83129d 100644 --- a/crates/gitlawb-node/src/test_support.rs +++ b/crates/gitlawb-node/src/test_support.rs @@ -1954,4 +1954,665 @@ mod tests { ); assert!(!body.contains("DANGLING SECRET")); } + + fn pins_router(state: &AppState) -> Router { + Router::new() + .route( + "/api/v1/ipfs/pins", + axum::routing::get(crate::api::ipfs::list_pins), + ) + .layer(axum::middleware::from_fn(crate::auth::optional_signature)) + .with_state(state.clone()) + } + + fn signed_get(kp: &gitlawb_core::identity::Keypair, uri: &str) -> Request { + let s = gitlawb_core::http_sig::sign_request(kp, "GET", uri, b""); + Request::builder() + .method(Method::GET) + .uri(uri) + .header("content-digest", s.content_digest) + .header("signature-input", s.signature_input) + .header("signature", s.signature) + .body(Body::empty()) + .unwrap() + } + + /// #121: anonymous caller gets 401 from /api/v1/ipfs/pins. 
+ #[sqlx::test] + async fn pins_list_denies_anonymous(pool: PgPool) { + let state = test_state(pool).await; + let resp = pins_router(&state) + .oneshot(anon_get("/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); + } + + struct PinTestFixture { + owner: gitlawb_core::identity::Keypair, + owner_did: String, + fx: CidFixture, + repo: crate::db::RepoRecord, + } + + async fn setup_pin_test(state: &AppState, repo_name: &str) -> PinTestFixture { + use gitlawb_core::identity::Keypair; + + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let fs_slug = owner_did.replace([':', '/'], "_"); + let short = owner_did.split(':').next_back().unwrap().to_string(); + + let fx = seed_cid_repos(&fs_slug, &short, &[repo_name]); + let repo = seed_repo(&owner_did, repo_name); + state.db.create_repo(&repo).await.unwrap(); + + PinTestFixture { + owner, + owner_did, + fx, + repo, + } + } + + /// #121: authenticated caller gets 200 from /api/v1/ipfs/pins. + /// + /// list_pins only returns pins whose SHA-256 appears in a repo the caller + /// can read. The test therefore: + /// 1. Creates a real SHA-256 bare git repo on disk (via seed_cid_repos) so + /// list_all_objects finds the object. + /// 2. Inserts a matching public repo row (owner_did = full DID from the keypair) + /// so list_all_repos/visibility_check passes. + /// 3. Records a pin for the real object OID. 
+    #[sqlx::test]
+    async fn pins_list_allows_authenticated(pool: PgPool) {
+        let state = test_state(pool).await;
+        let PinTestFixture {
+            owner,
+            owner_did,
+            fx,
+            repo,
+        } = setup_pin_test(&state, "pinrepo").await;
+
+        // Pin the fixture's public object under the `<short owner>/<repo>` slug.
+        let oid = fx.public_oid.clone();
+        let cid = cid_for_oid(&oid);
+        let owner_short = owner_did.rsplit(':').next().unwrap();
+        let slug = format!("{owner_short}/{}", repo.name);
+        state
+            .db
+            .record_pinned_cid(&oid, &cid, &slug, &owner_did)
+            .await
+            .unwrap();
+
+        // The owner signs the listing request.
+        let request = signed_get(&owner, "/api/v1/ipfs/pins");
+        let response = pins_router(&state).oneshot(request).await.unwrap();
+        assert_eq!(response.status(), StatusCode::OK);
+
+        let json = json_body(response).await;
+        assert_eq!(
+            json["count"], 1,
+            "pin for the real git object must be returned"
+        );
+        assert_eq!(
+            json["pins"][0]["sha256_hex"], oid,
+            "returned pin must match the seeded object OID"
+        );
+    }
+
+    /// #121: authenticated caller gets 200 through the production build_router.
+    ///
+    /// Unlike pins_list_allows_authenticated (which uses a mini pins_router with
+    /// only the one route), this test exercises server::build_router to verify
+    /// that /api/v1/ipfs/pins is wired through optional_signature in the real
+    /// route table and that a signed request reaches list_pins successfully.
+ #[sqlx::test] + async fn pins_list_allows_authenticated_through_build_router(pool: PgPool) { + let state = test_state(pool).await; + let setup = setup_pin_test(&state, "build-router-pins").await; + + let pinned_sha = setup.fx.public_oid.clone(); + let pinned_cid = cid_for_oid(&pinned_sha); + + let short = setup.owner_did.split(':').next_back().unwrap().to_string(); + let slug = format!("{}/{}", short, setup.repo.name); + state + .db + .record_pinned_cid(&pinned_sha, &pinned_cid, &slug, &setup.owner_did) + .await + .unwrap(); + + let router = crate::server::build_router(state); + let resp = router + .oneshot(signed_get(&setup.owner, "/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 1, + "build_router wiring: pin for the real git object must be returned" + ); + assert_eq!( + body["pins"][0]["sha256_hex"], pinned_sha, + "build_router wiring: returned pin must match the seeded object OID" + ); + } + + #[sqlx::test] + async fn pins_list_excludes_quarantined_repos(pool: PgPool) { + let state = test_state(pool).await; + let setup = setup_pin_test(&state, "pinrepo").await; + + let pinned_sha = setup.fx.public_oid.clone(); + let pinned_cid = cid_for_oid(&pinned_sha); + + state + .db + .set_repo_quarantine(&setup.repo.id, true) + .await + .unwrap(); + + let short = setup.owner_did.split(':').next_back().unwrap().to_string(); + let slug = format!("{}/{}", short, setup.repo.name); + state + .db + .record_pinned_cid(&pinned_sha, &pinned_cid, &slug, &setup.owner_did) + .await + .unwrap(); + + let resp = pins_router(&state) + .oneshot(signed_get(&setup.owner, "/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "quarantined repo objects must be withheld from pin index" + ); + } + + #[sqlx::test] + async fn pins_list_withholds_path_scoped_blobs(pool: PgPool) { + use 
crate::db::VisibilityMode; + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let setup = setup_pin_test(&state, "pinrepo").await; + let stranger = Keypair::generate(); + + let public_sha = setup.fx.public_oid.clone(); + let public_cid = cid_for_oid(&public_sha); + let secret_sha = setup.fx.secret_oid.clone(); + let secret_cid = cid_for_oid(&secret_sha); + + state + .db + .set_visibility_rule( + &setup.repo.id, + "/secret/**", + VisibilityMode::B, + &[], + &setup.owner_did, + ) + .await + .unwrap(); + + let short = setup.owner_did.split(':').next_back().unwrap().to_string(); + let slug = format!("{}/{}", short, setup.repo.name); + state + .db + .record_pinned_cid(&public_sha, &public_cid, &slug, &setup.owner_did) + .await + .unwrap(); + state + .db + .record_pinned_cid(&secret_sha, &secret_cid, &slug, &setup.owner_did) + .await + .unwrap(); + + let resp = pins_router(&state) + .oneshot(signed_get(&stranger, "/api/v1/ipfs/pins")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + + assert_eq!(body["count"], 1, "stranger sees only the public pin"); + assert_eq!( + body["pins"][0]["sha256_hex"], public_sha, + "stranger sees the public pin" + ); + } + + // ---- #121: GET /api/v1/arweave/anchors auth and visibility gate ---- + + fn anchors_router(state: &AppState) -> Router { + Router::new() + .route( + "/api/v1/arweave/anchors", + axum::routing::get(crate::api::arweave::list_anchors), + ) + .layer(axum::middleware::from_fn(crate::auth::optional_signature)) + .with_state(state.clone()) + } + + async fn seed_anchor(db: &crate::db::Db, repo: &str, owner_did: &str) { + use crate::db::RecordAnchorInput; + db.record_arweave_anchor(&RecordAnchorInput { + repo, + owner_did, + ref_name: "refs/heads/main", + old_sha: "0000000000000000000000000000000000000000000000000000000000000000", + new_sha: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + cid: 
Some("bafkreihipknrba7vz6ahh2l5qk6pxwtywn3u7worv6hmi6dkt6nv5phb4u"), + irys_tx_id: "tx-test", + arweave_url: "https://arweave.net/test", + node_did: "did:key:zNODE", + }) + .await + .unwrap(); + } + + /// #121: /api/v1/arweave/anchors without ?repo= denies anonymous. + #[sqlx::test] + async fn anchors_global_denies_anonymous(pool: PgPool) { + let state = test_state(pool).await; + let resp = anchors_router(&state) + .oneshot(anon_get("/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); + } + + struct AnchorTestFixture { + owner: gitlawb_core::identity::Keypair, + owner_did: String, + } + + async fn setup_anchor_test( + state: &AppState, + repo_name: &str, + is_public: bool, + ) -> AnchorTestFixture { + use gitlawb_core::identity::Keypair; + + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let owner_short = owner_did.split(':').next_back().unwrap().to_string(); + let short_slug = format!("{owner_short}/{repo_name}"); + + let repo = if is_public { + seed_repo(&owner_did, repo_name) + } else { + seed_private_repo(&owner_did, repo_name) + }; + + state.db.create_repo(&repo).await.unwrap(); + seed_anchor(&state.db, &short_slug, &owner_did).await; + + AnchorTestFixture { owner, owner_did } + } + + /// #121: /api/v1/arweave/anchors without ?repo= allows authenticated. + /// + /// The global listing resolves visibility against the deduped repo view + /// (list_all_repos_deduped + visibility_check) and queries anchors bounded in + /// SQL filtering by (repo, owner_did) pairs to prevent cross-DID slug collision. + /// The anchor row must carry the same short owner slug that the push path writes + /// (last ':'-separated segment of the DID). 
+    #[sqlx::test]
+    async fn anchors_global_allows_authenticated(pool: PgPool) {
+        let state = test_state(pool).await;
+        let fixture = setup_anchor_test(&state, "globalrepo", true).await;
+
+        let request = signed_get(&fixture.owner, "/api/v1/arweave/anchors");
+        let response = anchors_router(&state).oneshot(request).await.unwrap();
+        assert_eq!(response.status(), StatusCode::OK);
+
+        let json = json_body(response).await;
+        assert_eq!(
+            json["count"], 1,
+            "authenticated caller sees the seeded anchor"
+        );
+    }
+
+    /// A signed caller who is not the owner gets 200 with an empty global
+    /// listing when the only anchor belongs to a private repo.
+    #[sqlx::test]
+    async fn anchors_global_denies_non_reader(pool: PgPool) {
+        let state = test_state(pool).await;
+        let _fixture = setup_anchor_test(&state, "privaterepo", false).await;
+        let stranger = gitlawb_core::identity::Keypair::generate();
+
+        let request = signed_get(&stranger, "/api/v1/arweave/anchors");
+        let response = anchors_router(&state).oneshot(request).await.unwrap();
+        assert_eq!(response.status(), StatusCode::OK);
+
+        let json = json_body(response).await;
+        assert_eq!(
+            json["count"], 0,
+            "non-reader is denied the private anchor in global listing"
+        );
+    }
+
+    /// #121: /api/v1/arweave/anchors with ?repo= denies anonymous on private repo.
+    ///
+    /// The ?repo= path gate is purely visibility-based: list_anchors resolves the
+    /// repo in the deduped view and answers 404 when visibility_check denies the
+    /// caller, so no separate anonymous-rejection guard is needed here.
+    #[sqlx::test]
+    async fn anchors_repo_denies_anonymous_on_private(pool: PgPool) {
+        let state = test_state(pool).await;
+        let repo_name = "private-repo";
+        let fixture = setup_anchor_test(&state, repo_name, false).await;
+
+        // The query carries the owner's full DID, not the short slug.
+        let uri = format!(
+            "/api/v1/arweave/anchors?repo={}/{repo_name}",
+            fixture.owner_did
+        );
+        let response = anchors_router(&state)
+            .oneshot(anon_get(&uri))
+            .await
+            .unwrap();
+        assert_eq!(response.status(), StatusCode::NOT_FOUND);
+    }
+
+    /// #121: /api/v1/arweave/anchors with ?repo= allows anonymous on public repo.
+    #[sqlx::test]
+    async fn anchors_repo_allows_anonymous_on_public(pool: PgPool) {
+        let state = test_state(pool).await;
+        let repo_name = "public-repo";
+        let fixture = setup_anchor_test(&state, repo_name, true).await;
+
+        let uri = format!(
+            "/api/v1/arweave/anchors?repo={}/{repo_name}",
+            fixture.owner_did
+        );
+        let response = anchors_router(&state)
+            .oneshot(anon_get(&uri))
+            .await
+            .unwrap();
+        assert_eq!(response.status(), StatusCode::OK);
+
+        let json = json_body(response).await;
+        assert_eq!(
+            json["count"], 1,
+            "anonymous caller sees the seeded public anchor"
+        );
+    }
+
+    /// #121: /api/v1/arweave/anchors with ?repo= allows repo owner.
+    ///
+    /// The anchor is stored under the short owner slug that push writes, while
+    /// the ?repo= query carries the full DID; list_anchors normalises the owner
+    /// to the same short slug before it queries the DB.
+    #[sqlx::test]
+    async fn anchors_repo_allows_owner(pool: PgPool) {
+        let state = test_state(pool).await;
+        let repo_name = "owners-repo";
+        let fixture = setup_anchor_test(&state, repo_name, false).await;
+
+        let uri = format!(
+            "/api/v1/arweave/anchors?repo={}/{repo_name}",
+            fixture.owner_did
+        );
+        let request = signed_get(&fixture.owner, &uri);
+        let response = anchors_router(&state).oneshot(request).await.unwrap();
+        assert_eq!(response.status(), StatusCode::OK);
+
+        let json = json_body(response).await;
+        assert_eq!(json["count"], 1, "repo owner sees their anchor");
+    }
+
+    /// #121: /api/v1/arweave/anchors with ?repo= denies authenticated non-reader on private repo.
+ #[sqlx::test] + async fn anchors_repo_denies_non_reader(pool: PgPool) { + use gitlawb_core::identity::Keypair; + let state = test_state(pool).await; + let repo_name = "private-repo"; + let fx = setup_anchor_test(&state, repo_name, false).await; + let stranger = Keypair::generate(); + + let uri = format!( + "/api/v1/arweave/anchors?repo={}/{}", + fx.owner_did, repo_name + ); + let resp = anchors_router(&state) + .oneshot(signed_get(&stranger, &uri)) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } + + /// #136: ?repo= resolves against the deduped canonical repo, not a public mirror. + /// + /// When a repo has both a private canonical row and a public mirror row, the + /// ?repo= path must gate on the canonical survivor's visibility (denying a + /// stranger) rather than the public mirror's (which would allow). + #[sqlx::test] + async fn anchors_repo_denies_stranger_when_canonical_is_private_even_with_public_mirror( + pool: PgPool, + ) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let owner = Keypair::generate(); + let owner_did = owner.did().to_string(); + let short = owner_did.split(':').next_back().unwrap().to_string(); + let repo_name = "mirror-canonical"; + + // Create a private canonical repo. + let canonical = seed_private_repo(&owner_did, repo_name); + state.db.create_repo(&canonical).await.unwrap(); + + // Create a public mirror for the same repo. + state + .db + .upsert_mirror_repo(&short, repo_name, "/tmp/mirror", None, false) + .await + .unwrap(); + + // Seed an anchor with the short slug (matching both rows). + let short_slug = format!("{short}/{repo_name}"); + seed_anchor(&state.db, &short_slug, &owner_did).await; + + let stranger = Keypair::generate(); + let uri = format!("/api/v1/arweave/anchors?repo={owner_did}/{repo_name}"); + + // Stranger must be denied (404), not served anchor via the public mirror. 
+ let resp = anchors_router(&state) + .oneshot(signed_get(&stranger, &uri)) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + + // Owner still gets the anchor. + let resp = anchors_router(&state) + .oneshot(signed_get(&owner, &uri)) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!(body["count"], 1, "owner sees their anchor through ?repo="); + } + + /// Cross-owner slug-collision regression: two DIDs sharing a last segment + /// (e.g. did:key:z6Same and did:web:evil:z6Same) produce the same anchor + /// slug z6Same/name. The global listing must not leak the private canonical's + /// anchor rows under the public mirror's slug after the owner_did post-filter. + #[sqlx::test] + async fn anchors_global_denies_cross_owner_slug_collision(pool: PgPool) { + use gitlawb_core::identity::Keypair; + + let state = test_state(pool).await; + let victim = Keypair::generate(); + let victim_did = victim.did().to_string(); + let victim_short = victim_did.split(':').next_back().unwrap().to_string(); + let repo_name = "collision"; + + // Private canonical repo for the victim (did:key:z6Victim). + let canonical = seed_private_repo(&victim_did, repo_name); + state.db.create_repo(&canonical).await.unwrap(); + let slug = format!("{}/{}", victim_short, repo_name); + seed_anchor(&state.db, &slug, &victim_did).await; + + // Public repo sharing the same last segment via a different DID method + // (did:web:evil:z6Victim). The distinct owner_did prevents dedup collapse, + // so both rows appear in list_all_repos_deduped. A different disk_path + // avoids the unique repos_disk_path_key constraint. 
+ let attacker_did = format!("did:web:evil:{victim_short}"); + let mirror = RepoRecord { + disk_path: format!("/tmp/attacker-{}/{}", victim_short, repo_name), + ..seed_repo(&attacker_did, repo_name) + }; + state.db.create_repo(&mirror).await.unwrap(); + + // Stranger must not see the victim's anchor under the colliding slug. + let stranger = Keypair::generate(); + let resp = anchors_router(&state) + .oneshot(signed_get(&stranger, "/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "stranger must not see victim's private anchor under colliding slug" + ); + + // Victim still sees their own anchor. + let resp = anchors_router(&state) + .oneshot(signed_get(&victim, "/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 1, + "victim sees their own anchor through global listing" + ); + } + + /// Quarantined repos are excluded from anchor listings. A quarantined + /// mirror row (no canonical counterpart) is filtered by DEDUP_CTE's + /// `WHERE quarantined = FALSE`, so its slug never enters the readable set. + #[sqlx::test] + async fn anchors_global_excludes_quarantined_repo(pool: PgPool) { + let state = test_state(pool).await; + let short = "zQREPOTEST"; + let repo_name = "q-repo"; + + // Only a quarantined mirror row — no canonical counterpart. + state + .db + .upsert_mirror_repo(short, repo_name, "/tmp/q", None, true) + .await + .unwrap(); + + // Directly seed an anchor for the quarantined repo's slug. 
+ let slug = format!("{short}/{repo_name}"); + seed_anchor(&state.db, &slug, "did:key:zDummyOwner").await; + + let owner = gitlawb_core::identity::Keypair::generate(); + let resp = anchors_router(&state) + .oneshot(signed_get(&owner, "/api/v1/arweave/anchors")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "quarantined repo's anchors are excluded from global listing" + ); + } + + /// #121: negative limit is clamped to 0 and returns a bounded response (no 500). + #[sqlx::test] + async fn anchors_global_negative_limit_is_clamped(pool: PgPool) { + let state = test_state(pool).await; + let fx = setup_anchor_test(&state, "neg-limit-repo", true).await; + + let resp = anchors_router(&state) + .oneshot(signed_get(&fx.owner, "/api/v1/arweave/anchors?limit=-1")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "negative limit clamps to 0, returning empty result" + ); + } + + /// #121: pins listing negative limit is clamped to 0 and returns a bounded response (no 500). + #[sqlx::test] + async fn pins_global_negative_limit_is_clamped(pool: PgPool) { + let state = test_state(pool).await; + let setup = setup_pin_test(&state, "neg-limit-repo").await; + + let resp = pins_router(&state) + .oneshot(signed_get(&setup.owner, "/api/v1/ipfs/pins?limit=-1")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = json_body(resp).await; + assert_eq!( + body["count"], 0, + "negative limit clamps to 0, returning empty result" + ); + } + + /// Regression test for slug-collision and global anchor query filter-before-limit: + /// Seed a readable and an unreadable repo sharing owner_short/name, give the + /// unreadable one the newer rows, and assert the caller sees their own anchor + /// and never the other owner's. 
+    #[sqlx::test]
+    async fn anchors_global_slug_collision_regression(pool: PgPool) {
+        use gitlawb_core::identity::Keypair;
+
+        let state = test_state(pool).await;
+        let caller = Keypair::generate();
+        let caller_did = caller.did().to_string();
+        let owner_short = caller_did.split(':').next_back().unwrap().to_string();
+        // Same last segment as the caller's DID, different DID method.
+        let unreadable_did = format!("did:gitlawb:{owner_short}");
+        let repo_name = "collision-repo";
+        let slug = format!("{owner_short}/{repo_name}");
+
+        // Create the readable (public) repo owned by caller
+        let mut repo_readable = seed_repo(&caller_did, repo_name);
+        repo_readable.disk_path = format!("/tmp/{repo_name}-readable-{}", uuid::Uuid::new_v4());
+        state.db.create_repo(&repo_readable).await.unwrap();
+
+        // Create the unreadable (private) repo owned by the collision DID
+        let mut repo_unreadable = seed_private_repo(&unreadable_did, repo_name);
+        repo_unreadable.disk_path = format!("/tmp/{repo_name}-unreadable-{}", uuid::Uuid::new_v4());
+        state.db.create_repo(&repo_unreadable).await.unwrap();
+
+        // Seed readable anchor (older)
+        seed_anchor(&state.db, &slug, &caller_did).await;
+
+        // Sleep to ensure a distinct timestamp for the newer anchor
+        tokio::time::sleep(std::time::Duration::from_millis(5)).await;
+
+        // Seed unreadable anchor (newer)
+        seed_anchor(&state.db, &slug, &unreadable_did).await;
+
+        // Request global listing with limit = 1
+        let resp = anchors_router(&state)
+            .oneshot(signed_get(&caller, "/api/v1/arweave/anchors?limit=1"))
+            .await
+            .unwrap();
+
+        assert_eq!(resp.status(), StatusCode::OK);
+        let body = json_body(resp).await;
+
+        // The count should be 1 because the database query is correctly constrained
+        // to the readable (repo, owner_did) pair, thus skipping the newer unreadable anchor.
+        assert_eq!(
+            body["count"], 1,
+            "caller should see their own anchor despite the newer colliding unreadable anchor"
+        );
+        assert_eq!(
+            body["anchors"][0]["owner_did"], caller_did,
+            "returned anchor must belong to the caller"
+        );
+    }
 }
diff --git a/crates/gl/src/http.rs b/crates/gl/src/http.rs
index 32e90a1..1159e2c 100644
--- a/crates/gl/src/http.rs
+++ b/crates/gl/src/http.rs
@@ -24,6 +24,10 @@ impl NodeClient {
         }
     }
 
+    /// Returns `true` when this client holds a keypair (`self.keypair` is `Some`).
+    pub fn has_keypair(&self) -> bool {
+        self.keypair.is_some()
+    }
+
     /// GET request — no auth (public read endpoints).
     pub async fn get(&self, path: &str) -> Result {
         let url = format!("{}{}", self.node_url, path);
diff --git a/crates/gl/src/ipfs_cmd.rs b/crates/gl/src/ipfs_cmd.rs
index b1b12f0..1b37a04 100644
--- a/crates/gl/src/ipfs_cmd.rs
+++ b/crates/gl/src/ipfs_cmd.rs
@@ -31,24 +31,30 @@ pub enum IpfsCmd {
     },
 }
 
+use std::path::PathBuf;
+
 pub async fn run(args: IpfsArgs) -> Result<()> {
     match args.cmd {
-        IpfsCmd::List { node } => cmd_list(node).await,
-        IpfsCmd::Get { cid, node } => cmd_get(cid, node).await,
+        IpfsCmd::List { node } => cmd_list(node, None).await,
+        IpfsCmd::Get { cid, node } => cmd_get(cid, node, None).await,
     }
 }
 
-async fn cmd_list(node: String) -> Result<()> {
-    let client = NodeClient::new(&node, None);
-    let resp: Value = client
-        .get("/api/v1/ipfs/pins")
-        .await?
-        .json()
-        .await
-        .context("failed to parse pins response")?;
+/// Fetches the node's pin list with a signed `GET /api/v1/ipfs/pins`.
+///
+/// The keypair comes from `load_keypair_from_dir(dir)`; a load failure is
+/// returned to the caller. A non-success response aborts with the status and
+/// the response body.
+async fn cmd_list(node: String, dir: Option) -> Result<()> {
+    let kp = crate::identity::load_keypair_from_dir(dir.as_deref())?;
+    let client = NodeClient::new(&node, Some(kp));
+    let resp = client.get_signed("/api/v1/ipfs/pins").await?;
 
-    let pins = resp["pins"].as_array().cloned().unwrap_or_default();
-    let count = resp["count"].as_u64().unwrap_or(pins.len() as u64);
+    let status = resp.status();
+    if !status.is_success() {
+        // An unreadable body degrades to an empty string instead of masking the status.
+        let body = resp.text().await.unwrap_or_default();
+        anyhow::bail!("node returned {status}: {body}");
+    }
+
+    let resp_val: Value = resp.json().await.context("failed to parse pins response")?;
+
+    let pins = resp_val["pins"].as_array().cloned().unwrap_or_default();
+    let count = resp_val["count"].as_u64().unwrap_or(pins.len() as u64);
 
     if pins.is_empty() {
         println!("No IPFS pins recorded on {node}");
@@ -76,13 +82,16 @@ async fn cmd_list(node: String) -> Result<()> {
     Ok(())
 }
 
-async fn cmd_get(cid: String, node: String) -> Result<()> {
-    let client = NodeClient::new(&node, None);
+/// Fetches `/ipfs/{cid}` from the node: signed when a keypair can be loaded from
+/// `dir`, unsigned otherwise (a load error is discarded via `.ok()`).
+async fn cmd_get(cid: String, node: String, dir: Option) -> Result<()> {
+    let kp = crate::identity::load_keypair_from_dir(dir.as_deref()).ok();
+    let client = NodeClient::new(&node, kp);
     let path = format!("/ipfs/{cid}");
-    let resp = client
-        .get(&path)
-        .await
-        .with_context(|| format!("failed to fetch CID {cid} from {node}"))?;
+    let resp = if client.has_keypair() {
+        client.get_signed(&path).await
+    } else {
+        client.get(&path).await
+    }
+    .with_context(|| format!("failed to fetch CID {cid} from {node}"))?;
 
     let status = resp.status();
     if !status.is_success() {
@@ -108,3 +117,59 @@ async fn cmd_get(cid: String, node: String) -> Result<()> {
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    /// Generates a keypair, writes its PEM encoding to `identity.pem` inside
+    /// `dir`, and returns the keypair.
+    fn setup_test_keypair(dir: &TempDir) -> gitlawb_core::identity::Keypair {
+        let kp = gitlawb_core::identity::Keypair::generate();
+        std::fs::write(
+            dir.path().join("identity.pem"),
+            kp.to_pem().unwrap().as_bytes(),
+        )
+        .unwrap();
+        kp
+    }
+
+    /// `cmd_list` succeeds against a mock node that answers 200 with one pin.
+    #[tokio::test]
+    async fn test_cmd_list_success() {
+        let mut server = mockito::Server::new_async().await;
+        let dir = TempDir::new().unwrap();
+        let _kp = setup_test_keypair(&dir);
+
+        let _m = server
+            .mock("GET", "/api/v1/ipfs/pins")
+            .with_status(200)
+            .with_header("content-type", "application/json")
+            .with_body(r#"{"count":1,"pins":[{"sha256_hex":"abc","cid":"bafkrei","pinned_at":"2026-07-03T00:00:00Z"}]}"#)
+            .create_async()
+            .await;
+
+        cmd_list(server.url(), Some(dir.path().to_path_buf()))
+            .await
+            .unwrap();
+    }
+
+    /// A 401 from the node surfaces as an error carrying the status and body.
+    #[tokio::test]
+    async fn test_cmd_list_unauthorized() {
+        let mut server = mockito::Server::new_async().await;
+        let dir = TempDir::new().unwrap();
+        let _kp = setup_test_keypair(&dir);
+
+        let _m = server
+            .mock("GET", "/api/v1/ipfs/pins")
+            .with_status(401)
+            .with_body("authentication required")
+            .create_async()
+            .await;
+
+        let err = cmd_list(server.url(), Some(dir.path().to_path_buf()))
+            .await
+            .unwrap_err();
+        assert!(err
+            .to_string()
+            .contains("node returned 401 Unauthorized: authentication required"));
+    }
+}
diff --git a/crates/gl/src/node.rs b/crates/gl/src/node.rs
index 9b3a6e1..78abed0 100644
--- a/crates/gl/src/node.rs
+++ b/crates/gl/src/node.rs
@@ -177,8 +177,41 @@ async fn try_get_json(client: &NodeClient, path: &str) -> Option {
     resp.json::().await.ok()
 }
 
+/// Why the pin count could not be obtained by `get_pins_status`.
+#[derive(Debug)]
+enum PinsError {
+    /// The client has no keypair, so no request was sent.
+    NoIdentity,
+    /// The node answered 401 or 403.
+    Unauthorized,
+    /// Request failure, any other non-success status, or an unparsable body;
+    /// carries the rendered error or `HTTP <status>`.
+    Other(String),
+}
+
+/// Fetches the pin count through a signed `GET /api/v1/ipfs/pins`.
+///
+/// Returns the response's `count` field, falling back to the length of the
+/// `pins` array and then to 0. Failures are classified as `PinsError`.
+async fn get_pins_status(client: &NodeClient) -> Result {
+    if !client.has_keypair() {
+        return Err(PinsError::NoIdentity);
+    }
+    let resp = match client.get_signed("/api/v1/ipfs/pins").await {
+        Ok(r) => r,
+        Err(e) => return Err(PinsError::Other(e.to_string())),
+    };
+    let status = resp.status();
+    if status == reqwest::StatusCode::UNAUTHORIZED || status == reqwest::StatusCode::FORBIDDEN {
+        return Err(PinsError::Unauthorized);
+    }
+    if !status.is_success() {
+        return Err(PinsError::Other(format!("HTTP {status}")));
+    }
+    let val: Value = match resp.json().await {
+        Ok(v) => v,
+        Err(e) => return Err(PinsError::Other(e.to_string())),
+    };
+    let count = val["count"]
+        .as_u64()
+        .unwrap_or_else(|| val["pins"].as_array().map(|a| a.len() as u64).unwrap_or(0));
+    Ok(count)
+}
+
 async fn cmd_status(node: String) -> Result<()> {
-    let client = NodeClient::new(&node, None);
+    // Best-effort identity: if the keypair cannot be loaded the client has none.
+    let kp = crate::identity::load_keypair_from_dir(None).ok();
+    let client = NodeClient::new(&node, kp);
 
     // ── Fetch node info (required — bail if unreachable) ──────────────────
     let info_resp = client
@@ -200,7 +233,7 @@ async fn cmd_status(node: String) -> Result<()> {
         try_get_json(&client, "/api/v1/repos"),
         try_get_json(&client, "/api/v1/p2p/info"),
         try_get_json(&client, "/api/v1/events/ref-updates?limit=5"),
-        try_get_json(&client, "/api/v1/ipfs/pins"),
+        get_pins_status(&client),
     );
 
     // ── Render dashboard ──────────────────────────────────────────────────
@@ -307,13 +340,19 @@ async fn cmd_status(node: String) -> Result<()> {
 
     // Pins
     println!("Pins");
-    if let Some(ref pins) = pins_val {
-        let count = pins["count"]
-            .as_u64()
-            .unwrap_or_else(|| pins["pins"].as_array().map(|a| a.len() as u64).unwrap_or(0));
-        println!(" Pinned CIDs: {count}");
-    } else {
-        println!(" IPFS not configured");
+    // Each failure class gets its own line in the dashboard.
+    match pins_val {
+        Ok(count) => {
+            println!(" Pinned CIDs: {count}");
+        }
+        Err(PinsError::NoIdentity) => {
+            println!(" (identity required to view pins)");
+        }
+        Err(PinsError::Unauthorized) => {
+            println!(" (unauthorized to view pins)");
+        }
+        Err(PinsError::Other(err)) => {
+            println!(" (pins unavailable: {err})");
+        }
     }
     println!();
 
@@ -409,3 +448,64 @@ async fn cmd_resolve(did: String, node: String) -> Result<()> {
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    /// Generates a keypair, writes its PEM encoding to `identity.pem` inside
+    /// `dir`, and returns the keypair.
+    fn setup_test_keypair(dir: &TempDir) -> gitlawb_core::identity::Keypair {
+        let kp = gitlawb_core::identity::Keypair::generate();
+        std::fs::write(
+            dir.path().join("identity.pem"),
+            kp.to_pem().unwrap().as_bytes(),
+        )
+        .unwrap();
+        kp
+    }
+
+    /// A 200 response with `"count":5` yields `Ok(5)`.
+    #[tokio::test]
+    async fn test_get_pins_status_success() {
+        let mut server = mockito::Server::new_async().await;
+        let dir = TempDir::new().unwrap();
+        let kp = setup_test_keypair(&dir);
+        let client = NodeClient::new(server.url(), Some(kp));
+
+        let _m = server
+            .mock("GET", "/api/v1/ipfs/pins")
+            .with_status(200)
+            .with_header("content-type", "application/json")
+            .with_body(r#"{"count":5,"pins":[]}"#)
+            .create_async()
+            .await;
+
+        let count = get_pins_status(&client).await.unwrap();
+        assert_eq!(count, 5);
+    }
+
+    /// A client without a keypair yields `NoIdentity`; no mock is registered.
+    #[tokio::test]
+    async fn test_get_pins_status_no_identity() {
+        let server = mockito::Server::new_async().await;
+        let client = NodeClient::new(server.url(), None);
+
+        let err = get_pins_status(&client).await.unwrap_err();
+        assert!(matches!(err, PinsError::NoIdentity));
+    }
+
+    /// A 401 from the node is reported as `Unauthorized`.
+    #[tokio::test]
+    async fn test_get_pins_status_unauthorized() {
+        let mut server = mockito::Server::new_async().await;
+        let dir = TempDir::new().unwrap();
+        let kp = setup_test_keypair(&dir);
+        let client = NodeClient::new(server.url(), Some(kp));
+
+        let _m = server
+            .mock("GET", "/api/v1/ipfs/pins")
+            .with_status(401)
+            .create_async()
+            .await;
+
+        let err = get_pins_status(&client).await.unwrap_err();
+        assert!(matches!(err, PinsError::Unauthorized));
+    }
+}