From 9c3df2c8bcacaa60b2cbeb4c77e622c0a9bbd58d Mon Sep 17 00:00:00 2001 From: Gravirei Date: Wed, 1 Jul 2026 09:46:33 +0600 Subject: [PATCH 1/7] fix(node): prefer canonical repo row over mirror row in get_repo (#124) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_repo matched both mirror rows (bare short-DID, is_public=true, no visibility rules) and canonical rows (full did:key: DID, correct rules), returning one nondeterministically. When the mirror row won, the visibility gate saw is_public=true + empty rules, allowing unauthenticated read of private/withheld content. Add ORDER BY to prefer canonical rows (UUID id, no slash) over mirror rows (slash-form id), matching the ranking logic already used by DEDUP_CTE and dedupe_canonical_repos. Test seeds mirror FIRST so an unordered fetch_optional returns the mirror row — proving the test locks in the fix. --- Cargo.lock | 10 ++-- crates/gitlawb-node/src/db/mod.rs | 80 ++++++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d42c370..dc6540e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3300,7 +3300,7 @@ dependencies = [ [[package]] name = "git-remote-gitlawb" -version = "0.3.9" +version = "0.4.0" dependencies = [ "anyhow", "gitlawb-core", @@ -3311,7 +3311,7 @@ dependencies = [ [[package]] name = "gitlawb-attest" -version = "0.3.9" +version = "0.4.0" dependencies = [ "base64", "ed25519-dalek", @@ -3328,7 +3328,7 @@ dependencies = [ [[package]] name = "gitlawb-core" -version = "0.3.9" +version = "0.4.0" dependencies = [ "anyhow", "base64", @@ -3355,7 +3355,7 @@ dependencies = [ [[package]] name = "gitlawb-node" -version = "0.3.9" +version = "0.4.0" dependencies = [ "alloy", "anyhow", @@ -3411,7 +3411,7 @@ dependencies = [ [[package]] name = "gl" -version = "0.3.9" +version = "0.4.0" dependencies = [ "alloy", "anyhow", diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 31ff72f..3257fb1 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -891,7 +891,13 @@ impl Db { "SELECT id, name, owner_did, description, is_public, default_branch, created_at, updated_at, disk_path, forked_from, machine_id FROM repos - WHERE (owner_did = $1 OR owner_did LIKE '%:' || $1 || '%') AND name = $2", + WHERE (owner_did = $1 OR owner_did LIKE '%:' || $1 || '%') AND name = $2 + -- Prefer canonical rows (UUID id, no slash) over mirror rows (slash id). + -- Mirror rows are inserted by upsert_mirror_repo with is_public=true and + -- no visibility rules; using them for visibility checks would bypass the + -- canonical row's gate (issue #124). + ORDER BY CASE WHEN position('/' in id) > 0 THEN 1 ELSE 0 END, + created_at ASC, id ASC", ) .bind(owner_did) .bind(name) @@ -3700,6 +3706,78 @@ mod dedup_db_tests { assert_eq!(list_len, 2); assert_eq!(count, list_len, "count must equal the deduped list length"); } + + /// get_repo must prefer the canonical row over the mirror row when both match, + /// so the visibility gate keys off the canonical row's rules and is_public flag + /// rather than the mirror's hardcoded public-with-no-rules (issue #124). + #[sqlx::test] + async fn get_repo_prefers_canonical_over_mirror(pool: PgPool) { + let db = db(pool).await; + let short = "z6Mkwbud"; + let owner_did = "did:key:z6Mkwbud"; + + // Mirror row seeded FIRST — hardcoded is_public=true, no visibility rules. + // Without the ORDER BY fix, fetch_optional returns this row by insertion order, + // so the test fails (proving it locks in the fix). + db.upsert_mirror_repo(short, "secret-repo", "/srv/mirror", None, false) + .await + .unwrap(); + + // Canonical row with is_public=false. + let canonical = RepoRecord { + id: uuid::Uuid::new_v4().to_string(), + name: "secret-repo".into(), + owner_did: owner_did.to_string(), + description: None, + is_public: false, + default_branch: "main".into(), + created_at: ts("2026-01-01T00:00:00Z"), + updated_at: ts("2026-01-01T00:00:00Z"), + disk_path: "/srv/secret".into(), + forked_from: None, + machine_id: None, + }; + db.create_repo(&canonical).await.unwrap(); + + // Querying with bare short DID should return the canonical row. + let got = db + .get_repo(short, "secret-repo") + .await + .unwrap() + .expect("get_repo should find the repo"); + + assert_eq!( + got.owner_did, owner_did, + "canonical row (did:key: form) must win over mirror row (bare short DID)" + ); + assert!( + !got.id.contains('/'), + "canonical row id must not contain a slash" + ); + assert!( + !got.is_public, + "canonical row's is_public must be preserved" + ); + } + + /// get_repo still returns the mirror row when no canonical row exists + /// (mirror-only group), so sync and read paths remain functional. + #[sqlx::test] + async fn get_repo_returns_mirror_when_no_canonical(pool: PgPool) { + let db = db(pool).await; + db.upsert_mirror_repo("z6Lonely", "orphan", "/srv/m", None, false) + .await + .unwrap(); + + let got = db + .get_repo("z6Lonely", "orphan") + .await + .unwrap() + .expect("get_repo should find the mirror"); + + assert_eq!(got.id, "z6Lonely/orphan", "mirror row is returned"); + assert!(got.is_public, "mirror row's is_public should be true"); + } } /// Exercises the iCaptcha single-use proof ledger (`icaptcha_consumed_proofs`), From e7f1ee13c03cb7217bb7967b143a21eb26528040 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 00:01:37 +0600 Subject: [PATCH 2/7] fix(test): date canonical row after mirror to guard CASE term in get_repo fix (#124) --- crates/gitlawb-node/src/db/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 3257fb1..1f482f8 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -3731,8 +3731,11 @@ mod dedup_db_tests { description: None, is_public: false, default_branch: "main".into(), - created_at: ts("2026-01-01T00:00:00Z"), - updated_at: ts("2026-01-01T00:00:00Z"), + // Date after the mirror (Utc::now()) so that created_at ASC alone + // would pick the mirror; the CASE WHEN position('/' in id) > 0 term + // is what makes the canonical row win. + created_at: ts("2126-01-01T00:00:00Z"), + updated_at: ts("2126-01-01T00:00:00Z"), disk_path: "/srv/secret".into(), forked_from: None, machine_id: None, From ca003cbda331c55d364886bf7170d4620dd07892 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 07:26:33 +0600 Subject: [PATCH 3/7] fix(node): tighten get_repo owner matching to did:key only, not any DID method (#124 P2) --- crates/gitlawb-node/src/db/mod.rs | 74 ++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 1f482f8..dae5d0b 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -887,11 +887,21 @@ impl Db { } pub async fn get_repo(&self, owner_did: &str, name: &str) -> Result> { + // Normalize owner_did to its did:key short form, mirroring did_matches and + // the DEDUP_CTE's owner-key CASE: strip `did:key:` only when the remainder + // is a bare key id (no further `:`). This keeps `did:key:z...` and bare + // `z...` interchangeable while `did:gitlawb:z...` / `did:web:z...` stay + // distinct — the old LIKE '%:' || $1 || '%' was too broad (issue #124 P2). + let owner_key = match owner_did.strip_prefix("did:key:") { + Some(rest) if !rest.contains(':') => rest, + _ => owner_did, + }; let row = sqlx::query( "SELECT id, name, owner_did, description, is_public, default_branch, created_at, updated_at, disk_path, forked_from, machine_id FROM repos - WHERE (owner_did = $1 OR owner_did LIKE '%:' || $1 || '%') AND name = $2 + WHERE (CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END) = $1 + AND name = $2 -- Prefer canonical rows (UUID id, no slash) over mirror rows (slash id). -- Mirror rows are inserted by upsert_mirror_repo with is_public=true and -- no visibility rules; using them for visibility checks would bypass the @@ -899,7 +909,7 @@ impl Db { ORDER BY CASE WHEN position('/' in id) > 0 THEN 1 ELSE 0 END, created_at ASC, id ASC", ) - .bind(owner_did) + .bind(owner_key) .bind(name) .fetch_optional(&self.pool) .await?; @@ -3781,6 +3791,66 @@ mod dedup_db_tests { assert_eq!(got.id, "z6Lonely/orphan", "mirror row is returned"); assert!(got.is_public, "mirror row's is_public should be true"); } + + /// get_repo must NOT match a non-key DID row (e.g. did:gitlawb:) when queried + /// with the bare short DID — the old LIKE '%:' || $1 || '%' was too broad and + /// could rank a non-key canonical row ahead of the exact mirror. + #[sqlx::test] + async fn get_repo_does_not_match_non_key_did(pool: PgPool) { + let db = db(pool).await; + let short = "z6Mkwbud"; + + // Mirror row for the bare short DID. + db.upsert_mirror_repo(short, "shared-name", "/srv/m", None, false) + .await + .unwrap(); + + // Non-key DID row sharing the same trailing id — must stay distinct. + let non_key = RepoRecord { + id: uuid::Uuid::new_v4().to_string(), + name: "shared-name".into(), + owner_did: format!("did:gitlawb:{short}"), + description: None, + is_public: false, + default_branch: "main".into(), + created_at: ts("2126-01-01T00:00:00Z"), + updated_at: ts("2126-01-01T00:00:00Z"), + disk_path: "/srv/other".into(), + forked_from: None, + machine_id: None, + }; + db.create_repo(&non_key).await.unwrap(); + + // Querying with the bare short DID must return the mirror, NOT the + // did:gitlawb row (different DID method, separate owner). + let got = db + .get_repo(short, "shared-name") + .await + .unwrap() + .expect("get_repo should find the mirror row"); + + assert!( + got.id.contains('/'), + "must return the mirror (slash id), not a non-key canonical row" + ); + assert!(got.is_public, "mirror row's is_public should be true"); + + // Querying with the full non-key DID must return that exact row. + let got = db + .get_repo(&format!("did:gitlawb:{short}"), "shared-name") + .await + .unwrap() + .expect("get_repo should find the non-key DID row"); + + assert!( + !got.id.contains('/'), + "must return the non-key canonical row (UUID id)" + ); + assert!( + !got.is_public, + "non-key row's is_public must be preserved" + ); + } } /// Exercises the iCaptcha single-use proof ledger (`icaptcha_consumed_proofs`), From 7f92302abc2e8a875cf5389acedebcd4d42384f0 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 07:35:10 +0600 Subject: [PATCH 4/7] fix(node): refactor owner DID normalization to a dedicated function in create_repo --- crates/gitlawb-node/src/db/mod.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index dae5d0b..ac47afb 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -826,6 +826,13 @@ const MIGRATIONS: &[Migration] = &[ // ── Repos ───────────────────────────────────────────────────────────────────── +fn normalize_owner_key(did: &str) -> &str { + match did.strip_prefix("did:key:") { + Some(rest) if !rest.contains(':') => rest, + _ => did, + } +} + impl Db { pub async fn create_repo(&self, repo: &RepoRecord) -> Result<()> { sqlx::query( @@ -892,10 +899,7 @@ impl Db { // is a bare key id (no further `:`). This keeps `did:key:z...` and bare // `z...` interchangeable while `did:gitlawb:z...` / `did:web:z...` stay // distinct — the old LIKE '%:' || $1 || '%' was too broad (issue #124 P2). - let owner_key = match owner_did.strip_prefix("did:key:") { - Some(rest) if !rest.contains(':') => rest, - _ => owner_did, - }; + let owner_key = normalize_owner_key(owner_did); let row = sqlx::query( "SELECT id, name, owner_did, description, is_public, default_branch, created_at, updated_at, disk_path, forked_from, machine_id @@ -1031,10 +1035,7 @@ impl Db { // Mirror did_matches: strip `did:key:` only when the remainder is a bare // key id (no further `:`). The DEDUP_CTE applies the identical CASE to // owner_did, so the two compare on the same normalized key. - let owner_key = owner_filter.map(|o| match o.strip_prefix("did:key:") { - Some(rest) if !rest.contains(':') => rest, - _ => o, - }); + let owner_key = owner_filter.map(normalize_owner_key); let sql = format!( "{} SELECT @@ -3846,10 +3847,7 @@ mod dedup_db_tests { !got.id.contains('/'), "must return the non-key canonical row (UUID id)" ); - assert!( - !got.is_public, - "non-key row's is_public must be preserved" - ); + assert!(!got.is_public, "non-key row's is_public must be preserved"); } } From 084ca046bd9b17a9b2d402c697f8051eea8421b7 Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 11:12:58 +0600 Subject: [PATCH 5/7] fix(node): align clone URL and slug construction with tightened did:key matching - Make normalize_owner_key pub(crate) and reuse it in api/repos, api/events, api/peers, and visibility modules. - Add read-gate authorize_repo_read and to_response clone_url slug tests. --- crates/gitlawb-node/src/api/events.rs | 6 +-- crates/gitlawb-node/src/api/peers.rs | 4 +- crates/gitlawb-node/src/api/repos.rs | 72 ++++++++++++++++++++------- crates/gitlawb-node/src/db/mod.rs | 62 ++++++++++++++++++++++- crates/gitlawb-node/src/visibility.rs | 2 +- 5 files changed, 120 insertions(+), 26 deletions(-) diff --git a/crates/gitlawb-node/src/api/events.rs b/crates/gitlawb-node/src/api/events.rs index 45db8a0..d925ecb 100644 --- a/crates/gitlawb-node/src/api/events.rs +++ b/crates/gitlawb-node/src/api/events.rs @@ -67,11 +67,7 @@ pub async fn list_repo_events( let repo_id_str = if let Some(ref record) = repo_record { format!( "{}/{}", - record - .owner_did - .split(':') - .next_back() - .unwrap_or(&record.owner_did), + crate::db::normalize_owner_key(&record.owner_did), repo_name ) } else { diff --git a/crates/gitlawb-node/src/api/peers.rs b/crates/gitlawb-node/src/api/peers.rs index 0e71f06..7dc006a 100644 --- a/crates/gitlawb-node/src/api/peers.rs +++ b/crates/gitlawb-node/src/api/peers.rs @@ -296,8 +296,8 @@ pub async fn trigger_sync(State(state): State) -> Result { - // Use short owner (last colon segment) matching DB convention - let short = owner.split(':').next_back().unwrap_or(owner); + // Use short owner (did:key-only normalization) matching DB convention + let short = crate::db::normalize_owner_key(owner); format!("{short}/{name}") } _ => continue, diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index b74f3f6..09db853 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -945,11 +945,7 @@ pub async fn git_receive_pack( .as_deref() .unwrap_or("http://127.0.0.1:7545") .trim_end_matches('/'); - let owner_short = record - .owner_did - .split(':') - .next_back() - .unwrap_or(&record.owner_did); + let owner_short = crate::db::normalize_owner_key(&record.owner_did); let clone_url = format!("{}/{}/{}.git", base_url, owner_short, record.name); for update in &ref_updates { @@ -1096,7 +1092,7 @@ pub async fn git_receive_pack( // this push to Arweave, so the oid->cid index survives total // node loss. Best-effort; never fails the push. if !delta.is_empty() && !irys_url.is_empty() { - let owner_short = owner_did.split(':').next_back().unwrap_or(&owner_did); + let owner_short = crate::db::normalize_owner_key(&owner_did); let repo_slug = format!("{owner_short}/{repo_name}"); let ts = chrono::Utc::now().to_rfc3339(); let manifest = crate::arweave::EncryptedManifest { @@ -1141,11 +1137,7 @@ pub async fn git_receive_pack( let node_did_str = state.node_did.to_string(); let repo_slug = format!( "{}/{}", - record - .owner_did - .split(':') - .next_back() - .unwrap_or(&record.owner_did), + crate::db::normalize_owner_key(&record.owner_did), record.name ); let ref_updates_clone = ref_updates @@ -1474,7 +1466,7 @@ pub async fn fork_repo( } // Check no name conflict under the forker's ownership - let forker_short = forker_did.split(':').next_back().unwrap_or(&forker_did); + let forker_short = crate::db::normalize_owner_key(&forker_did); if state.db.get_repo(forker_short, &fork_name).await?.is_some() { return Err(AppError::BadRequest(format!( "you already have a repo named {fork_name}" @@ -1632,11 +1624,7 @@ fn parse_ref_updates(body: &[u8]) -> Vec { // ── Helpers ─────────────────────────────────────────────────────────────── fn to_response(record: &crate::db::RepoRecord, state: &AppState, star_count: i64) -> RepoResponse { - let owner_short = record - .owner_did - .split(':') - .next_back() - .unwrap_or(&record.owner_did); + let owner_short = crate::db::normalize_owner_key(&record.owner_did); let base_url = state .config @@ -2340,4 +2328,54 @@ mod tests { _mock.assert_async().await; } + + #[tokio::test] + async fn to_response_generates_correct_clone_url_slug() { + let state = crate::test_support::test_state_lazy(); + let now = chrono::Utc::now(); + + // 1. did:key owner (should strip did:key: prefix) + let repo_key = crate::db::RepoRecord { + id: "uuid-1".into(), + name: "my-repo".into(), + owner_did: "did:key:z6Mkwbud".into(), + description: None, + is_public: true, + default_branch: "main".into(), + created_at: now, + updated_at: now, + disk_path: "/tmp/my-repo".into(), + forked_from: None, + machine_id: None, + }; + let response_key = to_response(&repo_key, &state, 5); + assert!( + response_key.clone_url.contains("/z6Mkwbud/my-repo.git"), + "clone_url should use the bare did:key ID. got: {}", + response_key.clone_url + ); + + // 2. did:gitlawb owner (non-key DID method, should NOT strip) + let repo_non_key = crate::db::RepoRecord { + id: "uuid-2".into(), + name: "other-repo".into(), + owner_did: "did:gitlawb:z6Mkwbud".into(), + description: None, + is_public: true, + default_branch: "main".into(), + created_at: now, + updated_at: now, + disk_path: "/tmp/other-repo".into(), + forked_from: None, + machine_id: None, + }; + let response_non_key = to_response(&repo_non_key, &state, 10); + assert!( + response_non_key + .clone_url + .contains("/did:gitlawb:z6Mkwbud/other-repo.git"), + "clone_url should preserve the full non-key owner DID. got: {}", + response_non_key.clone_url + ); + } } diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index ac47afb..9ec984b 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -826,7 +826,7 @@ const MIGRATIONS: &[Migration] = &[ // ── Repos ───────────────────────────────────────────────────────────────────── -fn normalize_owner_key(did: &str) -> &str { +pub(crate) fn normalize_owner_key(did: &str) -> &str { match did.strip_prefix("did:key:") { Some(rest) if !rest.contains(':') => rest, _ => did, @@ -3772,6 +3772,66 @@ mod dedup_db_tests { !got.is_public, "canonical row's is_public must be preserved" ); + + // Querying with full did:key: form should also return the canonical row. + let got_full = db + .get_repo(owner_did, "secret-repo") + .await + .unwrap() + .expect("get_repo should find the repo with full did:key"); + + assert_eq!( + got_full.owner_did, owner_did, + "canonical row must be found using full did:key: form" + ); + assert!( + !got_full.id.contains('/'), + "canonical row id must not contain a slash" + ); + assert!( + !got_full.is_public, + "canonical row's is_public must be preserved" + ); + } + + /// Seed a private canonical plus a public mirror twin for the same owner+name + /// (mirror inserted first), call authorize_repo_read with caller=None, and + /// assert Err(RepoNotFound). That locks the property at the gate. + #[sqlx::test] + async fn authorize_repo_read_denies_private_canonical_even_with_public_mirror(pool: PgPool) { + let state = crate::test_support::test_state(pool).await; + let short = "z6Mkwbud"; + let owner_did = "did:key:z6Mkwbud"; + + // Mirror row seeded FIRST — hardcoded is_public=true, no visibility rules. + state + .db + .upsert_mirror_repo(short, "secret-repo", "/srv/mirror", None, false) + .await + .unwrap(); + + // Canonical row with is_public=false. + let canonical = RepoRecord { + id: uuid::Uuid::new_v4().to_string(), + name: "secret-repo".into(), + owner_did: owner_did.to_string(), + description: None, + is_public: false, + default_branch: "main".into(), + created_at: ts("2126-01-01T00:00:00Z"), + updated_at: ts("2126-01-01T00:00:00Z"), + disk_path: "/srv/secret".into(), + forked_from: None, + machine_id: None, + }; + state.db.create_repo(&canonical).await.unwrap(); + + // call authorize_repo_read with caller=None, and assert Err(RepoNotFound) + let res = crate::api::authorize_repo_read(&state, short, "secret-repo", None, "/").await; + assert!( + matches!(res, Err(crate::error::AppError::RepoNotFound(_))), + "expected Err(RepoNotFound), got {res:?}" + ); } /// get_repo still returns the mirror row when no canonical row exists diff --git a/crates/gitlawb-node/src/visibility.rs b/crates/gitlawb-node/src/visibility.rs index 335d261..aed987d 100644 --- a/crates/gitlawb-node/src/visibility.rs +++ b/crates/gitlawb-node/src/visibility.rs @@ -27,7 +27,7 @@ fn nfc(s: &str) -> String { /// True if `caller` is the repo owner (matches full did:key or its short form), /// mirroring the owner-match idiom in `api/protect.rs`. fn is_owner(owner_did: &str, caller: &str) -> bool { - let owner_short = owner_did.split(':').next_back().unwrap_or(owner_did); + let owner_short = crate::db::normalize_owner_key(owner_did); caller == owner_did || caller == owner_short } From 7506de202945d2e621e93056a8af1870e9db941d Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 21:41:20 +0600 Subject: [PATCH 6/7] fix(node): hoist owner-key CASE into shared const + address #124 review feedback - Hoist 7 verbatim SQL CASE copies into OWNER_KEY_CASE_SQL const (dedup_cte(), get_repo, count_repos_deduped), keeping one literal copy in the v7 migration with a cross-reference comment. - Add 9 unit tests for normalize_owner_key over the full boundary set. - Add LIMIT 1 to get_repo query for single-row intent. - Add non_key_owner_bare_short_does_not_match is_owner regression test. - Update is_owner doc to reflect did:key-only rule. - Document non-key clone_url disk-path constraint in to_response. --- crates/gitlawb-node/src/api/repos.rs | 8 ++ crates/gitlawb-node/src/db/mod.rs | 127 +++++++++++++++++++++----- crates/gitlawb-node/src/visibility.rs | 33 ++++++- 3 files changed, 143 insertions(+), 25 deletions(-) diff --git a/crates/gitlawb-node/src/api/repos.rs b/crates/gitlawb-node/src/api/repos.rs index 09db853..4742998 100644 --- a/crates/gitlawb-node/src/api/repos.rs +++ b/crates/gitlawb-node/src/api/repos.rs @@ -1622,6 +1622,14 @@ fn parse_ref_updates(body: &[u8]) -> Vec { } // ── Helpers ─────────────────────────────────────────────────────────────── +// +// For a non-key DID owner, `normalize_owner_key` returns the full DID, so +// `clone_url` becomes `/did:gitlawb:z6.../repo.git`. That resolves through +// `get_repo`, but the colon-bearing path segment would break the `sync.rs` +// disk-path join (`owner_slug/repo`). Not reachable today (auth is +// did:key-only), so this is a forward constraint to handle before non-key +// ownership lands: the owner-first disk layout must either reject colons or +// encode them. fn to_response(record: &crate::db::RepoRecord, state: &AppState, star_count: i64) -> RepoResponse { let owner_short = crate::db::normalize_owner_key(&record.owner_did); diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 9ec984b..8852895 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -778,6 +778,7 @@ const MIGRATIONS: &[Migration] = &[ // the matching expression index. The CASE must stay byte-for-byte in // sync with DEDUP_CTE / count_repos_deduped or Postgres won't use it. "DROP INDEX IF EXISTS idx_repos_owner_short_name", + // Keep byte-identical to OWNER_KEY_CASE_SQL so Postgres uses the index. "CREATE INDEX IF NOT EXISTS idx_repos_owner_key_name ON repos ((CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END), name)", ], }, @@ -833,6 +834,78 @@ pub(crate) fn normalize_owner_key(did: &str) -> &str { } } +/// SQL CASE expression byte-identical to `normalize_owner_key`. All queries that +/// filter or group by owner key use this const so the Rust and SQL sides cannot +/// drift apart. If you change `normalize_owner_key`, update this const too. +const OWNER_KEY_CASE_SQL: &str = "CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END"; + +#[cfg(test)] +mod normalize_owner_key_tests { + use super::normalize_owner_key; + + // Boundary set matching the SQL CASE: did:key short/full, empty residual, + // did:key:z:extra, non-key, bare, empty, uppercase. + #[test] + fn strips_did_key_prefix() { + assert_eq!(normalize_owner_key("did:key:z6Mkfoo"), "z6Mkfoo"); + } + + #[test] + fn keeps_full_did_key_unchanged_when_not_a_prefix() { + assert_eq!(normalize_owner_key("z6Mkfoo"), "z6Mkfoo"); + } + + #[test] + fn leaves_non_key_did_intact() { + assert_eq!( + normalize_owner_key("did:gitlawb:z6Mkfoo"), + "did:gitlawb:z6Mkfoo" + ); + } + + #[test] + fn leaves_web_did_intact() { + assert_eq!( + normalize_owner_key("did:web:example.com:alice"), + "did:web:example.com:alice" + ); + } + + #[test] + fn does_not_strip_did_key_with_extra_colon() { + // did:key:did:gitlawb:z6... — the remainder contains ':', so it's left whole. + assert_eq!( + normalize_owner_key("did:key:did:gitlawb:z6Mkfoo"), + "did:key:did:gitlawb:z6Mkfoo" + ); + } + + #[test] + fn empty_string_returns_empty() { + assert_eq!(normalize_owner_key(""), ""); + } + + #[test] + fn bare_did_key_colon_becomes_empty() { + // did:key: with nothing after still has the prefix stripped. + assert_eq!(normalize_owner_key("did:key:"), ""); + } + + #[test] + fn uppercase_prefix_is_untouched() { + assert_eq!(normalize_owner_key("DID:KEY:z6Mkfoo"), "DID:KEY:z6Mkfoo"); + } + + #[test] + fn strips_did_key_even_with_trailing_slash() { + // did:key:z6Mkfoo/extra has no ':' in the remainder, so it strips. + assert_eq!( + normalize_owner_key("did:key:z6Mkfoo/extra"), + "z6Mkfoo/extra" + ); + } +} + impl Db { pub async fn create_repo(&self, repo: &RepoRecord) -> Result<()> { sqlx::query( @@ -900,23 +973,26 @@ impl Db { // `z...` interchangeable while `did:gitlawb:z...` / `did:web:z...` stay // distinct — the old LIKE '%:' || $1 || '%' was too broad (issue #124 P2). let owner_key = normalize_owner_key(owner_did); - let row = sqlx::query( + let sql = format!( "SELECT id, name, owner_did, description, is_public, default_branch, created_at, updated_at, disk_path, forked_from, machine_id FROM repos - WHERE (CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END) = $1 + WHERE ({key}) = $1 AND name = $2 -- Prefer canonical rows (UUID id, no slash) over mirror rows (slash id). -- Mirror rows are inserted by upsert_mirror_repo with is_public=true and -- no visibility rules; using them for visibility checks would bypass the -- canonical row's gate (issue #124). ORDER BY CASE WHEN position('/' in id) > 0 THEN 1 ELSE 0 END, - created_at ASC, id ASC", - ) - .bind(owner_key) - .bind(name) - .fetch_optional(&self.pool) - .await?; + created_at ASC, id ASC + LIMIT 1", + key = OWNER_KEY_CASE_SQL + ); + let row = sqlx::query(&sql) + .bind(owner_key) + .bind(name) + .fetch_optional(&self.pool) + .await?; Ok(row.map(row_to_repo)) } @@ -989,15 +1065,17 @@ impl Db { /// full tie still picks a deterministic survivor. `list_all_repos_deduped_with_stars`, /// `list_all_repos_deduped`, and the marker logic in /// `crate::api::repos::dedupe_canonical_repos` must stay in sync. - const DEDUP_CTE: &'static str = "WITH deduped AS ( - SELECT DISTINCT ON (CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END, name) + fn dedup_cte() -> String { + format!( + "WITH deduped AS ( + SELECT DISTINCT ON ({key}, name) id, name, owner_did, description, is_public, default_branch, created_at, -- group MAX, not the canonical row's own value: pushes that -- arrive via gossip touch only the mirror row, so the -- canonical updated_at goes stale MAX(updated_at) OVER ( - PARTITION BY CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END, name + PARTITION BY {key}, name ) AS updated_at, disk_path, forked_from, machine_id FROM repos @@ -1007,15 +1085,18 @@ impl Db { -- does. Callers bind the already-normalized key ($1). -- Quarantined mirrors (admitted but unvalidated by the iCaptcha -- propagation gate) are withheld from every listing surface. - WHERE quarantined = FALSE AND ($1::text IS NULL OR (CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END) = $1) - ORDER BY CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END, name, - -- mirror rows carry a slash-form id (\"{owner_short}/{name}\"), + WHERE quarantined = FALSE AND ($1::text IS NULL OR ({key}) = $1) + ORDER BY {key}, name, + -- mirror rows carry a slash-form id (\"{{owner_short}}/{{name}}\"), -- written only by upsert_mirror_repo; canonical ids are UUIDs. -- Rank canonical (no slash) ahead of the mirror in each group, -- keyed on the structural id, not the user-settable description. CASE WHEN position('/' in id) > 0 THEN 1 ELSE 0 END, created_at ASC, id ASC - )"; + )", + key = OWNER_KEY_CASE_SQL + ) + } /// All repos with star counts, mirror-deduplicated via `DEDUP_CTE` and /// ordered newest-first, optionally filtered to one owner. Returns the full @@ -1048,7 +1129,7 @@ impl Db { SELECT repo_id, COUNT(*) AS cnt FROM repo_stars GROUP BY repo_id ) s ON s.repo_id = d.id ORDER BY d.updated_at DESC", - Self::DEDUP_CTE + Self::dedup_cte() ); let rows = sqlx::query(&sql) .bind(owner_key) @@ -1066,7 +1147,7 @@ impl Db { /// Deduped repo list (no stars, no paging) for the unfiltered read surfaces /// (GraphQL `repos`). One logical repo per mirror+canonical group, ordered by - /// the group's most recent activity. Shares `DEDUP_CTE` with the paged path so + /// the group's most recent activity. Shares `dedup_cte()` with the paged path so /// the dedup rule cannot drift; binds a NULL owner filter (all rows). pub async fn list_all_repos_deduped(&self) -> Result> { let sql = format!( @@ -1076,7 +1157,7 @@ impl Db { d.forked_from, d.machine_id FROM deduped d ORDER BY d.updated_at DESC", - Self::DEDUP_CTE + Self::dedup_cte() ); let rows = sqlx::query(&sql) .bind(None::<&str>) @@ -1097,11 +1178,11 @@ impl Db { /// CASE that the live list paths depend on. Allowed dead outside tests. #[cfg_attr(not(test), allow(dead_code))] pub async fn count_repos_deduped(&self) -> Result { - let row = sqlx::query( - "SELECT COUNT(DISTINCT (CASE WHEN owner_did LIKE 'did:key:%' AND position(':' in substr(owner_did, 9)) = 0 THEN substr(owner_did, 9) ELSE owner_did END, name)) AS cnt FROM repos", - ) - .fetch_one(&self.pool) - .await?; + let sql = format!( + "SELECT COUNT(DISTINCT ({key}, name)) AS cnt FROM repos", + key = OWNER_KEY_CASE_SQL + ); + let row = sqlx::query(&sql).fetch_one(&self.pool).await?; Ok(row.get::("cnt")) } diff --git a/crates/gitlawb-node/src/visibility.rs b/crates/gitlawb-node/src/visibility.rs index aed987d..3e231b7 100644 --- a/crates/gitlawb-node/src/visibility.rs +++ b/crates/gitlawb-node/src/visibility.rs @@ -24,8 +24,12 @@ fn nfc(s: &str) -> String { s.nfc().collect() } -/// True if `caller` is the repo owner (matches full did:key or its short form), -/// mirroring the owner-match idiom in `api/protect.rs`. +/// True if `caller` is the repo owner. Matches exactly or against the +/// did:key short form — a non-key DID (did:gitlawb:/did:web:) in `owner_did` +/// never matches a bare short segment, and a non-key DID in `caller` never +/// matches a did:key owner's short form. Uses `normalize_owner_key` (not +/// `did_matches`) so the did:key-only rule is stricter: cross-method matching +/// would let a non-key canonical row bypass the #124 visibility gate. fn is_owner(owner_did: &str, caller: &str) -> bool { let owner_short = crate::db::normalize_owner_key(owner_did); caller == owner_did || caller == owner_short @@ -299,6 +303,31 @@ mod tests { ); } + #[test] + fn non_key_owner_bare_short_does_not_match() { + // A non-key DID owner (did:gitlawb:z6MkFoo) probed with the bare last + // segment (z6MkFoo) must NOT match — the old is_owner would return true + // via split(':').next_back(), which this PR tightens. + let rules = [rule("/", VisibilityMode::A, &[])]; + assert_eq!( + visibility_check(&rules, false, "did:gitlawb:z6MkFoo", Some("z6MkFoo"), "/"), + Decision::Deny, + "bare short must not match a non-key DID owner" + ); + // The full non-key DID still matches itself. + assert_eq!( + visibility_check( + &rules, + false, + "did:gitlawb:z6MkFoo", + Some("did:gitlawb:z6MkFoo"), + "/" + ), + Decision::Allow, + "full non-key DID still matches itself" + ); + } + #[test] fn root_rule_allows_listed_reader() { let rules = [rule("/", VisibilityMode::A, &["did:key:z6MkFriend"])]; From cc0722e9bc79fe5480ec49e8e52b479fd77c0d1e Mon Sep 17 00:00:00 2001 From: Gravirei Date: Thu, 2 Jul 2026 21:47:13 +0600 Subject: [PATCH 7/7] test(node): add SQL round-trip test for normalize_owner_key vs OWNER_KEY_CASE_SQL Verifies every boundary value produces identical results from the Rust function and the SQL CASE expression, preventing silent drift. --- crates/gitlawb-node/src/db/mod.rs | 50 +++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs index 8852895..7c35b67 100644 --- a/crates/gitlawb-node/src/db/mod.rs +++ b/crates/gitlawb-node/src/db/mod.rs @@ -3990,6 +3990,56 @@ mod dedup_db_tests { ); assert!(!got.is_public, "non-key row's is_public must be preserved"); } + + /// Verify that the Rust `normalize_owner_key` and the `OWNER_KEY_CASE_SQL` + /// expression agree on every boundary value in the owner-key normalization + /// set. A mismatch would let the Rust code bind a different key than the SQL + /// predicate filters on, silently breaking the did:key-only matching contract. + #[sqlx::test] + async fn normalize_owner_key_matches_sql_case(pool: PgPool) { + // The full boundary set: did:key short/full, bare, non-key DIDs, + // did:key with extra colon, empty, empty residual, uppercase. + let boundary_values = [ + "did:key:z6Mkfoo", + "z6Mkfoo", + "did:gitlawb:z6Mkfoo", + "did:web:example.com:alice", + "did:key:did:gitlawb:z6Mkfoo", + "", + "did:key:", + "DID:KEY:z6Mkfoo", + ]; + + // Build a VALUES list with the column aliased as `owner_did` so the + // OWNER_KEY_CASE_SQL expression (which references `owner_did`) works + // verbatim — no search-and-replace that could hide a drift. + let values_sql: String = boundary_values + .iter() + .map(|v| format!("('{}'::text)", v)) + .collect::>() + .join(", "); + let sql = format!( + "WITH data(owner_did) AS (VALUES {values_sql}) + SELECT owner_did, ({key}) AS normalized FROM data ORDER BY owner_did", + key = super::OWNER_KEY_CASE_SQL + ); + + let rows: Vec<(String, String)> = sqlx::query_as(&sql).fetch_all(&pool).await.unwrap(); + + assert_eq!( + rows.len(), + boundary_values.len(), + "every boundary value must produce a row" + ); + + for (val, sql_result) in &rows { + let rust_result = super::normalize_owner_key(val); + assert_eq!( + sql_result, rust_result, + "normalize_owner_key(\"{val}\") mismatch: Rust = \"{rust_result}\", SQL CASE = \"{sql_result}\"" + ); + } + } } /// Exercises the iCaptcha single-use proof ledger (`icaptcha_consumed_proofs`),