From 4d8fe9a5e0647b75628c34eaa0dc4bf9742e9e2a Mon Sep 17 00:00:00 2001 From: npub17jjz49l9jjmhhk7cac63j8yt9z555n9cw8vk7v5jz4vzw4ppld5qgj57cc Date: Wed, 24 Jun 2026 15:37:32 -0400 Subject: [PATCH 01/10] spike: postgres FTS backend behind BUZZ_SEARCH_BACKEND flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a Postgres full-text search backend as an alternative to Typesense for NIP-50 search, gated behind BUZZ_SEARCH_BACKEND=typesense|postgres|disabled (default typesense — no behavior change for existing deployments). The replacement is structural: NIP-50 search is the only Typesense call site, and the read path already refetches canonical events from Postgres by id, so Typesense was just a lookup index in front of the DB that owns the data. A generated stored tsvector column + GIN index gives the same shape with zero write-path code change. Changes - migrations/0004_search_fts.sql: events.content_tsv GENERATED ALWAYS AS to_tsvector('simple', content) STORED, GIN index, cascades to partitions. - crates/buzz-search: SearchBackend enum (Typesense | Postgres | Disabled), SearchService::with_postgres / ::disabled, postgres.rs backend impl, backend-neutral SearchQuery (structured kinds/authors/channel_ids/since/until; each backend renders its own filter). - crates/buzz-relay/src/config.rs: BUZZ_SEARCH_BACKEND env wired with strict parsing (unknown value → ConfigError::InvalidValue, no silent fallback) + 3 unit tests. - crates/buzz-relay/src/main.rs: dispatch on backend; Postgres → with_postgres using db.pool(); Disabled → no-op; Typesense → existing path. ensure_collection only runs for the Typesense backend. - crates/buzz-relay/src/{handlers/req.rs, api/bridge.rs}: swap to the new SearchService surface. Caller code shrinks — filter parts were already structured. - crates/buzz-db/src/lib.rs: Db::pool() accessor for the PG backend. Validation (against parent 2e426b2f, PG17 side-deployed): - buzz-search lib: 29/29 pass. 
- buzz-relay config tests: 11/11 (incl. 3 new). - NIP-50 e2e on Typesense backend: 5/5 pass (regression baseline). - NIP-50 e2e on Postgres backend: 5/5 pass — including test_nip50_search_relevance_order, confirming ts_rank_cd ranks correctly for the NIP-50 query shape and the 'simple' tokenizer config is acceptable. - Wider e2e_nostr_interop sweep on Postgres: 19/23. The 4 failures reproduce identically on Typesense backend on this branch — pre-existing test-fixture coupling to a hard-coded 'events' collection name, not a regression. This is additive: Typesense remains default; nothing in the existing path is removed. Operators flip BUZZ_SEARCH_BACKEND per release to A/B/rollback. Signed-off-by: Tyler <109685178+tlongwell-block@users.noreply.github.com> Co-authored-by: Sami --- Cargo.lock | 2 + crates/buzz-db/src/lib.rs | 8 + crates/buzz-db/src/migration.rs | 42 +++- crates/buzz-relay/src/api/bridge.rs | 47 ++-- crates/buzz-relay/src/config.rs | 47 ++++ crates/buzz-relay/src/handlers/req.rs | 80 +++--- crates/buzz-relay/src/main.rs | 32 ++- crates/buzz-search/Cargo.toml | 2 + crates/buzz-search/src/error.rs | 4 + crates/buzz-search/src/lib.rs | 335 +++++++++++++++++++------- crates/buzz-search/src/postgres.rs | 296 +++++++++++++++++++++++ crates/buzz-search/src/query.rs | 171 +++++++++++-- migrations/0004_search_fts.sql | 35 +++ schema/schema.sql | 3 + 14 files changed, 899 insertions(+), 205 deletions(-) create mode 100644 crates/buzz-search/src/postgres.rs create mode 100644 migrations/0004_search_fts.sql diff --git a/Cargo.lock b/Cargo.lock index 05ff37be3..150be25dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1088,10 +1088,12 @@ version = "0.1.0" dependencies = [ "buzz-core", "chrono", + "hex", "nostr", "reqwest 0.13.3", "serde", "serde_json", + "sqlx", "thiserror 2.0.18", "tokio", "tracing", diff --git a/crates/buzz-db/src/lib.rs b/crates/buzz-db/src/lib.rs index d6c45f2d3..8600e9faa 100644 --- a/crates/buzz-db/src/lib.rs +++ b/crates/buzz-db/src/lib.rs @@ 
-198,6 +198,14 @@ impl Db { Self { pool } } + /// Returns a clone of the underlying connection pool. + /// + /// Used by the Postgres FTS backend in `buzz-search` so it can run + /// queries against the same pool as the rest of the relay. + pub fn pool(&self) -> PgPool { + self.pool.clone() + } + /// Run pending database migrations. pub async fn migrate(&self) -> Result<()> { migration::run_migrations(&self.pool).await diff --git a/crates/buzz-db/src/migration.rs b/crates/buzz-db/src/migration.rs index f4f3c9ab3..1188bd917 100644 --- a/crates/buzz-db/src/migration.rs +++ b/crates/buzz-db/src/migration.rs @@ -128,7 +128,7 @@ mod tests { fn embedded_migrator_contains_all_schema_migrations() { let migrations: Vec<_> = MIGRATOR.iter().collect(); - assert_eq!(migrations.len(), 3); + assert_eq!(migrations.len(), 4); assert_eq!(migrations[0].version, 1); assert_eq!(&*migrations[0].description, "initial schema"); assert!( @@ -160,6 +160,17 @@ mod tests { && migrations[2].sql.as_str().contains("idx_events_not_before"), "third migration should add the NIP-ER reminder columns and index" ); + + assert_eq!(migrations[3].version, 4); + assert_eq!(&*migrations[3].description, "search fts"); + assert!( + migrations[3].sql.as_str().contains("content_tsv tsvector") + && migrations[3] + .sql + .as_str() + .contains("idx_events_content_tsv"), + "fourth migration should add the generated tsvector column and GIN index" + ); } async fn connect_test_pool() -> PgPool { @@ -192,24 +203,33 @@ mod tests { .expect("read applied migrations") } - /// Returns `schema/schema.sql` with the NIP-ER reminder DDL removed, so it - /// models a pre-stack deployment whose `events` table lacks the reminder - /// columns and index. The strip is asserted: if the snapshot text drifts so - /// these fragments no longer match, the test fails loudly rather than - /// silently loading a snapshot that already carries the reminder columns - /// (which would make migration 0003 collide on re-add). 
+ /// Returns `schema/schema.sql` with the NIP-ER reminder DDL and the + /// search-FTS DDL removed, so it models a pre-stack deployment whose + /// `events` table lacks the reminder columns and the generated tsvector + /// column. The strip is asserted: if the snapshot text drifts so these + /// fragments no longer match, the test fails loudly rather than silently + /// loading a snapshot that already carries the columns (which would make + /// migration 0003 or 0004 collide on re-add). fn pre_reminder_schema_snapshot() -> String { const REMINDER_COLUMNS: &str = " not_before BIGINT,\n delivered_at BIGINT,\n"; const REMINDER_INDEX: &str = "CREATE INDEX idx_events_not_before ON events (not_before)\n WHERE not_before IS NOT NULL AND deleted_at IS NULL AND delivered_at IS NULL;\n"; + const FTS_COLUMN: &str = " content_tsv tsvector\n GENERATED ALWAYS AS (to_tsvector('simple', content)) STORED,\n"; + const FTS_INDEX: &str = + "CREATE INDEX idx_events_content_tsv ON events USING GIN (content_tsv);\n"; assert!( - SCHEMA_SQL.contains(REMINDER_COLUMNS) && SCHEMA_SQL.contains(REMINDER_INDEX), - "schema.sql reminder DDL drifted; update pre_reminder_schema_snapshot to match" + SCHEMA_SQL.contains(REMINDER_COLUMNS) + && SCHEMA_SQL.contains(REMINDER_INDEX) + && SCHEMA_SQL.contains(FTS_COLUMN) + && SCHEMA_SQL.contains(FTS_INDEX), + "schema.sql reminder/FTS DDL drifted; update pre_reminder_schema_snapshot to match" ); SCHEMA_SQL .replace(REMINDER_COLUMNS, "") .replace(REMINDER_INDEX, "") + .replace(FTS_COLUMN, "") + .replace(FTS_INDEX, "") } #[tokio::test] @@ -220,7 +240,7 @@ mod tests { run_migrations(&pool).await.expect("run migrations"); - assert_eq!(applied_versions(&pool).await, vec![1, 2, 3]); + assert_eq!(applied_versions(&pool).await, vec![1, 2, 3, 4]); let events_exists = sqlx::query_scalar::<_, bool>( "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'events')", ) @@ -253,7 +273,7 @@ mod tests { 
run_migrations(&pool).await.expect("baseline migrations"); - assert_eq!(applied_versions(&pool).await, vec![1, 2, 3]); + assert_eq!(applied_versions(&pool).await, vec![1, 2, 3, 4]); let allowlist_count = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM pubkey_allowlist") .fetch_one(&pool) .await diff --git a/crates/buzz-relay/src/api/bridge.rs b/crates/buzz-relay/src/api/bridge.rs index 6006f78a1..788ff7acc 100644 --- a/crates/buzz-relay/src/api/bridge.rs +++ b/crates/buzz-relay/src/api/bridge.rs @@ -719,9 +719,10 @@ async fn handle_bridge_search( continue; } - // Build Typesense filter — push channel scope + NIP-01 constraints. + // Build the backend-neutral search filter — push channel scope + + // NIP-01 constraints into structured fields. let h_tag = nostr::SingleLetterTag::lowercase(nostr::Alphabet::H); - let filter_channel_scope = + let filter_channel_scope: Vec = if let Some(vs) = filter.generic_tags.get(&h_tag).filter(|vs| !vs.is_empty()) { let valid: Vec = vs .iter() @@ -732,37 +733,31 @@ async fn handle_bridge_search( if valid.is_empty() { continue; // All #h values inaccessible — skip filter. 
} - format!("channel_id:=[{}]", valid.join(",")) + valid } else { channel_scope.clone() }; - let mut filter_parts = vec![filter_channel_scope]; - if let Some(ref kinds) = filter.kinds { - if !kinds.is_empty() { - let kind_vals: Vec = kinds.iter().map(|k| k.as_u16().to_string()).collect(); - filter_parts.push(format!("kind:=[{}]", kind_vals.join(","))); - } - } - if let Some(ref authors) = filter.authors { - if !authors.is_empty() { - let author_vals: Vec = authors.iter().map(|a| a.to_hex()).collect(); - filter_parts.push(format!("pubkey:=[{}]", author_vals.join(","))); - } - } - if let Some(since) = filter.since { - filter_parts.push(format!("created_at:>={}", since.as_secs())); - } - if let Some(until) = filter.until { - filter_parts.push(format!("created_at:<={}", until.as_secs())); - } - - let filter_by = filter_parts.join(" && "); + let kinds_vec: Vec = filter + .kinds + .as_ref() + .map(|ks| ks.iter().map(|k| k.as_u16()).collect()) + .unwrap_or_default(); + let authors_vec: Vec = filter + .authors + .as_ref() + .map(|auths| auths.iter().map(|a| a.to_hex()).collect()) + .unwrap_or_default(); + let since_secs = filter.since.map(|t| t.as_secs() as i64); + let until_secs = filter.until.map(|t| t.as_secs() as i64); let search_query = buzz_search::SearchQuery { q: search_text, - filter_by: Some(filter_by), - sort_by: None, // Typesense default = relevance + kinds: kinds_vec, + authors: authors_vec, + channel_ids: filter_channel_scope, + since: since_secs, + until: until_secs, page: 1, per_page: limit, }; diff --git a/crates/buzz-relay/src/config.rs b/crates/buzz-relay/src/config.rs index 714ced400..df0056314 100644 --- a/crates/buzz-relay/src/config.rs +++ b/crates/buzz-relay/src/config.rs @@ -35,6 +35,8 @@ pub struct Config { pub typesense_url: String, /// Typesense API key. pub typesense_key: String, + /// Which search backend the relay should use. + pub search_backend: buzz_search::SearchBackend, /// Public WebSocket URL of this relay, advertised in NIP-11. 
pub relay_url: String, /// Maximum number of concurrent WebSocket connections. @@ -151,6 +153,15 @@ impl Config { let typesense_key = std::env::var("TYPESENSE_API_KEY").unwrap_or_else(|_| "buzz_dev_key".to_string()); + let search_backend = match std::env::var("BUZZ_SEARCH_BACKEND") { + Ok(raw) => buzz_search::SearchBackend::parse(&raw).map_err(|bad| { + ConfigError::InvalidValue(format!( + "BUZZ_SEARCH_BACKEND={bad:?} (expected `typesense`, `postgres`, or `disabled`)" + )) + })?, + Err(_) => buzz_search::SearchBackend::Typesense, + }; + let relay_url = std::env::var("RELAY_URL").unwrap_or_else(|_| "ws://localhost:3000".to_string()); @@ -377,6 +388,7 @@ impl Config { redis_url, typesense_url, typesense_key, + search_backend, relay_url, max_connections, max_concurrent_handlers, @@ -548,4 +560,39 @@ mod tests { Some("custom.example.com") ); } + + #[test] + fn search_backend_defaults_to_typesense() { + let _guard = ENV_MUTEX.lock().unwrap(); + std::env::remove_var("BUZZ_SEARCH_BACKEND"); + let config = Config::from_env().expect("default config"); + assert_eq!(config.search_backend, buzz_search::SearchBackend::Typesense); + } + + #[test] + fn search_backend_parses_postgres_and_disabled() { + let _guard = ENV_MUTEX.lock().unwrap(); + + std::env::set_var("BUZZ_SEARCH_BACKEND", "postgres"); + let config = Config::from_env().expect("config"); + assert_eq!(config.search_backend, buzz_search::SearchBackend::Postgres); + + std::env::set_var("BUZZ_SEARCH_BACKEND", "disabled"); + let config = Config::from_env().expect("config"); + assert_eq!(config.search_backend, buzz_search::SearchBackend::Disabled); + + std::env::remove_var("BUZZ_SEARCH_BACKEND"); + } + + #[test] + fn search_backend_rejects_unknown_value() { + let _guard = ENV_MUTEX.lock().unwrap(); + std::env::set_var("BUZZ_SEARCH_BACKEND", "postgress"); // typo + let result = Config::from_env(); + std::env::remove_var("BUZZ_SEARCH_BACKEND"); + assert!( + matches!(result, Err(ConfigError::InvalidValue(ref msg)) if 
msg.contains("BUZZ_SEARCH_BACKEND")), + "expected InvalidValue for BUZZ_SEARCH_BACKEND, got {result:?}", + ); + } } diff --git a/crates/buzz-relay/src/handlers/req.rs b/crates/buzz-relay/src/handlers/req.rs index 1893c3c2b..5a1854147 100644 --- a/crates/buzz-relay/src/handlers/req.rs +++ b/crates/buzz-relay/src/handlers/req.rs @@ -277,27 +277,23 @@ const MAX_SEARCH_PAGES: u32 = 10; pub(crate) fn build_search_channel_scope_filter( accessible_channels: &[uuid::Uuid], include_global: bool, -) -> Option { +) -> Option> { if accessible_channels.is_empty() { return if include_global { - Some("channel_id:=__global__".to_string()) + Some(vec![buzz_search::GLOBAL_CHANNEL_SENTINEL.to_string()]) } else { None }; } - let ids: Vec = accessible_channels + let mut ids: Vec = accessible_channels .iter() .map(|id| id.to_string()) .collect(); - Some(if include_global { - format!( - "(channel_id:=[{}] || channel_id:=__global__)", - ids.join(",") - ) - } else { - format!("channel_id:=[{}]", ids.join(",")) - }) + if include_global { + ids.push(buzz_search::GLOBAL_CHANNEL_SENTINEL.to_string()); + } + Some(ids) } #[allow(clippy::too_many_arguments)] @@ -337,17 +333,17 @@ async fn handle_search_req( continue; // NIP-01: limit 0 means "no results from this filter" } - // Push as many NIP-01 constraints into Typesense as possible so - // post-filtering is a correction step, not the primary filter. + // Push as many NIP-01 constraints into the search backend as possible + // so post-filtering is a correction step, not the primary filter. // - // If the filter has a #h tag, push the specific channel(s) into Typesense - // instead of the full accessible set. This prevents cross-channel hits from - // consuming pagination budget and causing under-fetch. - // If the filter has #h, intersect with accessible channels. If all #h - // values are invalid/inaccessible, skip the filter entirely (match nothing) - // rather than broadening to all channels. 
+ // If the filter has a #h tag, push the specific channel(s) into the + // search instead of the full accessible set. This prevents + // cross-channel hits from consuming pagination budget and causing + // under-fetch. If all #h values are invalid/inaccessible, skip the + // filter entirely (match nothing) rather than broadening to all + // channels. let h_tag = nostr::SingleLetterTag::lowercase(nostr::Alphabet::H); - let channel_scope = + let channel_scope: Vec = if let Some(vs) = filter.generic_tags.get(&h_tag).filter(|vs| !vs.is_empty()) { let valid: Vec = vs .iter() @@ -358,31 +354,22 @@ async fn handle_search_req( if valid.is_empty() { continue; // all #h values invalid/inaccessible — skip filter } - format!("channel_id:=[{}]", valid.join(",")) + valid } else { all_channels_filter.clone() }; - let mut filter_parts = vec![channel_scope]; - if let Some(ref kinds) = filter.kinds { - if !kinds.is_empty() { - let kind_vals: Vec = kinds.iter().map(|k| k.as_u16().to_string()).collect(); - filter_parts.push(format!("kind:=[{}]", kind_vals.join(","))); - } - } - if let Some(ref authors) = filter.authors { - if !authors.is_empty() { - let author_vals: Vec = authors.iter().map(|a| a.to_hex()).collect(); - filter_parts.push(format!("pubkey:=[{}]", author_vals.join(","))); - } - } - if let Some(since) = filter.since { - filter_parts.push(format!("created_at:>={}", since.as_secs())); - } - if let Some(until) = filter.until { - filter_parts.push(format!("created_at:<={}", until.as_secs())); - } - - let filter_by = filter_parts.join(" && "); + let kinds_vec: Vec = filter + .kinds + .as_ref() + .map(|ks| ks.iter().map(|k| k.as_u16()).collect()) + .unwrap_or_default(); + let authors_vec: Vec = filter + .authors + .as_ref() + .map(|auths| auths.iter().map(|a| a.to_hex()).collect()) + .unwrap_or_default(); + let since_secs = filter.since.map(|t| t.as_secs() as i64); + let until_secs = filter.until.map(|t| t.as_secs() as i64); // Paginate: keep fetching pages until we've emitted 
`limit` results // or exhausted the search result set. This ensures post-filtering @@ -399,8 +386,11 @@ async fn handle_search_req( let search_query = buzz_search::SearchQuery { q: search_text.clone(), - filter_by: Some(filter_by.clone()), - sort_by: None, // Typesense default = relevance (text_match score) + kinds: kinds_vec.clone(), + authors: authors_vec.clone(), + channel_ids: channel_scope.clone(), + since: since_secs, + until: until_secs, page, per_page, }; @@ -1071,7 +1061,7 @@ mod tests { let scope = build_search_channel_scope_filter(&[channel_id], false) .expect("restricted tokens with channel access should still search that channel"); - assert_eq!(scope, format!("channel_id:=[{channel_id}]")); + assert_eq!(scope, vec![channel_id.to_string()]); } #[test] diff --git a/crates/buzz-relay/src/main.rs b/crates/buzz-relay/src/main.rs index b555c3d9d..bfeea5f2b 100644 --- a/crates/buzz-relay/src/main.rs +++ b/crates/buzz-relay/src/main.rs @@ -8,7 +8,7 @@ use buzz_audit::AuditService; use buzz_auth::AuthService; use buzz_db::{Db, DbConfig}; use buzz_pubsub::PubSubManager; -use buzz_search::{SearchConfig, SearchService}; +use buzz_search::{SearchBackend, SearchConfig, SearchService}; use buzz_relay::config::Config; use buzz_relay::metrics as relay_metrics; @@ -189,15 +189,29 @@ async fn main() -> anyhow::Result<()> { let auth = AuthService::new(config.auth.clone()); - let search_config = SearchConfig { - url: config.typesense_url.clone(), - api_key: config.typesense_key.clone(), - collection: std::env::var("TYPESENSE_COLLECTION").unwrap_or_else(|_| "events".to_string()), + let search = match config.search_backend { + SearchBackend::Typesense => { + let search_config = SearchConfig { + url: config.typesense_url.clone(), + api_key: config.typesense_key.clone(), + collection: std::env::var("TYPESENSE_COLLECTION") + .unwrap_or_else(|_| "events".to_string()), + }; + let service = SearchService::new(search_config); + if let Err(e) = service.ensure_collection().await { + 
error!("Typesense collection setup failed (non-fatal): {e}"); + } + service + } + SearchBackend::Postgres => { + info!("Search backend: postgres (content_tsv generated column)"); + SearchService::with_postgres(db.pool()) + } + SearchBackend::Disabled => { + info!("Search backend: disabled (NIP-50 queries will return empty)"); + SearchService::disabled() + } }; - let search = SearchService::new(search_config); - if let Err(e) = search.ensure_collection().await { - error!("Typesense collection setup failed (non-fatal): {e}"); - } let workflow_config = buzz_workflow::WorkflowConfig::default(); let workflow_engine = Arc::new(WorkflowEngine::new(db.clone(), workflow_config)); diff --git a/crates/buzz-search/Cargo.toml b/crates/buzz-search/Cargo.toml index 3c571733a..5e9e2d5e8 100644 --- a/crates/buzz-search/Cargo.toml +++ b/crates/buzz-search/Cargo.toml @@ -18,3 +18,5 @@ chrono = { workspace = true } tracing = { workspace = true } thiserror = { workspace = true } nostr = { workspace = true } +sqlx = { workspace = true } +hex = { workspace = true } diff --git a/crates/buzz-search/src/error.rs b/crates/buzz-search/src/error.rs index e17fb5f06..fd02f0958 100644 --- a/crates/buzz-search/src/error.rs +++ b/crates/buzz-search/src/error.rs @@ -36,4 +36,8 @@ pub enum SearchError { /// The provided event ID is not valid hex. #[error("Invalid event_id: {0}")] InvalidEventId(String), + + /// A database error from sqlx (Postgres backend only). + #[error("Database error: {0}")] + Database(#[from] sqlx::Error), } diff --git a/crates/buzz-search/src/lib.rs b/crates/buzz-search/src/lib.rs index 2a9c3b418..f52e404bb 100644 --- a/crates/buzz-search/src/lib.rs +++ b/crates/buzz-search/src/lib.rs @@ -1,30 +1,77 @@ #![deny(unsafe_code)] #![warn(missing_docs)] -//! Buzz search — Typesense integration for full-text event search. +//! Buzz search — pluggable full-text event search. +//! +//! Two production backends and a no-op: +//! +//! 
- [`SearchService::new`] (Typesense): mirrors event content into a Typesense +//! collection via the indexing worker in `buzz-relay/src/state.rs`. The +//! `search()` path returns event IDs that the relay then refetches from +//! Postgres. +//! - [`SearchService::with_postgres`] (Postgres FTS): runs `plainto_tsquery` +//! against a generated `content_tsv` column on `events`. No write-path +//! indexing needed — the generated stored column auto-populates on INSERT. +//! - [`SearchService::disabled`]: returns empty results for every query and +//! accepts indexing calls as no-ops. Used when NIP-50 search is intentionally +//! off (e.g. for tenants who opted out). +//! +//! The relay picks the backend via [`SearchBackend`]; [`SearchConfig`] carries only the Typesense connection settings. /// Typesense collection schema management. pub mod collection; /// Search error types. pub mod error; -/// Event indexing helpers. +/// Event indexing helpers (Typesense). pub mod index; -/// Search query execution. +/// Postgres full-text-search backend. +pub mod postgres; +/// Search query types and Typesense execution. pub mod query; pub use error::SearchError; -pub use query::{SearchHit, SearchQuery, SearchResult}; +pub use query::{SearchHit, SearchQuery, SearchResult, GLOBAL_CHANNEL_SENTINEL}; use buzz_core::event::StoredEvent; +use sqlx::PgPool; + +/// Which search backend the relay should use. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SearchBackend { + /// Typesense (current production default). + Typesense, + /// Postgres full-text search via the `content_tsv` generated column. + Postgres, + /// NIP-50 search is disabled; every query returns empty. + Disabled, +} + +impl SearchBackend { + /// Parses a backend name from a string. Case-insensitive. Accepted values: + /// `typesense`, `postgres` (alias `pg`), `disabled` (aliases `off`, `none`). + /// Returns `Err` holding the lowercased input on any other value so misconfiguration surfaces + /// loudly rather than silently falling back.
+ pub fn parse(s: &str) -> Result { + match s.to_ascii_lowercase().as_str() { + "typesense" => Ok(Self::Typesense), + "postgres" | "pg" => Ok(Self::Postgres), + "disabled" | "off" | "none" => Ok(Self::Disabled), + other => Err(other.to_string()), + } + } +} -/// Configuration for the Typesense search backend. +/// Configuration for the search backend. /// -/// [`SearchConfig::default`] reads from environment variables so that no -/// credentials are ever hardcoded in source: +/// [`SearchConfig::default`] reads the Typesense settings listed below from the +/// environment. This struct only carries Typesense-specific fields; to use the +/// Postgres or Disabled backend, set the `BUZZ_SEARCH_BACKEND` environment +/// variable (parsed in `buzz-relay/src/config.rs`) and construct the service +/// with [`SearchService::with_postgres`] or [`SearchService::disabled`]. /// /// | Field | Environment variable | Default (dev only) | /// |--------------|-------------------------|--------------------------| /// | `url` | `TYPESENSE_URL` | `http://localhost:8108` | -/// | `api_key` | `TYPESENSE_API_KEY` | `buzz_dev_key` | +/// | `api_key` | `TYPESENSE_API_KEY` | `buzz_dev_key` | /// | `collection` | `TYPESENSE_COLLECTION` | `events` | /// /// In production, always set `TYPESENSE_API_KEY` explicitly. The fallback @@ -50,15 +97,30 @@ impl Default for SearchConfig { } } +/// Internal Typesense client + config bundle. Construction is handled via +/// [`SearchService::new`] / [`SearchService::with_client`]; this type is +/// exposed only because it appears in a public enum variant. #[derive(Debug, Clone)] -/// Typesense search client. -pub struct SearchService { +pub struct TypesenseInner { client: reqwest::Client, config: SearchConfig, } +/// Pluggable search client. Construct via [`SearchService::new`] (Typesense), +/// [`SearchService::with_postgres`] (Postgres FTS), or +/// [`SearchService::disabled`] (no-op). +#[derive(Debug, Clone)] +pub enum SearchService { + /// Typesense backend.
+ Typesense(TypesenseInner), + /// Postgres FTS backend, holding the relay's existing `PgPool`. + Postgres(PgPool), + /// No-op backend; every search returns empty. + Disabled, +} + impl SearchService { - /// Creates a new `SearchService` with a default HTTP client. + /// Creates a Typesense `SearchService` with a default HTTP client. pub fn new(config: SearchConfig) -> Self { // SAFETY: default builder with only timeout/connect_timeout config cannot fail let client = reqwest::Client::builder() @@ -66,89 +128,161 @@ impl SearchService { .connect_timeout(std::time::Duration::from_secs(5)) .build() .expect("SAFETY: default builder with only timeout config cannot fail"); - Self { client, config } + Self::Typesense(TypesenseInner { client, config }) } - /// Creates a `SearchService` with an explicit HTTP client (useful in tests). + /// Creates a Typesense `SearchService` with an explicit HTTP client + /// (useful in tests). pub fn with_client(client: reqwest::Client, config: SearchConfig) -> Self { - Self { client, config } + Self::Typesense(TypesenseInner { client, config }) } + /// Creates a Postgres FTS `SearchService` backed by the supplied pool. + /// No indexing setup is required — the `content_tsv` generated column + /// populates on every INSERT. + pub fn with_postgres(pool: PgPool) -> Self { + Self::Postgres(pool) + } + + /// Creates a no-op `SearchService` that returns empty results for every + /// query. Useful when search is intentionally disabled. + pub fn disabled() -> Self { + Self::Disabled + } + + /// Ensures the backend is ready to serve search. + /// + /// - **Typesense**: creates the configured collection if it doesn't exist. + /// - **Postgres / Disabled**: no-op. + /// /// Idempotent — safe to call on every startup. 
pub async fn ensure_collection(&self) -> Result<(), SearchError> { - collection::ensure_collection( - &self.client, - &self.config.url, - &self.config.api_key, - &self.config.collection, - ) - .await + match self { + Self::Typesense(t) => { + collection::ensure_collection( + &t.client, + &t.config.url, + &t.config.api_key, + &t.config.collection, + ) + .await + } + Self::Postgres(_) | Self::Disabled => Ok(()), + } } /// Indexes a single event (upsert semantics). + /// + /// - **Typesense**: writes a document to the collection. + /// - **Postgres**: no-op — the `content_tsv` generated stored column is + /// populated automatically on the original INSERT. + /// - **Disabled**: no-op. pub async fn index_event(&self, event: &StoredEvent) -> Result<(), SearchError> { - index::index_event( - &self.client, - &self.config.url, - &self.config.api_key, - &self.config.collection, - event, - ) - .await + match self { + Self::Typesense(t) => { + index::index_event( + &t.client, + &t.config.url, + &t.config.api_key, + &t.config.collection, + event, + ) + .await + } + Self::Postgres(_) | Self::Disabled => Ok(()), + } } /// Indexes a batch of events. Returns the number successfully indexed. + /// For Postgres and Disabled backends, returns `events.len()` (no work). pub async fn index_batch(&self, events: &[StoredEvent]) -> Result { - index::index_batch( - &self.client, - &self.config.url, - &self.config.api_key, - &self.config.collection, - events, - ) - .await + match self { + Self::Typesense(t) => { + index::index_batch( + &t.client, + &t.config.url, + &t.config.api_key, + &t.config.collection, + events, + ) + .await + } + Self::Postgres(_) | Self::Disabled => Ok(events.len()), + } } /// Executes a search query and returns matching results. 
pub async fn search(&self, query: &SearchQuery) -> Result { - query::search( - &self.client, - &self.config.url, - &self.config.api_key, - &self.config.collection, - query, - ) - .await + match self { + Self::Typesense(t) => { + query::search( + &t.client, + &t.config.url, + &t.config.api_key, + &t.config.collection, + query, + ) + .await + } + Self::Postgres(pool) => postgres::search(pool, query).await, + Self::Disabled => Ok(SearchResult { + hits: Vec::new(), + found: 0, + page: query.page, + }), + } } /// Removes an event from the search index by its event ID hex string. + /// + /// - **Typesense**: deletes the document. + /// - **Postgres**: no-op — `content_tsv` is tied to the event row; + /// removing the row removes the index entry, and the relay's event + /// deletion path already handles that. + /// - **Disabled**: no-op. pub async fn delete_event(&self, event_id: &str) -> Result<(), SearchError> { - index::delete_event( - &self.client, - &self.config.url, - &self.config.api_key, - &self.config.collection, - event_id, - ) - .await + match self { + Self::Typesense(t) => { + index::delete_event( + &t.client, + &t.config.url, + &t.config.api_key, + &t.config.collection, + event_id, + ) + .await + } + Self::Postgres(_) | Self::Disabled => Ok(()), + } } - /// Checks that the Typesense server is reachable and healthy. + /// Checks that the backend is reachable and healthy. 
pub async fn health_check(&self) -> Result<(), SearchError> { - let url = format!("{}/health", self.config.url); - let resp = self - .client - .get(&url) - .header("X-TYPESENSE-API-KEY", &self.config.api_key) - .send() - .await?; - - let status = resp.status().as_u16(); - if status == 200 { - Ok(()) - } else { - let body = resp.text().await.unwrap_or_default(); - Err(SearchError::Api { status, body }) + match self { + Self::Typesense(t) => { + let url = format!("{}/health", t.config.url); + let resp = t + .client + .get(&url) + .header("X-TYPESENSE-API-KEY", &t.config.api_key) + .send() + .await?; + + let status = resp.status().as_u16(); + if status == 200 { + Ok(()) + } else { + let body = resp.text().await.unwrap_or_default(); + Err(SearchError::Api { status, body }) + } + } + Self::Postgres(pool) => { + sqlx::query_scalar::<_, i32>("SELECT 1") + .fetch_one(pool) + .await?; + Ok(()) + } + Self::Disabled => Ok(()), } } } @@ -171,12 +305,19 @@ mod integration_tests { .unwrap_or(false) } - fn make_service(collection: &str) -> SearchService { - SearchService::new(SearchConfig { + fn make_service(collection: &str) -> (SearchService, SearchConfig, reqwest::Client) { + let config = SearchConfig { url: "http://localhost:8108".into(), api_key: "buzz_dev_key".into(), collection: collection.to_string(), - }) + }; + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(10)) + .connect_timeout(std::time::Duration::from_secs(5)) + .build() + .expect("client"); + let service = SearchService::with_client(client.clone(), config.clone()); + (service, config, client) } fn make_stored_event(content: &str, kind: Kind) -> StoredEvent { @@ -188,15 +329,11 @@ mod integration_tests { StoredEvent::new(event, None) } - async fn drop_collection(service: &SearchService) { - let url = format!( - "{}/collections/{}", - service.config.url, service.config.collection - ); - let _ = service - .client + async fn drop_collection(config: &SearchConfig, client: 
&reqwest::Client) { + let url = format!("{}/collections/{}", config.url, config.collection); + let _ = client .delete(&url) - .header("X-TYPESENSE-API-KEY", &service.config.api_key) + .header("X-TYPESENSE-API-KEY", &config.api_key) .send() .await; } @@ -208,13 +345,13 @@ mod integration_tests { return; } let collection = format!("events_test_{}", Uuid::new_v4().simple()); - let service = make_service(&collection); + let (service, config, client) = make_service(&collection); service.ensure_collection().await.expect("first call"); service .ensure_collection() .await .expect("idempotency check"); - drop_collection(&service).await; + drop_collection(&config, &client).await; } #[tokio::test] @@ -224,7 +361,7 @@ mod integration_tests { return; } let collection = format!("events_test_{}", Uuid::new_v4().simple()); - let service = make_service(&collection); + let (service, config, client) = make_service(&collection); service.ensure_collection().await.unwrap(); let unique_token = format!("buzz_search_test_{}", Uuid::new_v4().simple()); @@ -246,7 +383,7 @@ mod integration_tests { assert_eq!(result.hits[0].event_id, event_id); assert!(result.hits[0].content.contains(&unique_token)); - drop_collection(&service).await; + drop_collection(&config, &client).await; } #[tokio::test] @@ -256,7 +393,7 @@ mod integration_tests { return; } let collection = format!("events_test_{}", Uuid::new_v4().simple()); - let service = make_service(&collection); + let (service, config, client) = make_service(&collection); service.ensure_collection().await.unwrap(); let events: Vec = (0..5) @@ -272,7 +409,7 @@ mod integration_tests { service.delete_event(&event_id).await.unwrap(); service.delete_event(&event_id).await.unwrap(); // idempotent - drop_collection(&service).await; + drop_collection(&config, &client).await; } #[tokio::test] @@ -282,7 +419,7 @@ mod integration_tests { return; } let collection = format!("events_test_{}", Uuid::new_v4().simple()); - let service = make_service(&collection); + 
let (service, config, client) = make_service(&collection); service.ensure_collection().await.unwrap(); let unique = format!("filter_test_{}", Uuid::new_v4().simple()); @@ -294,7 +431,7 @@ mod integration_tests { let result = service .search(&SearchQuery { q: unique.clone(), - filter_by: Some("kind:=1".into()), + kinds: vec![1], ..Default::default() }) .await @@ -304,6 +441,28 @@ mod integration_tests { assert_eq!(hit.kind, 1); } - drop_collection(&service).await; + drop_collection(&config, &client).await; + } + + #[tokio::test] + async fn disabled_backend_returns_empty_results() { + let service = SearchService::disabled(); + let result = service.search(&SearchQuery::default()).await.unwrap(); + assert_eq!(result.found, 0); + assert!(result.hits.is_empty()); + + // index_* and delete_* are no-ops, but exercise them so future changes + // that re-introduce side effects break this test loudly. + let keys = nostr::Keys::generate(); + let event = nostr::EventBuilder::new(nostr::Kind::TextNote, "hello") + .tags([]) + .sign_with_keys(&keys) + .expect("sign"); + let stored = StoredEvent::new(event, None); + service.index_event(&stored).await.unwrap(); + assert_eq!(service.index_batch(&[stored]).await.unwrap(), 1); + service.delete_event("0".repeat(64).as_str()).await.unwrap(); + service.health_check().await.unwrap(); + service.ensure_collection().await.unwrap(); } } diff --git a/crates/buzz-search/src/postgres.rs b/crates/buzz-search/src/postgres.rs new file mode 100644 index 000000000..00e6aa3cf --- /dev/null +++ b/crates/buzz-search/src/postgres.rs @@ -0,0 +1,296 @@ +//! Postgres full-text-search backend. +//! +//! Mirrors the read shape of the Typesense backend: returns event IDs (plus +//! the trivial metadata callers need to fetch the canonical event from +//! Postgres) so the relay's existing +//! `db.get_events_by_ids` → `filters_match` → auth post-filter chain in +//! `crates/buzz-relay/src/handlers/req.rs` is unchanged. +//! +//! 
Matching uses `plainto_tsquery('simple', $q)` against the `content_tsv` +//! generated column added in migration `0004_search_fts.sql`. Pushdowns: +//! +//! - `kinds` → `kind = ANY($kinds)` +//! - `authors` → `pubkey = ANY($authors)` (hex-decoded) +//! - `channel_ids` → `channel_id = ANY($chans)` with optional +//! `OR channel_id IS NULL` when the global sentinel +//! is present +//! - `since` / `until` → `created_at >= … AND created_at <= …` +//! +//! Results are ordered by `created_at DESC` — NIP-50 does not require strict +//! relevance ordering, and chronological ordering is what the existing Buzz +//! clients expect. + +use chrono::{DateTime, Utc}; +use sqlx::{PgPool, Row}; +use tracing::debug; + +use crate::error::SearchError; +use crate::query::{SearchHit, SearchQuery, SearchResult, GLOBAL_CHANNEL_SENTINEL}; + +/// Maximum `per_page` value the Postgres backend will honor in a single +/// query. The relay paginates internally; this is a guardrail against +/// pathological client input rather than a hard NIP-50 limit. +const MAX_PER_PAGE: u32 = 250; + +/// Executes a search query against Postgres FTS and returns parsed results. +/// +/// Returns `found` as the total number of matching rows (`COUNT(*) OVER ()` +/// window) so the relay's pagination logic can compute exhaustion the same +/// way it does against Typesense. +pub async fn search(pool: &PgPool, query: &SearchQuery) -> Result { + debug!( + q = %query.q, + page = query.page, + per_page = query.per_page, + "Executing Postgres FTS search" + ); + + let per_page = query.per_page.min(MAX_PER_PAGE).max(1) as i64; + let page = query.page.max(1) as i64; + let offset = (page - 1) * per_page; + + // Decode author hex once; reject any malformed hex by silently dropping it + // (matches Typesense behavior where an invalid pubkey simply fails to + // match anything). 
+ let author_bytes: Vec> = query + .authors + .iter() + .filter_map(|hex_str| hex::decode(hex_str.trim()).ok()) + .filter(|b| b.len() == 32) + .collect(); + + // Channel filter splits the sentinel out so we can render + // (channel_id = ANY($chans) OR channel_id IS NULL) + let (channel_uuids, include_global) = split_channel_filter(&query.channel_ids); + + // Build the SQL with literal `ANY` arrays. Using QueryBuilder would be + // cleaner, but the dynamic shape (optional clauses) is small enough that + // hand-rolling a query with a stable parameter ordering is more readable + // and easier to audit. + let mut sql = String::from( + "SELECT id, pubkey, kind, channel_id, created_at, content, \ + COUNT(*) OVER () AS total \ + FROM events \ + WHERE deleted_at IS NULL", + ); + + // `simple` matches the tokenizer used by the `content_tsv` generated + // column in migration 0004. plainto_tsquery treats the input as a plain + // string (handles spaces, ignores punctuation) — closest analogue to + // Typesense's default `query_by=content` behavior. 
+ let mut binds = Binds::new(); + let q_trim = query.q.trim(); + let has_text = !q_trim.is_empty() && q_trim != "*"; + if has_text { + let q_idx = binds.push_text(q_trim); + sql.push_str(&format!( + " AND content_tsv @@ plainto_tsquery('simple', ${q_idx})" + )); + } + + if !query.kinds.is_empty() { + let kinds_i32: Vec = query.kinds.iter().map(|k| *k as i32).collect(); + let idx = binds.push_kinds(kinds_i32); + sql.push_str(&format!(" AND kind = ANY(${idx})")); + } + + if !query.authors.is_empty() { // fail closed: keyed on the caller's list, so if every entry was malformed the empty ANY() matches nothing + let idx = binds.push_authors(author_bytes); + sql.push_str(&format!(" AND pubkey = ANY(${idx})")); + } + + if !channel_uuids.is_empty() && include_global { + let idx = binds.push_uuids(channel_uuids); + sql.push_str(&format!( + " AND (channel_id = ANY(${idx}) OR channel_id IS NULL)" + )); + } else if !include_global && !query.channel_ids.is_empty() { // fail closed: also taken when every supplied id was malformed, so the scope clause is never dropped + let idx = binds.push_uuids(channel_uuids); + sql.push_str(&format!(" AND channel_id = ANY(${idx})")); + } else if include_global { + sql.push_str(" AND channel_id IS NULL"); + } + + if let Some(since) = query.since { + let idx = binds.push_timestamp(unix_to_timestamptz(since)); + sql.push_str(&format!(" AND created_at >= ${idx}")); + } + if let Some(until) = query.until { + let idx = binds.push_timestamp(unix_to_timestamptz(until)); + sql.push_str(&format!(" AND created_at <= ${idx}")); + } + + sql.push_str(" ORDER BY created_at DESC"); + let limit_idx = binds.push_i64(per_page); + sql.push_str(&format!(" LIMIT ${limit_idx}")); + let offset_idx = binds.push_i64(offset); + sql.push_str(&format!(" OFFSET ${offset_idx}")); + + // The SQL string is built only from static fragments and `$N` parameter + // placeholders — every dynamic value flows through the bind list. Wrap + // with `AssertSqlSafe` to satisfy sqlx 0.9's static-SQL lint.
+ let mut q = sqlx::query(sqlx::AssertSqlSafe(sql)); + q = binds.apply(q); + let rows = q.fetch_all(pool).await?; + + let mut found: u64 = 0; + let mut hits: Vec = Vec::with_capacity(rows.len()); + for row in rows { + let total: i64 = row.try_get("total").unwrap_or(0); + if found == 0 && total > 0 { + found = total as u64; + } + + let id: Vec = row.try_get("id")?; + let pubkey: Vec = row.try_get("pubkey")?; + let kind_i32: i32 = row.try_get("kind")?; + let channel_uuid: Option = row.try_get("channel_id")?; + let created_at: DateTime = row.try_get("created_at")?; + let content: String = row.try_get("content")?; + + hits.push(SearchHit { + event_id: hex::encode(&id), + content, + kind: u16::try_from(kind_i32).unwrap_or(0), + pubkey: hex::encode(&pubkey), + channel_id: channel_uuid.map(|u| u.to_string()), + created_at: created_at.timestamp(), + // Postgres backend doesn't expose a relevance score on the hit; + // ts_rank_cd is available but unused by the consumer (req.rs + // ignores `score` entirely). Leave at 0.0 for honesty. + score: 0.0, + }); + } + + Ok(SearchResult { + hits, + found, + page: query.page, + }) +} + +fn split_channel_filter(channel_ids: &[String]) -> (Vec, bool) { + let mut include_global = false; + let mut uuids: Vec = Vec::with_capacity(channel_ids.len()); + for id in channel_ids { + if id == GLOBAL_CHANNEL_SENTINEL { + include_global = true; + } else if let Ok(u) = uuid::Uuid::parse_str(id) { + uuids.push(u); + } + // silently drop malformed UUID strings — matches Typesense's behavior + // of treating an unknown channel as a non-match + } + (uuids, include_global) +} + +fn unix_to_timestamptz(seconds: i64) -> DateTime { + DateTime::::from_timestamp(seconds, 0).unwrap_or_else(|| { + // `seconds` is outside chrono's representable range. Fall back to the epoch + // rather than clamping — Buzz events have second-precision created_at + // bound to NIP-01's u32 range, so this branch is effectively unreachable.
+ DateTime::::from_timestamp(0, 0).expect("epoch is valid") + }) +} + +/// Bookkeeping for SQL parameter ordering. Each `push_*` records the binding +/// and returns the 1-indexed parameter slot. `apply` re-binds them in order +/// on the final `sqlx::query` so the index labels in the SQL string line up. +struct Binds { + items: Vec, +} + +enum BoundValue { + Text(String), + KindList(Vec), + AuthorList(Vec>), + UuidList(Vec), + Timestamp(DateTime), + Int8(i64), +} + +impl Binds { + fn new() -> Self { + Self { items: Vec::new() } + } + fn next_idx(&self) -> usize { + self.items.len() + 1 + } + fn push_text(&mut self, s: &str) -> usize { + let i = self.next_idx(); + self.items.push(BoundValue::Text(s.to_string())); + i + } + fn push_kinds(&mut self, v: Vec) -> usize { + let i = self.next_idx(); + self.items.push(BoundValue::KindList(v)); + i + } + fn push_authors(&mut self, v: Vec>) -> usize { + let i = self.next_idx(); + self.items.push(BoundValue::AuthorList(v)); + i + } + fn push_uuids(&mut self, v: Vec) -> usize { + let i = self.next_idx(); + self.items.push(BoundValue::UuidList(v)); + i + } + fn push_timestamp(&mut self, t: DateTime) -> usize { + let i = self.next_idx(); + self.items.push(BoundValue::Timestamp(t)); + i + } + fn push_i64(&mut self, n: i64) -> usize { + let i = self.next_idx(); + self.items.push(BoundValue::Int8(n)); + i + } + fn apply<'q>( + self, + mut q: sqlx::query::Query<'q, sqlx::Postgres, sqlx::postgres::PgArguments>, + ) -> sqlx::query::Query<'q, sqlx::Postgres, sqlx::postgres::PgArguments> { + for item in self.items { + q = match item { + BoundValue::Text(s) => q.bind(s), + BoundValue::KindList(v) => q.bind(v), + BoundValue::AuthorList(v) => q.bind(v), + BoundValue::UuidList(v) => q.bind(v), + BoundValue::Timestamp(t) => q.bind(t), + BoundValue::Int8(n) => q.bind(n), + }; + } + q + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn split_channel_filter_extracts_global_sentinel() { + let (uuids, global) = 
split_channel_filter(&[ + "11111111-1111-1111-1111-111111111111".into(), + GLOBAL_CHANNEL_SENTINEL.to_string(), + "not-a-uuid".into(), + "22222222-2222-2222-2222-222222222222".into(), + ]); + assert!(global, "sentinel should be detected"); + assert_eq!(uuids.len(), 2, "malformed UUID should be dropped silently"); + } + + #[test] + fn split_channel_filter_no_sentinel_means_no_global() { + let (uuids, global) = + split_channel_filter(&["11111111-1111-1111-1111-111111111111".into()]); + assert!(!global); + assert_eq!(uuids.len(), 1); + } + + #[test] + fn unix_to_timestamptz_round_trips() { + let ts = unix_to_timestamptz(1_700_000_000); + assert_eq!(ts.timestamp(), 1_700_000_000); + } +} diff --git a/crates/buzz-search/src/query.rs b/crates/buzz-search/src/query.rs index c440d7bc0..0981eaaea 100644 --- a/crates/buzz-search/src/query.rs +++ b/crates/buzz-search/src/query.rs @@ -5,15 +5,34 @@ use tracing::debug; use crate::error::SearchError; -/// Parameters for a Typesense search request. +/// Sentinel channel identifier used for events that are not scoped to any +/// channel. Mirrored verbatim from `crates/buzz-search/src/index.rs` so the +/// query layer can pass it through without depending on the indexer. +pub const GLOBAL_CHANNEL_SENTINEL: &str = "__global__"; + +/// Backend-neutral search query. +/// +/// Constructed by `req.rs` from a NIP-50 filter and passed to whichever +/// [`crate::SearchService`] backend is active. Each backend renders these +/// structured fields into its own filter syntax (Typesense `filter_by` string, +/// Postgres `WHERE` clause, …) so the call site doesn't have to know which +/// backend is in use. #[derive(Debug, Clone)] pub struct SearchQuery { - /// The search query string (`"*"` matches all documents). + /// The full-text query string. Empty string is treated as "match all". pub q: String, - /// Optional Typesense filter expression (e.g. `"kind:=1"`). - pub filter_by: Option, - /// Optional sort expression (e.g. 
`"created_at:desc"`). - pub sort_by: Option, + /// Nostr kinds to restrict to. Empty = no restriction. + pub kinds: Vec, + /// Event author pubkeys (hex). Empty = no restriction. + pub authors: Vec, + /// Channel UUID strings to restrict to. Empty = no restriction. The + /// [`GLOBAL_CHANNEL_SENTINEL`] value (`"__global__"`) selects events that + /// have no `channel_id` set. + pub channel_ids: Vec, + /// Lower bound on `created_at` (Unix seconds, inclusive). + pub since: Option, + /// Upper bound on `created_at` (Unix seconds, inclusive). + pub until: Option, /// Page number (1-indexed). pub page: u32, /// Number of results per page. @@ -24,8 +43,11 @@ impl Default for SearchQuery { fn default() -> Self { Self { q: "*".into(), - filter_by: None, - sort_by: Some("created_at:desc".into()), + kinds: Vec::new(), + authors: Vec::new(), + channel_ids: Vec::new(), + since: None, + until: None, page: 1, per_page: 20, } @@ -33,7 +55,60 @@ impl Default for SearchQuery { } impl SearchQuery { - /// Converts the query into Typesense HTTP query parameters. + /// Renders the structured filters into a Typesense `filter_by` string. + /// Returns `None` when no constraints apply. + pub(crate) fn typesense_filter_by(&self) -> Option { + let mut parts: Vec = Vec::new(); + + if !self.channel_ids.is_empty() { + // Split global sentinel from real UUIDs so we emit + // (channel_id:=[uuid1,uuid2] || channel_id:=__global__) + // instead of an invalid `channel_id:=[__global__,uuid]` mix. 
+ let (globals, uuids): (Vec<&String>, Vec<&String>) = self + .channel_ids + .iter() + .partition(|id| id.as_str() == GLOBAL_CHANNEL_SENTINEL); + + let include_global = !globals.is_empty(); + if !uuids.is_empty() { + let joined: Vec = uuids.iter().map(|s| (*s).clone()).collect(); + if include_global { + parts.push(format!( + "(channel_id:=[{}] || channel_id:=__global__)", + joined.join(",") + )); + } else { + parts.push(format!("channel_id:=[{}]", joined.join(","))); + } + } else if include_global { + parts.push("channel_id:=__global__".to_string()); + } + } + + if !self.kinds.is_empty() { + let vs: Vec = self.kinds.iter().map(|k| k.to_string()).collect(); + parts.push(format!("kind:=[{}]", vs.join(","))); + } + if !self.authors.is_empty() { + parts.push(format!("pubkey:=[{}]", self.authors.join(","))); + } + if let Some(since) = self.since { + parts.push(format!("created_at:>={}", since)); + } + if let Some(until) = self.until { + parts.push(format!("created_at:<={}", until)); + } + + if parts.is_empty() { + None + } else { + Some(parts.join(" && ")) + } + } + + /// Renders the Typesense HTTP query parameters. Kept for compatibility + /// with `to_query_params`-based callers; the actual production path uses + /// `multi_search` via [`search`]. 
pub fn to_query_params(&self) -> Vec<(String, String)> { let mut params = vec![ ("q".into(), self.q.clone()), @@ -42,12 +117,8 @@ impl SearchQuery { ("per_page".into(), self.per_page.to_string()), ]; - if let Some(ref filter) = self.filter_by { - params.push(("filter_by".into(), filter.clone())); - } - - if let Some(ref sort) = self.sort_by { - params.push(("sort_by".into(), sort.clone())); + if let Some(filter) = self.typesense_filter_by() { + params.push(("filter_by".into(), filter)); } params @@ -167,11 +238,8 @@ pub async fn search( "page": query.page, "per_page": query.per_page, }); - if let Some(ref filter) = query.filter_by { - search_params["filter_by"] = serde_json::Value::String(filter.clone()); - } - if let Some(ref sort) = query.sort_by { - search_params["sort_by"] = serde_json::Value::String(sort.clone()); + if let Some(filter) = query.typesense_filter_by() { + search_params["filter_by"] = serde_json::Value::String(filter); } let body = serde_json::json!({ "searches": [search_params] }); @@ -241,8 +309,11 @@ mod tests { fn test_search_query_building() { let q = SearchQuery { q: "hello world".into(), - filter_by: Some("kind:=1".into()), - sort_by: Some("created_at:desc".into()), + kinds: vec![1], + authors: Vec::new(), + channel_ids: Vec::new(), + since: None, + until: None, page: 2, per_page: 10, }; @@ -259,16 +330,20 @@ mod tests { assert_eq!(get("query_by").unwrap(), "content"); assert_eq!(get("page").unwrap(), "2"); assert_eq!(get("per_page").unwrap(), "10"); - assert_eq!(get("filter_by").unwrap(), "kind:=1"); - assert_eq!(get("sort_by").unwrap(), "created_at:desc"); + assert_eq!(get("filter_by").unwrap(), "kind:=[1]"); + // sort_by is no longer emitted — Typesense default = relevance. 
+ assert!(params.iter().all(|(k, _)| k != "sort_by")); } #[test] fn test_search_query_no_optional_fields() { let q = SearchQuery { q: "*".into(), - filter_by: None, - sort_by: None, + kinds: Vec::new(), + authors: Vec::new(), + channel_ids: Vec::new(), + since: None, + until: None, page: 1, per_page: 20, }; @@ -284,6 +359,50 @@ mod tests { assert!(!has_key("sort_by")); } + #[test] + fn test_typesense_filter_by_renders_structured_fields() { + let q = SearchQuery { + q: "hello".into(), + kinds: vec![1, 42], + authors: vec!["deadbeef".into()], + channel_ids: vec!["11111111-1111-1111-1111-111111111111".into()], + since: Some(1_700_000_000), + until: Some(1_700_000_100), + ..Default::default() + }; + let filter = q.typesense_filter_by().expect("non-empty filter"); + assert!(filter.contains("channel_id:=[11111111-1111-1111-1111-111111111111]")); + assert!(filter.contains("kind:=[1,42]")); + assert!(filter.contains("pubkey:=[deadbeef]")); + assert!(filter.contains("created_at:>=1700000000")); + assert!(filter.contains("created_at:<=1700000100")); + } + + #[test] + fn test_typesense_filter_by_handles_global_sentinel() { + let with_global_only = SearchQuery { + q: "*".into(), + channel_ids: vec![GLOBAL_CHANNEL_SENTINEL.to_string()], + ..Default::default() + }; + assert_eq!( + with_global_only.typesense_filter_by().as_deref(), + Some("channel_id:=__global__") + ); + + let with_mix = SearchQuery { + q: "*".into(), + channel_ids: vec![ + "11111111-1111-1111-1111-111111111111".into(), + GLOBAL_CHANNEL_SENTINEL.to_string(), + ], + ..Default::default() + }; + let filter = with_mix.typesense_filter_by().expect("non-empty"); + assert!(filter.contains("|| channel_id:=__global__")); + assert!(filter.contains("channel_id:=[11111111-1111-1111-1111-111111111111]")); + } + #[test] fn test_search_result_parsing() { let raw = json!({ diff --git a/migrations/0004_search_fts.sql b/migrations/0004_search_fts.sql new file mode 100644 index 000000000..277e30e8f --- /dev/null +++ 
b/migrations/0004_search_fts.sql @@ -0,0 +1,35 @@ +-- Add a generated full-text-search column + GIN index to the events table so +-- the relay can serve NIP-50 search directly from Postgres, making the +-- Typesense dependency optional. +-- +-- `content_tsv` is `GENERATED ALWAYS AS (to_tsvector('simple', content)) STORED` +-- so every INSERT/UPDATE populates it automatically — no application-level +-- index maintenance needed (the role Typesense fills today via the +-- worker pipeline in `buzz-relay/src/state.rs`). +-- +-- Tokenizer choice: `simple` does no stemming and preserves identifiers like +-- agent handles, nip05 strings, and slugs. The `english` config would stem +-- ("running" → "run") and so mangle handles ("alice42" tokenizes fine, but +-- something like "agents" → "agent" would break exact-handle search). Chat +-- content is heterogeneous; `simple` is the safer default for v1. +-- +-- kind:0 metadata flattening: the existing Typesense pipeline appends parsed +-- display_name/name/nip05 to event content before indexing +-- (`buzz-search/src/index.rs::flatten_kind0_for_indexing`). With FTS on raw +-- `content`, those strings still tokenize because they live in the kind:0 JSON +-- body — `to_tsvector('simple', '{"name":"alice"}')` matches `q=alice` because +-- the text parser treats JSON punctuation as separators. Validated by the NIP-50 e2e suite. +-- +-- `events` is partitioned by RANGE (created_at); ADD COLUMN on the parent +-- cascades the generated column to every partition, and CREATE INDEX on the +-- parent builds a partitioned GIN index that propagates to each partition. +-- Partition pruning on since/until queries narrows the GIN scan further than +-- Typesense's full-collection scan does today. +-- +-- Managed by sqlx migrations.
+ +ALTER TABLE events + ADD COLUMN content_tsv tsvector + GENERATED ALWAYS AS (to_tsvector('simple', content)) STORED; + +CREATE INDEX idx_events_content_tsv ON events USING GIN (content_tsv); diff --git a/schema/schema.sql b/schema/schema.sql index 2a839adf6..6fea78b66 100644 --- a/schema/schema.sql +++ b/schema/schema.sql @@ -107,6 +107,8 @@ CREATE TABLE events ( d_tag TEXT, not_before BIGINT, delivered_at BIGINT, + content_tsv tsvector + GENERATED ALWAYS AS (to_tsvector('simple', content)) STORED, PRIMARY KEY (created_at, id) ) PARTITION BY RANGE (created_at); @@ -136,6 +138,7 @@ CREATE INDEX idx_events_addressable ON events (kind, pubkey, channel_id, deleted CREATE INDEX idx_events_parameterized ON events (kind, pubkey, d_tag, deleted_at) WHERE d_tag IS NOT NULL; CREATE INDEX idx_events_not_before ON events (not_before) WHERE not_before IS NOT NULL AND deleted_at IS NULL AND delivered_at IS NULL; +CREATE INDEX idx_events_content_tsv ON events USING GIN (content_tsv); -- ── Event mentions ──────────────────────────────────────────────────────────── From a3fbd0288fac984996c00f01cbb654098b345564 Mon Sep 17 00:00:00 2001 From: npub17jjz49l9jjmhhk7cac63j8yt9z555n9cw8vk7v5jz4vzw4ppld5qgj57cc Date: Wed, 24 Jun 2026 15:58:00 -0400 Subject: [PATCH 02/10] spike: enforce channel_scope by construction on SearchQuery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SearchQuery::new now requires non-empty channel_ids, returning SearchError::EmptyChannelScope otherwise. Fields are pub(crate) so struct-literal construction outside the crate is impossible; optional facets use #[must_use] builder methods (with_kinds/authors/since/ until/page/per_page). Closes the type-system gap on Eva's gate-1 "no visibility widening" invariant: the access boundary is now enforced at construction, not just at the call sites. 
Both call sites (req.rs, bridge.rs) wrap SearchQuery::new in a match — req.rs logs + breaks pagination on the Err path, bridge.rs continues the filter loop. Upstream guards (build_search_channel_scope_filter + the per-filter h_tag validity check) keep the Err path unreachable in normal operation, but if a future refactor lets an empty scope through, behavior is "no results" not "widened search". Also adds the missing `info!("Search backend: typesense", ...)` log line for symmetry with the postgres/disabled branches — small operational polish, no behavior change. Tests: buzz-search 30/30 (+1 rejection test), buzz-relay lib 337/337, NIP-50 e2e 5/5 on both Postgres and Typesense backends (4 NIP-50 + test_ws_search_isolation_other_user_cannot_find_reminder). Co-authored-by: Tyler <109685178+tlongwell-block@users.noreply.github.com> Signed-off-by: Tyler <109685178+tlongwell-block@users.noreply.github.com> --- crates/buzz-relay/src/api/bridge.rs | 26 ++-- crates/buzz-relay/src/handlers/req.rs | 30 ++-- crates/buzz-relay/src/main.rs | 4 + crates/buzz-search/src/error.rs | 10 ++ crates/buzz-search/src/lib.rs | 25 ++-- crates/buzz-search/src/query.rs | 199 ++++++++++++++++++-------- 6 files changed, 203 insertions(+), 91 deletions(-) diff --git a/crates/buzz-relay/src/api/bridge.rs b/crates/buzz-relay/src/api/bridge.rs index 788ff7acc..c97d95768 100644 --- a/crates/buzz-relay/src/api/bridge.rs +++ b/crates/buzz-relay/src/api/bridge.rs @@ -751,15 +751,23 @@ async fn handle_bridge_search( let since_secs = filter.since.map(|t| t.as_secs() as i64); let until_secs = filter.until.map(|t| t.as_secs() as i64); - let search_query = buzz_search::SearchQuery { - q: search_text, - kinds: kinds_vec, - authors: authors_vec, - channel_ids: filter_channel_scope, - since: since_secs, - until: until_secs, - page: 1, - per_page: limit, + let search_query = match buzz_search::SearchQuery::new(search_text, filter_channel_scope) { + Ok(q) => q + .with_kinds(kinds_vec) + 
.with_authors(authors_vec) + .with_since(since_secs) + .with_until(until_secs) + .with_page(1) + .with_per_page(limit), + Err(e) => { + // Upstream guards (the per-filter h_tag validity check + // immediately above + the outer accessible_channels gate) + // make this unreachable in normal operation. If a future + // refactor ever lets an empty scope through, fail closed: + // log and skip this filter instead of widening visibility. + tracing::warn!("bridge search rejected empty channel scope: {e}"); + continue; + } }; let search_result = state diff --git a/crates/buzz-relay/src/handlers/req.rs b/crates/buzz-relay/src/handlers/req.rs index 5a1854147..496c1b229 100644 --- a/crates/buzz-relay/src/handlers/req.rs +++ b/crates/buzz-relay/src/handlers/req.rs @@ -384,16 +384,26 @@ async fn handle_search_req( break; } - let search_query = buzz_search::SearchQuery { - q: search_text.clone(), - kinds: kinds_vec.clone(), - authors: authors_vec.clone(), - channel_ids: channel_scope.clone(), - since: since_secs, - until: until_secs, - page, - per_page, - }; + let search_query = + match buzz_search::SearchQuery::new(search_text.clone(), channel_scope.clone()) { + Ok(q) => q + .with_kinds(kinds_vec.clone()) + .with_authors(authors_vec.clone()) + .with_since(since_secs) + .with_until(until_secs) + .with_page(page) + .with_per_page(per_page), + Err(e) => { + // Upstream guards (build_search_channel_scope_filter + + // the per-filter h_tag validity check) make this + // unreachable in normal operation. If a future refactor + // ever lets an empty scope through, fail closed: log + // and stop paginating this filter — no results, never + // a widened search. 
+ warn!(sub_id = %sub_id, "NIP-50 search rejected empty channel scope: {e}"); + break; + } + }; let search_result = match state.search.search(&search_query).await { Ok(r) => r, diff --git a/crates/buzz-relay/src/main.rs b/crates/buzz-relay/src/main.rs index bfeea5f2b..5638fc4a5 100644 --- a/crates/buzz-relay/src/main.rs +++ b/crates/buzz-relay/src/main.rs @@ -197,6 +197,10 @@ async fn main() -> anyhow::Result<()> { collection: std::env::var("TYPESENSE_COLLECTION") .unwrap_or_else(|_| "events".to_string()), }; + info!( + collection = %search_config.collection, + "Search backend: typesense" + ); let service = SearchService::new(search_config); if let Err(e) = service.ensure_collection().await { error!("Typesense collection setup failed (non-fatal): {e}"); diff --git a/crates/buzz-search/src/error.rs b/crates/buzz-search/src/error.rs index fd02f0958..1702d654e 100644 --- a/crates/buzz-search/src/error.rs +++ b/crates/buzz-search/src/error.rs @@ -40,4 +40,14 @@ pub enum SearchError { /// A database error from sqlx (Postgres backend only). #[error("Database error: {0}")] Database(#[from] sqlx::Error), + + /// `SearchQuery::new` was called with an empty `channel_ids` set. + /// + /// `channel_ids` is the access-control boundary: every search must be + /// scoped to an explicit, non-empty set of channel UUIDs (or the + /// [`crate::query::GLOBAL_CHANNEL_SENTINEL`]). Empty means "no scope," + /// which would widen visibility — refused at construction time so the + /// invariant holds by type, not by remembering to filter at the call site. 
+ #[error("SearchQuery requires a non-empty channel_ids scope")] + EmptyChannelScope, } diff --git a/crates/buzz-search/src/lib.rs b/crates/buzz-search/src/lib.rs index f52e404bb..22d31647b 100644 --- a/crates/buzz-search/src/lib.rs +++ b/crates/buzz-search/src/lib.rs @@ -372,10 +372,13 @@ mod integration_tests { tokio::time::sleep(std::time::Duration::from_millis(500)).await; let result = service - .search(&SearchQuery { - q: unique_token.clone(), - ..Default::default() - }) + .search( + &SearchQuery::new( + unique_token.clone(), + vec![GLOBAL_CHANNEL_SENTINEL.to_string()], + ) + .expect("non-empty scope"), + ) .await .unwrap(); @@ -429,11 +432,11 @@ mod integration_tests { tokio::time::sleep(std::time::Duration::from_millis(500)).await; let result = service - .search(&SearchQuery { - q: unique.clone(), - kinds: vec![1], - ..Default::default() - }) + .search( + &SearchQuery::new(unique.clone(), vec![GLOBAL_CHANNEL_SENTINEL.to_string()]) + .expect("non-empty scope") + .with_kinds(vec![1]), + ) .await .unwrap(); @@ -447,7 +450,9 @@ mod integration_tests { #[tokio::test] async fn disabled_backend_returns_empty_results() { let service = SearchService::disabled(); - let result = service.search(&SearchQuery::default()).await.unwrap(); + let query = SearchQuery::new("*", vec![GLOBAL_CHANNEL_SENTINEL.to_string()]) + .expect("non-empty scope"); + let result = service.search(&query).await.unwrap(); assert_eq!(result.found, 0); assert!(result.hits.is_empty()); diff --git a/crates/buzz-search/src/query.rs b/crates/buzz-search/src/query.rs index 0981eaaea..808f9b93a 100644 --- a/crates/buzz-search/src/query.rs +++ b/crates/buzz-search/src/query.rs @@ -17,40 +17,102 @@ pub const GLOBAL_CHANNEL_SENTINEL: &str = "__global__"; /// structured fields into its own filter syntax (Typesense `filter_by` string, /// Postgres `WHERE` clause, …) so the call site doesn't have to know which /// backend is in use. 
+/// +/// # Access-control invariant +/// +/// `channel_ids` is required to be non-empty by construction: external code +/// can only build a `SearchQuery` through [`SearchQuery::new`], which rejects +/// an empty scope with [`SearchError::EmptyChannelScope`]. The fields are +/// `pub(crate)` so backends inside this crate can read them directly, but +/// struct-literal construction from outside the crate is impossible. This +/// keeps "search cannot widen visibility" true at the type level instead of +/// relying on every caller to remember to pass a channel filter. #[derive(Debug, Clone)] pub struct SearchQuery { /// The full-text query string. Empty string is treated as "match all". - pub q: String, + pub(crate) q: String, /// Nostr kinds to restrict to. Empty = no restriction. - pub kinds: Vec, + pub(crate) kinds: Vec, /// Event author pubkeys (hex). Empty = no restriction. - pub authors: Vec, - /// Channel UUID strings to restrict to. Empty = no restriction. The - /// [`GLOBAL_CHANNEL_SENTINEL`] value (`"__global__"`) selects events that - /// have no `channel_id` set. - pub channel_ids: Vec, + pub(crate) authors: Vec, + /// Channel UUID strings to restrict to. Never empty — the access-control + /// boundary. The [`GLOBAL_CHANNEL_SENTINEL`] value (`"__global__"`) + /// selects events that have no `channel_id` set. + pub(crate) channel_ids: Vec, /// Lower bound on `created_at` (Unix seconds, inclusive). - pub since: Option, + pub(crate) since: Option, /// Upper bound on `created_at` (Unix seconds, inclusive). - pub until: Option, + pub(crate) until: Option, /// Page number (1-indexed). - pub page: u32, + pub(crate) page: u32, /// Number of results per page. - pub per_page: u32, + pub(crate) per_page: u32, } -impl Default for SearchQuery { - fn default() -> Self { - Self { - q: "*".into(), +impl SearchQuery { + /// Build a `SearchQuery` with the required full-text term and channel + /// scope. 
Returns [`SearchError::EmptyChannelScope`] if `channel_ids` is + /// empty — see the type-level note on the access-control invariant. + /// + /// Optional facets (`kinds`, `authors`, `since`, `until`, `page`, + /// `per_page`) default to "no restriction" / page 1 of 20 results, and + /// can be set with the `with_*` builder methods. + pub fn new(q: impl Into, channel_ids: Vec) -> Result { + if channel_ids.is_empty() { + return Err(SearchError::EmptyChannelScope); + } + Ok(Self { + q: q.into(), kinds: Vec::new(), authors: Vec::new(), - channel_ids: Vec::new(), + channel_ids, since: None, until: None, page: 1, per_page: 20, - } + }) + } + + /// Restrict to the given Nostr kinds. + #[must_use] + pub fn with_kinds(mut self, kinds: Vec) -> Self { + self.kinds = kinds; + self + } + + /// Restrict to the given author pubkeys (hex). + #[must_use] + pub fn with_authors(mut self, authors: Vec) -> Self { + self.authors = authors; + self + } + + /// Set the lower bound on `created_at` (Unix seconds, inclusive). + #[must_use] + pub fn with_since(mut self, since: Option) -> Self { + self.since = since; + self + } + + /// Set the upper bound on `created_at` (Unix seconds, inclusive). + #[must_use] + pub fn with_until(mut self, until: Option) -> Self { + self.until = until; + self + } + + /// Set the 1-indexed page number. + #[must_use] + pub fn with_page(mut self, page: u32) -> Self { + self.page = page; + self + } + + /// Set the page size. + #[must_use] + pub fn with_per_page(mut self, per_page: u32) -> Self { + self.per_page = per_page; + self } } @@ -305,18 +367,31 @@ mod tests { use super::*; use serde_json::json; + /// A non-empty channel scope used by tests that don't otherwise care + /// about which channel is being searched — the constructor requires + /// non-empty `channel_ids`, so tests pick a canonical placeholder. 
+ const TEST_CHANNEL: &str = "11111111-1111-1111-1111-111111111111"; + + fn test_scope() -> Vec { + vec![TEST_CHANNEL.to_string()] + } + + #[test] + fn test_search_query_rejects_empty_channel_scope() { + // The access-control invariant: empty channel_ids must be refused + // at construction time, not silently accepted and then patched up + // by remembering to pass a filter at the call site. + let err = SearchQuery::new("hello", Vec::new()).expect_err("must reject empty scope"); + assert!(matches!(err, SearchError::EmptyChannelScope)); + } + #[test] fn test_search_query_building() { - let q = SearchQuery { - q: "hello world".into(), - kinds: vec![1], - authors: Vec::new(), - channel_ids: Vec::new(), - since: None, - until: None, - page: 2, - per_page: 10, - }; + let q = SearchQuery::new("hello world", test_scope()) + .expect("non-empty scope") + .with_kinds(vec![1]) + .with_page(2) + .with_per_page(10); let params = q.to_query_params(); let get = |key: &str| -> Option { @@ -330,23 +405,18 @@ mod tests { assert_eq!(get("query_by").unwrap(), "content"); assert_eq!(get("page").unwrap(), "2"); assert_eq!(get("per_page").unwrap(), "10"); - assert_eq!(get("filter_by").unwrap(), "kind:=[1]"); + let filter = get("filter_by").unwrap(); + assert!(filter.contains(&format!("channel_id:=[{TEST_CHANNEL}]"))); + assert!(filter.contains("kind:=[1]")); // sort_by is no longer emitted — Typesense default = relevance. assert!(params.iter().all(|(k, _)| k != "sort_by")); } #[test] - fn test_search_query_no_optional_fields() { - let q = SearchQuery { - q: "*".into(), - kinds: Vec::new(), - authors: Vec::new(), - channel_ids: Vec::new(), - since: None, - until: None, - page: 1, - per_page: 20, - }; + fn test_search_query_minimum_filters() { + // Even with no optional facets, channel scope is always rendered — + // the access boundary follows the query through every backend path. 
+ let q = SearchQuery::new("*", test_scope()).expect("non-empty scope"); let params = q.to_query_params(); let has_key = |key: &str| params.iter().any(|(k, _)| k == key); @@ -355,23 +425,31 @@ mod tests { assert!(has_key("query_by")); assert!(has_key("page")); assert!(has_key("per_page")); - assert!(!has_key("filter_by")); + assert!(has_key("filter_by")); assert!(!has_key("sort_by")); + + let get = |key: &str| -> Option { + params + .iter() + .find(|(k, _)| k == key) + .map(|(_, v)| v.clone()) + }; + assert_eq!( + get("filter_by").unwrap(), + format!("channel_id:=[{TEST_CHANNEL}]") + ); } #[test] fn test_typesense_filter_by_renders_structured_fields() { - let q = SearchQuery { - q: "hello".into(), - kinds: vec![1, 42], - authors: vec!["deadbeef".into()], - channel_ids: vec!["11111111-1111-1111-1111-111111111111".into()], - since: Some(1_700_000_000), - until: Some(1_700_000_100), - ..Default::default() - }; + let q = SearchQuery::new("hello", test_scope()) + .expect("non-empty scope") + .with_kinds(vec![1, 42]) + .with_authors(vec!["deadbeef".into()]) + .with_since(Some(1_700_000_000)) + .with_until(Some(1_700_000_100)); let filter = q.typesense_filter_by().expect("non-empty filter"); - assert!(filter.contains("channel_id:=[11111111-1111-1111-1111-111111111111]")); + assert!(filter.contains(&format!("channel_id:=[{TEST_CHANNEL}]"))); assert!(filter.contains("kind:=[1,42]")); assert!(filter.contains("pubkey:=[deadbeef]")); assert!(filter.contains("created_at:>=1700000000")); @@ -380,27 +458,24 @@ mod tests { #[test] fn test_typesense_filter_by_handles_global_sentinel() { - let with_global_only = SearchQuery { - q: "*".into(), - channel_ids: vec![GLOBAL_CHANNEL_SENTINEL.to_string()], - ..Default::default() - }; + let with_global_only = SearchQuery::new("*", vec![GLOBAL_CHANNEL_SENTINEL.to_string()]) + .expect("non-empty scope"); assert_eq!( with_global_only.typesense_filter_by().as_deref(), Some("channel_id:=__global__") ); - let with_mix = SearchQuery { - q: 
"*".into(), - channel_ids: vec![ - "11111111-1111-1111-1111-111111111111".into(), + let with_mix = SearchQuery::new( + "*", + vec![ + TEST_CHANNEL.to_string(), GLOBAL_CHANNEL_SENTINEL.to_string(), ], - ..Default::default() - }; + ) + .expect("non-empty scope"); let filter = with_mix.typesense_filter_by().expect("non-empty"); assert!(filter.contains("|| channel_id:=__global__")); - assert!(filter.contains("channel_id:=[11111111-1111-1111-1111-111111111111]")); + assert!(filter.contains(&format!("channel_id:=[{TEST_CHANNEL}]"))); } #[test] From a4943e53bc103a4e8e818140cbc846aaacdb9ff3 Mon Sep 17 00:00:00 2001 From: npub17jjz49l9jjmhhk7cac63j8yt9z555n9cw8vk7v5jz4vzw4ppld5qgj57cc Date: Wed, 24 Jun 2026 16:26:39 -0400 Subject: [PATCH 03/10] spike: ts_rank_cd ordering + non-vacuous relevance test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements Eva's blocker-2 fix: Postgres backend now orders search results by relevance, and the e2e test that claims to verify this actually discriminates rank from recency. postgres.rs - SELECT list now includes `ts_rank_cd(content_tsv, plainto_tsquery('simple', $q)) AS rank` when the query has searchable text. The same `$q` parameter slot is reused in WHERE. - ORDER BY rank DESC, created_at DESC when has_text; empty/"*" queries skip the rank column and fall back to created_at DESC (no needless tsquery cost). - SearchHit.score is populated from the rank column (f32 widened to f64). Empty/"*" queries leave score at 0.0. e2e_nostr_interop::test_nip50_search_relevance_order - Redesigned to discriminate rank from recency. Previous fixture used "alpha bravo charlie" with msg3="alpha bravo" — plainto_tsquery ANDs all terms, so msg3 never matched the WHERE clause and the test passed trivially with one candidate (Eva caught this). 
- New fixture: query "{prefix} alpha bravo"; msg1 (oldest) has terms adjacent (high rank); msg2 (middle) doesn't match at all; msg3 (newest) has terms separated by filler (lower rank). - Asserts both msg1 and msg3 are present, then asserts events[0].id == id1 with no `||content.contains(...)` escape hatch. - Discriminator is term proximity, not term frequency: Typesense's default _text_match does NOT reward repeated query terms (verified empirically — identical tm scores for "alpha bravo" vs "alpha alpha bravo bravo"), but BOTH backends reward adjacency. Proximity is the property both backends agree on. - New `send_rest_message_at` helper pins created_at via `custom_created_at`. Without explicit timestamps, three back-to-back sends share one wall-clock second; PG falls to heap-scan order and masquerades as rank ordering. Spreading by 30s each makes the recency-only ordering deterministically put msg3 first, so a passing test really means rank wins. Validation - buzz-search lib: 30/30. buzz-relay lib: 337/337. - NIP-50 e2e on Postgres: 4/4 (incl. relevance_order) + isolation 1/1. - NIP-50 e2e on Typesense: 4/4 + isolation 1/1. - Proof of discrimination: with postgres.rs reverted to `ORDER BY created_at DESC`, the new test FAILS on PG (msg3 first, as predicted). Restored ts_rank_cd ordering after. Pre-existing failure not introduced by this commit: test_nip17_gift_wrap_not_searchable fails on both backends — it queries Typesense directly at events-spike-{backend}; on the PG backend that collection is never written to (structurally expected), and the Typesense-backend failure is the same fixture coupling Eva already acknowledged in the prior turn. No regression vs e5869ddd/4b7c8d12. 
Co-authored-by: Tyler <109685178+tlongwell-block@users.noreply.github.com> Signed-off-by: Tyler <109685178+tlongwell-block@users.noreply.github.com> --- crates/buzz-search/src/postgres.rs | 62 +++++++---- .../tests/e2e_nostr_interop.rs | 102 ++++++++++++++---- 2 files changed, 123 insertions(+), 41 deletions(-) diff --git a/crates/buzz-search/src/postgres.rs b/crates/buzz-search/src/postgres.rs index 00e6aa3cf..5fd73b2e9 100644 --- a/crates/buzz-search/src/postgres.rs +++ b/crates/buzz-search/src/postgres.rs @@ -16,9 +16,10 @@ //! is present //! - `since` / `until` → `created_at >= … AND created_at <= …` //! -//! Results are ordered by `created_at DESC` — NIP-50 does not require strict -//! relevance ordering, and chronological ordering is what the existing Buzz -//! clients expect. +//! Results are ordered by `ts_rank_cd` (cover-density relevance) when the +//! query has searchable text, with `created_at DESC` as a tiebreaker. When +//! the query is empty/`"*"` (no tsquery), ordering falls back to +//! `created_at DESC` only. use chrono::{DateTime, Utc}; use sqlx::{PgPool, Row}; @@ -67,13 +68,7 @@ pub async fn search(pool: &PgPool, query: &SearchQuery) -> Result Result Result Result("rank").unwrap_or(0.0) as f64 + } else { + 0.0 + }, }); } diff --git a/crates/buzz-test-client/tests/e2e_nostr_interop.rs b/crates/buzz-test-client/tests/e2e_nostr_interop.rs index b71cab6c5..36fce674c 100644 --- a/crates/buzz-test-client/tests/e2e_nostr_interop.rs +++ b/crates/buzz-test-client/tests/e2e_nostr_interop.rs @@ -84,12 +84,27 @@ async fn create_test_channel(keys: &Keys) -> String { /// Send a message via a signed kind:9 event and return the event_id hex. async fn send_rest_message(keys: &Keys, channel_id: &str, content: &str) -> String { + send_rest_message_at(keys, channel_id, content, None).await +} + +/// Like `send_rest_message` but lets the caller pin `created_at` to a specific +/// unix-seconds timestamp. 
Useful when a test needs the recency tiebreak to be +/// meaningful (the default `Timestamp::now()` collapses all back-to-back sends +/// onto the same wall-clock second). +async fn send_rest_message_at( + keys: &Keys, + channel_id: &str, + content: &str, + created_at: Option, +) -> String { let client = reqwest::Client::new(); let pubkey_hex = keys.public_key().to_hex(); - let event = EventBuilder::new(Kind::Custom(9), content) - .tags(vec![Tag::parse(["h", channel_id]).unwrap()]) - .sign_with_keys(keys) - .unwrap(); + let mut builder = EventBuilder::new(Kind::Custom(9), content) + .tags(vec![Tag::parse(["h", channel_id]).unwrap()]); + if let Some(secs) = created_at { + builder = builder.custom_created_at(nostr::Timestamp::from(secs as u64)); + } + let event = builder.sign_with_keys(keys).unwrap(); let resp = client .post(format!("{}/events", relay_http_url())) .header("X-Pubkey", &pubkey_hex) @@ -1068,7 +1083,20 @@ async fn test_nip17_gift_wrap_not_searchable() { } /// Send 3 messages with varying relevance to a query, wait for indexing, then search. -/// Verify: the exact-match message is present in results (relevance-based, not just chronological). +/// Verify: rank-based ordering — a more-relevant *older* message ranks above a +/// less-relevant *newer* one, proving the result order is driven by relevance +/// rather than recency. +/// +/// Discriminator: **term proximity**. msg1 has the query terms adjacent; +/// msg3 has the query terms separated by intervening words. Both Postgres +/// `ts_rank_cd` (cover-density) and Typesense `_text_match` reward adjacency, +/// so a recency-only ordering would put msg3 first; a rank-based ordering +/// puts msg1 first. +/// +/// We deliberately do NOT use term-frequency as the discriminator: Typesense +/// default `_text_match` does not reward repeated query terms (verified +/// empirically against the spike collection — repeated and single-occurrence +/// docs tie). Proximity is the property both backends agree on. 
#[tokio::test] #[ignore] async fn test_nip50_search_relevance_order() { @@ -1078,13 +1106,23 @@ async fn test_nip50_search_relevance_order() { // Unique prefix to isolate this test's messages from other test runs. let prefix = uuid::Uuid::new_v4().simple().to_string(); - let msg1 = format!("{prefix} alpha bravo charlie"); // oldest, exact match - let msg2 = format!("{prefix} delta echo foxtrot"); // middle, no match - let msg3 = format!("{prefix} alpha bravo"); // newest, partial match - - let id1 = send_rest_message(&keys, &channel, &msg1).await; - send_rest_message(&keys, &channel, &msg2).await; - send_rest_message(&keys, &channel, &msg3).await; + // Anchor created_at offsets so msg1 is genuinely older than msg3 in seconds. + // Without this, all three sends share the same wall-clock second and + // `created_at DESC` becomes a coin flip (heap-scan order on PG, insertion + // order on Typesense) — which silently makes the test pass regardless of + // rank ordering. Spreading them by 30s each guarantees the recency-only + // ordering would put msg3 first, so a passing test really means rank wins. + let now = nostr::Timestamp::now().as_secs() as i64; + // msg1: oldest, query terms ADJACENT — highest expected rank. + let msg1 = format!("{prefix} alpha bravo"); + // msg2: middle, no overlap with query — should not match at all. + let msg2 = format!("{prefix} delta echo foxtrot"); + // msg3: newest, query terms SEPARATED by filler — lower expected rank. + let msg3 = format!("{prefix} alpha xx yy zz bravo"); + + let id1 = send_rest_message_at(&keys, &channel, &msg1, Some(now - 60)).await; + send_rest_message_at(&keys, &channel, &msg2, Some(now - 30)).await; + let id3 = send_rest_message_at(&keys, &channel, &msg3, Some(now)).await; // Wait for Typesense indexing. 
tokio::time::sleep(Duration::from_secs(3)).await; @@ -1092,7 +1130,9 @@ async fn test_nip50_search_relevance_order() { let mut client = BuzzTestClient::connect(&url, &keys).await.expect("connect"); let sid = sub_id("nip50-relevance"); - let query = format!("{prefix} alpha bravo charlie"); + // Two-term query; both msg1 and msg3 contain both terms (so both pass the + // WHERE / filter), but only msg1 has them adjacent. msg2 has neither term. + let query = format!("{prefix} alpha bravo"); let filter = Filter::new() .kind(Kind::Custom(9)) .search(&query) @@ -1108,17 +1148,33 @@ async fn test_nip50_search_relevance_order() { .await .expect("collect until EOSE"); - // Must have at least 1 result. - assert!(!events.is_empty(), "expected search results, got none"); - - // The FIRST result must be the exact-match message (msg1), not the newer - // partial match (msg3). This proves relevance ordering, not chronological. - let first = &events[0]; + // Both msg1 and msg3 must be present — otherwise the test isn't + // discriminating ordering, it's just checking presence. + let returned_ids: Vec = events.iter().map(|e| e.id.to_hex()).collect(); + assert!( + returned_ids.contains(&id1), + "msg1 (adjacent terms) missing from results — query/index parity broken. \ + All results: {:?}", + events.iter().map(|e| &e.content).collect::>() + ); assert!( - first.id.to_hex() == id1 || first.content.contains("alpha bravo charlie"), - "expected exact-match message as FIRST result (relevance order), \ - but got: '{}'. All results: {:?}", - first.content, + returned_ids.contains(&id3), + "msg3 (separated terms) missing from results — query/index parity broken. \ + All results: {:?}", + events.iter().map(|e| &e.content).collect::>() + ); + + // The FIRST result must be msg1 (older, adjacent terms), not msg3 (newer, + // separated terms). No `|| content.contains(...)` escape hatch — id + // equality only. 
A recency-only ordering would put msg3 first; a + // rank-based ordering (ts_rank_cd / _text_match) puts msg1 first. + assert_eq!( + events[0].id.to_hex(), + id1, + "expected msg1 (adjacent-term match) as FIRST result via rank ordering, \ + but got msg id {} content '{}'. All results in order: {:?}", + events[0].id.to_hex(), + events[0].content, events.iter().map(|e| &e.content).collect::>() ); From bbf125dab285709051de73030de6753365cd34c6 Mon Sep 17 00:00:00 2001 From: npub1qyvc0c5kl4gqv2fd97fsk46tu378sqgy35vc83rvgfwne90sel7s0ed67d <011987e296fd5006292d2f930b574be47c7801048d1983c46c425d3c95f0cffd@sprout-oss.stage.blox.sqprod.co> Date: Wed, 24 Jun 2026 19:10:11 -0400 Subject: [PATCH 04/10] pg-fts: swap generated content_tsv column for expression GIN index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the GENERATED ALWAYS ... STORED tsvector column + column-backed GIN index with a single expression index on to_tsvector('simple', content). Why: the expression index is maintained by Postgres on every INSERT/UPDATE exactly like a column index (no write-path work), but avoids the stored column's ALTER TABLE row rewrite / ACCESS EXCLUSIVE backfill — so the migration build is online-safe on a fresh/small relay and the same-named index (idx_events_content_fts) is pre-buildable out of band on a large populated relay (CREATE INDEX CONCURRENTLY + ATTACH per the live-relay runbook). IF NOT EXISTS makes the migration idempotent against that path. The query path renders the identical to_tsvector('simple', content) expression so the planner uses the index. Rank SQL (ts_rank_cd) is unchanged in behavior. - migrations/0004_search_fts.sql: single CREATE INDEX IF NOT EXISTS expr index - schema/schema.sql: drop generated column; expr index for fresh installs - crates/buzz-search/src/postgres.rs: query refs -> to_tsvector(...) 
expr - crates/buzz-db/src/migration.rs: assertions match new shape - crates/buzz-search/src/lib.rs, crates/buzz-relay/src/main.rs: doc wording Tests: cargo test -p buzz-db -p buzz-search (incl. ignored PG tests): 118 passed, 0 failed. Co-authored-by: Tyler Longwell Signed-off-by: Tyler Longwell --- crates/buzz-db/src/migration.rs | 22 ++++++------ crates/buzz-relay/src/main.rs | 2 +- crates/buzz-search/src/lib.rs | 21 ++++++------ crates/buzz-search/src/postgres.rs | 14 ++++---- migrations/0004_search_fts.sql | 55 ++++++++++++++++-------------- schema/schema.sql | 4 +-- 6 files changed, 62 insertions(+), 56 deletions(-) diff --git a/crates/buzz-db/src/migration.rs b/crates/buzz-db/src/migration.rs index 1188bd917..b6cf923ee 100644 --- a/crates/buzz-db/src/migration.rs +++ b/crates/buzz-db/src/migration.rs @@ -164,12 +164,15 @@ mod tests { assert_eq!(migrations[3].version, 4); assert_eq!(&*migrations[3].description, "search fts"); assert!( - migrations[3].sql.as_str().contains("content_tsv tsvector") + migrations[3] + .sql + .as_str() + .contains("idx_events_content_fts") && migrations[3] .sql .as_str() - .contains("idx_events_content_tsv"), - "fourth migration should add the generated tsvector column and GIN index" + .contains("to_tsvector('simple', content)"), + "fourth migration should add the expression GIN index for FTS" ); } @@ -204,23 +207,21 @@ mod tests { } /// Returns `schema/schema.sql` with the NIP-ER reminder DDL and the - /// search-FTS DDL removed, so it models a pre-stack deployment whose - /// `events` table lacks the reminder columns and the generated tsvector - /// column. The strip is asserted: if the snapshot text drifts so these + /// search-FTS index removed, so it models a pre-stack deployment whose + /// `events` table lacks the reminder columns and the FTS expression + /// index. 
The strip is asserted: if the snapshot text drifts so these /// fragments no longer match, the test fails loudly rather than silently - /// loading a snapshot that already carries the columns (which would make + /// loading a snapshot that already carries them (which would make /// migration 0003 or 0004 collide on re-add). fn pre_reminder_schema_snapshot() -> String { const REMINDER_COLUMNS: &str = " not_before BIGINT,\n delivered_at BIGINT,\n"; const REMINDER_INDEX: &str = "CREATE INDEX idx_events_not_before ON events (not_before)\n WHERE not_before IS NOT NULL AND deleted_at IS NULL AND delivered_at IS NULL;\n"; - const FTS_COLUMN: &str = " content_tsv tsvector\n GENERATED ALWAYS AS (to_tsvector('simple', content)) STORED,\n"; const FTS_INDEX: &str = - "CREATE INDEX idx_events_content_tsv ON events USING GIN (content_tsv);\n"; + "CREATE INDEX idx_events_content_fts ON events USING GIN (to_tsvector('simple', content));\n"; assert!( SCHEMA_SQL.contains(REMINDER_COLUMNS) && SCHEMA_SQL.contains(REMINDER_INDEX) - && SCHEMA_SQL.contains(FTS_COLUMN) && SCHEMA_SQL.contains(FTS_INDEX), "schema.sql reminder/FTS DDL drifted; update pre_reminder_schema_snapshot to match" ); @@ -228,7 +229,6 @@ mod tests { SCHEMA_SQL .replace(REMINDER_COLUMNS, "") .replace(REMINDER_INDEX, "") - .replace(FTS_COLUMN, "") .replace(FTS_INDEX, "") } diff --git a/crates/buzz-relay/src/main.rs b/crates/buzz-relay/src/main.rs index 5638fc4a5..278a321a3 100644 --- a/crates/buzz-relay/src/main.rs +++ b/crates/buzz-relay/src/main.rs @@ -208,7 +208,7 @@ async fn main() -> anyhow::Result<()> { service } SearchBackend::Postgres => { - info!("Search backend: postgres (content_tsv generated column)"); + info!("Search backend: postgres (expression GIN index idx_events_content_fts)"); SearchService::with_postgres(db.pool()) } SearchBackend::Disabled => { diff --git a/crates/buzz-search/src/lib.rs b/crates/buzz-search/src/lib.rs index 22d31647b..a02b778bd 100644 --- a/crates/buzz-search/src/lib.rs +++ 
b/crates/buzz-search/src/lib.rs @@ -9,8 +9,9 @@ //! `search()` path returns event IDs that the relay then refetches from //! Postgres. //! - [`SearchService::with_postgres`] (Postgres FTS): runs `plainto_tsquery` -//! against a generated `content_tsv` column on `events`. No write-path -//! indexing needed — the generated stored column auto-populates on INSERT. +//! against the `idx_events_content_fts` expression GIN index on `events`. No +//! write-path indexing needed — Postgres maintains the expression index on +//! every INSERT/UPDATE, exactly like a column index. //! - [`SearchService::disabled`]: returns empty results for every query and //! accepts indexing calls as no-ops. Used when NIP-50 search is intentionally //! off (e.g. for tenants who opted out). @@ -39,7 +40,7 @@ use sqlx::PgPool; pub enum SearchBackend { /// Typesense (current production default). Typesense, - /// Postgres full-text search via the `content_tsv` generated column. + /// Postgres full-text search via the `idx_events_content_fts` expression GIN index. Postgres, /// NIP-50 search is disabled; every query returns empty. Disabled, @@ -138,8 +139,8 @@ impl SearchService { } /// Creates a Postgres FTS `SearchService` backed by the supplied pool. - /// No indexing setup is required — the `content_tsv` generated column - /// populates on every INSERT. + /// No indexing setup is required — the `idx_events_content_fts` expression + /// index is maintained by Postgres on every INSERT/UPDATE. pub fn with_postgres(pool: PgPool) -> Self { Self::Postgres(pool) } @@ -174,8 +175,8 @@ impl SearchService { /// Indexes a single event (upsert semantics). /// /// - **Typesense**: writes a document to the collection. - /// - **Postgres**: no-op — the `content_tsv` generated stored column is - /// populated automatically on the original INSERT. + /// - **Postgres**: no-op — the `idx_events_content_fts` expression index is + /// maintained automatically on the original INSERT. /// - **Disabled**: no-op. 
pub async fn index_event(&self, event: &StoredEvent) -> Result<(), SearchError> { match self { @@ -236,9 +237,9 @@ impl SearchService { /// Removes an event from the search index by its event ID hex string. /// /// - **Typesense**: deletes the document. - /// - **Postgres**: no-op — `content_tsv` is tied to the event row; - /// removing the row removes the index entry, and the relay's event - /// deletion path already handles that. + /// - **Postgres**: no-op — the `idx_events_content_fts` entry is tied to the + /// event row; removing the row removes the index entry, and the relay's + /// event deletion path already handles that. /// - **Disabled**: no-op. pub async fn delete_event(&self, event_id: &str) -> Result<(), SearchError> { match self { diff --git a/crates/buzz-search/src/postgres.rs b/crates/buzz-search/src/postgres.rs index 5fd73b2e9..e4e376407 100644 --- a/crates/buzz-search/src/postgres.rs +++ b/crates/buzz-search/src/postgres.rs @@ -6,8 +6,9 @@ //! `db.get_events_by_ids` → `filters_match` → auth post-filter chain in //! `crates/buzz-relay/src/handlers/req.rs` is unchanged. //! -//! Matching uses `plainto_tsquery('simple', $q)` against the `content_tsv` -//! generated column added in migration `0004_search_fts.sql`. Pushdowns: +//! Matching uses `plainto_tsquery('simple', $q)` against the +//! `idx_events_content_fts` expression GIN index added in migration +//! `0004_search_fts.sql`. Pushdowns: //! //! - `kinds` → `kind = ANY($kinds)` //! - `authors` → `pubkey = ANY($authors)` (hex-decoded) @@ -69,8 +70,9 @@ pub async fn search(pool: &PgPool, query: &SearchQuery) -> Result Result "run") and mangle handles. Chat content is heterogeneous; +-- `simple` is the safer default for v1. The query path in +-- `buzz-search/src/postgres.rs` renders the identical `to_tsvector('simple', content)` +-- expression so the planner matches this index. 
-- --- kind:0 metadata flattening: the existing Typesense pipeline appends parsed --- display_name/name/nip05 to event content before indexing --- (`buzz-search/src/index.rs::flatten_kind0_for_indexing`). With FTS on raw +-- kind:0 metadata: the old Typesense pipeline appended parsed +-- display_name/name/nip05 to event content before indexing. With FTS over raw -- `content`, those strings still tokenize because they live in the kind:0 JSON --- body — `to_tsvector('simple', '{"name":"alice"}')` matches `q=alice` after --- json-aware tokenization. Validated by the NIP-50 e2e suite. +-- body — `to_tsvector('simple', '{"name":"alice"}')` matches `q=alice`. +-- Validated by the NIP-50 e2e suite. -- --- `events` is partitioned by RANGE (created_at); ADD COLUMN on the parent --- cascades the generated column to every partition, and CREATE INDEX on the --- parent builds a partitioned GIN index that propagates to each partition. --- Partition pruning on since/until queries narrows the GIN scan further than --- Typesense's full-collection scan does today. +-- `events` is partitioned by RANGE (created_at). `CREATE INDEX ... ON events` +-- builds a partitioned GIN index whose per-partition child indexes propagate to +-- existing and future partitions. Partition pruning on since/until narrows the +-- GIN scan further than Typesense's full-collection scan does today. +-- +-- IF NOT EXISTS makes this migration idempotent against the operator runbook: +-- on a large relay the index is built per-child with CREATE INDEX CONCURRENTLY +-- and ATTACHed to a parent named `idx_events_content_fts` BEFORE this code +-- deploys, so this statement is a no-op. See +-- GUIDES/BUZZ_POSTGRES_FTS_LIVE_RELAY_RUNBOOK.md. -- -- Managed by sqlx migrations. 
-ALTER TABLE events - ADD COLUMN content_tsv tsvector - GENERATED ALWAYS AS (to_tsvector('simple', content)) STORED; - -CREATE INDEX idx_events_content_tsv ON events USING GIN (content_tsv); +CREATE INDEX IF NOT EXISTS idx_events_content_fts + ON events USING GIN (to_tsvector('simple', content)); diff --git a/schema/schema.sql b/schema/schema.sql index 6fea78b66..a351683eb 100644 --- a/schema/schema.sql +++ b/schema/schema.sql @@ -107,8 +107,6 @@ CREATE TABLE events ( d_tag TEXT, not_before BIGINT, delivered_at BIGINT, - content_tsv tsvector - GENERATED ALWAYS AS (to_tsvector('simple', content)) STORED, PRIMARY KEY (created_at, id) ) PARTITION BY RANGE (created_at); @@ -138,7 +136,7 @@ CREATE INDEX idx_events_addressable ON events (kind, pubkey, channel_id, deleted CREATE INDEX idx_events_parameterized ON events (kind, pubkey, d_tag, deleted_at) WHERE d_tag IS NOT NULL; CREATE INDEX idx_events_not_before ON events (not_before) WHERE not_before IS NOT NULL AND deleted_at IS NULL AND delivered_at IS NULL; -CREATE INDEX idx_events_content_tsv ON events USING GIN (content_tsv); +CREATE INDEX idx_events_content_fts ON events USING GIN (to_tsvector('simple', content)); -- ── Event mentions ──────────────────────────────────────────────────────────── From 5304a9b242a42a0825d6d747812e3f1d39af6c08 Mon Sep 17 00:00:00 2001 From: npub1qyvc0c5kl4gqv2fd97fsk46tu378sqgy35vc83rvgfwne90sel7s0ed67d <011987e296fd5006292d2f930b574be47c7801048d1983c46c425d3c95f0cffd@sprout-oss.stage.blox.sqprod.co> Date: Wed, 24 Jun 2026 19:14:26 -0400 Subject: [PATCH 05/10] test(e2e): make gift-wrap-not-searchable backend-agnostic; add cross-author search isolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_nip17_gift_wrap_not_searchable previously queried Typesense directly to prove kind:1059 was never indexed — meaningless on the Postgres backend, where every event lives in the events table and there is no separate index to skip. 
Rewrite it to issue a NIP-50 search and assert the relay never surfaces the gift wrap (kind:9 control IS returned, kind:1059 is NOT). That guarantee comes from the relay's auth gates + filters_match post-filter, which are identical across all three backends, so the test now guards typesense/postgres/disabled. Add test_nip50_search_cross_author_isolation: an outsider who never joined an author's channel gets zero hits when searching that channel's exact #h + token, while the author finds their own message — proving the channel-scope clamp in handle_search_req (gate #1, no visibility widening) holds. Also relay-side and backend-independent. Compiles clean (cargo test -p buzz-test-client --test e2e_nostr_interop --no-run); the full matrix run lands after Max's backend-flag wiring is rebased in. Co-authored-by: Tyler Longwell Signed-off-by: Tyler Longwell --- .../tests/e2e_nostr_interop.rs | 200 ++++++++++++++---- 1 file changed, 153 insertions(+), 47 deletions(-) diff --git a/crates/buzz-test-client/tests/e2e_nostr_interop.rs b/crates/buzz-test-client/tests/e2e_nostr_interop.rs index 36fce674c..9d0c275e8 100644 --- a/crates/buzz-test-client/tests/e2e_nostr_interop.rs +++ b/crates/buzz-test-client/tests/e2e_nostr_interop.rs @@ -991,12 +991,24 @@ async fn test_nip10_thread_reply_not_in_top_level() { ); } -/// Send a kind:1059 gift wrap AND a kind:9 message with the same unique content. -/// Query Typesense directly to prove the gift wrap was NOT indexed while the -/// kind:9 message WAS. This bypasses all relay-level filtering (channel_id, #p) -/// and tests the actual indexing skip in dispatch_persistent_event. +/// Send a kind:1059 gift wrap AND a kind:9 message with the same unique content, +/// then issue a NIP-50 search and prove the gift wrap is NOT returned while the +/// kind:9 message IS. /// -/// Requires TYPESENSE_URL and TYPESENSE_API_KEY env vars (defaults to dev values). 
+/// This is the **backend-agnostic** form of the "gift wraps are not searchable" +/// guarantee. The old form queried Typesense directly to prove kind:1059 was +/// never *indexed* — meaningful only when Typesense is the backend. With the +/// Postgres backend every event lives in the `events` table (there is no +/// separate index to skip), so the protection is no longer "don't index it" +/// but "the relay's search REQ path never surfaces it." That path is identical +/// across all three backends: the auth/#p gates in `handle_req` run *before* +/// the backend call, and `handle_search_req` re-applies `filters_match` to every +/// hit before delivery. A kind:9 search filter therefore never returns a +/// kind:1059 row regardless of backend — which is exactly what we assert here. +/// +/// Runs against whatever backend the relay under test is configured with +/// (`BUZZ_SEARCH_BACKEND`), so the same test guards typesense, postgres, and +/// (vacuously) disabled. #[tokio::test] #[ignore] async fn test_nip17_gift_wrap_not_searchable() { @@ -1011,7 +1023,8 @@ async fn test_nip17_gift_wrap_not_searchable() { let unique_token = format!("giftwrap-nosearch-{}", uuid::Uuid::new_v4().simple()); - // 1. Send kind:1059 gift wrap. + // 1. Send kind:1059 gift wrap (p-tagged at B, signed by an ephemeral key, + // as NIP-17 prescribes) carrying the unique token as its content. let ephemeral_keys = Keys::generate(); let p_tag = Tag::parse(["p", &keys_b.public_key().to_hex()]).expect("p tag"); let gift_wrap = EventBuilder::new(Kind::Custom(1059), &unique_token) @@ -1021,65 +1034,158 @@ async fn test_nip17_gift_wrap_not_searchable() { let ok = client.send_event(gift_wrap).await.expect("send gift wrap"); assert!(ok.accepted, "relay rejected gift wrap: {}", ok.message); - // 2. Send kind:9 control message with the same content. + // 2. Send a kind:9 control message with the same token into A's channel. 
let ok2 = client .send_text_message(&keys_a, &channel, &unique_token, 9) .await .expect("send kind:9"); assert!(ok2.accepted, "relay rejected kind:9: {}", ok2.message); - client.disconnect().await.expect("disconnect"); - - // Wait for async Typesense indexing. - tokio::time::sleep(Duration::from_secs(3)).await; + // Allow async indexing (Typesense) / write commit (Postgres) to settle. + tokio::time::sleep(Duration::from_millis(800)).await; - // 3. Query Typesense DIRECTLY — bypasses all relay-level filtering. - let ts_url = - std::env::var("TYPESENSE_URL").unwrap_or_else(|_| "http://localhost:8108".to_string()); - let ts_key = std::env::var("TYPESENSE_API_KEY").unwrap_or_else(|_| "buzz_dev_key".to_string()); - - let http = reqwest::Client::new(); - let resp = http - .post(format!("{ts_url}/multi_search")) - .header("X-TYPESENSE-API-KEY", &ts_key) - .json(&serde_json::json!({ - "searches": [{ - "collection": "events", - "q": unique_token, - "query_by": "content", - "per_page": 10 - }] - })) - .send() + // 3. Search as A within A's channel for the token. The kind:9 control MUST + // come back (proves the token is searchable at all), and no kind:1059 + // must appear in the results. + let sid = sub_id("giftwrap-nosearch"); + let filter = Filter::new() + .search(&unique_token) + .custom_tags(SingleLetterTag::lowercase(Alphabet::H), [channel.as_str()]); + client + .subscribe(&sid, vec![filter]) .await - .expect("Typesense multi_search request"); + .expect("subscribe"); + let events = client + .collect_until_eose(&sid, Duration::from_secs(10)) + .await + .expect("collect until EOSE"); + // Control: the kind:9 message IS searchable. assert!( - resp.status().is_success(), - "Typesense returned {}", - resp.status() + events + .iter() + .any(|e| e.kind.as_u16() == 9 && e.content.contains(&unique_token)), + "kind:9 control message not returned by search — search broken for this backend. 
\ + events: {:?}", + events + .iter() + .map(|e| (e.kind.as_u16(), &e.content)) + .collect::<Vec<_>>() ); - let body: serde_json::Value = resp.json().await.expect("parse Typesense response"); - let hits = body["results"][0]["hits"].as_array().expect("hits array"); + // Assertion: NO kind:1059 gift wrap is ever returned by search. + assert!( + !events.iter().any(|e| e.kind.as_u16() == 1059), + "kind:1059 gift wrap returned by NIP-50 search — gift wraps must NOT be \ + searchable on any backend. events: {:?}", + events + .iter() + .map(|e| (e.kind.as_u16(), &e.content)) + .collect::<Vec<_>>() ); - // Control: kind:9 IS indexed. - let has_kind9 = hits - .iter() - .any(|h| h["document"]["kind"].as_i64() == Some(9)); + client.disconnect().await.expect("disconnect"); +} + +/// Cross-author / cross-channel search isolation: a user who is NOT a member of +/// another author's channel must never see that channel's messages via NIP-50 +/// search, even when they search with the exact channel `#h` and the exact +/// content token. +/// +/// This exercises gate #1 (no visibility widening): the channel-scope clamp in +/// `handle_search_req` intersects the requested `#h` with the searcher's +/// `accessible_channels` and skips the filter entirely when nothing remains +/// (`req.rs`: the `#h` values that aren't accessible are dropped, and an empty +/// resulting scope short-circuits to "match nothing"). Because that clamp runs +/// relay-side BEFORE the backend call, it holds identically for typesense, +/// postgres, and disabled — which is why this test carries no backend-specific +/// branch. +#[tokio::test] +#[ignore] +async fn test_nip50_search_cross_author_isolation() { + let url = relay_url(); + let author = Keys::generate(); + let outsider = Keys::generate(); + + // Author A owns a private-by-default working channel and posts a token.
+ let channel = create_test_channel(&author).await; + let unique_token = format!("isolation_{}", uuid::Uuid::new_v4().simple()); + let content = format!("secret in A's channel {unique_token}"); + + let mut author_client = BuzzTestClient::connect(&url, &author) + .await + .expect("connect author"); + let ok = author_client + .send_text_message(&author, &channel, &content, 9) + .await + .expect("author sends message"); + assert!(ok.accepted, "relay rejected author message: {}", ok.message); + + // Allow indexing / write commit to settle. + tokio::time::sleep(Duration::from_millis(800)).await; + + // Sanity: the author themselves CAN find it (so a zero-result outsider + // search is proven to be isolation, not a broken/empty index). + let author_sid = sub_id("isolation-author"); + author_client + .subscribe( + &author_sid, + vec![Filter::new() + .kind(Kind::Custom(9)) + .search(&unique_token) + .custom_tags(SingleLetterTag::lowercase(Alphabet::H), [channel.as_str()])], + ) + .await + .expect("author subscribe"); + let author_events = author_client + .collect_until_eose(&author_sid, Duration::from_secs(10)) + .await + .expect("author collect until EOSE"); assert!( - has_kind9, - "kind:9 control message not found in Typesense — indexing broken" + author_events + .iter() + .any(|e| e.content.contains(&unique_token)), + "author could not find their own message — search is broken, test is vacuous. \ + events: {:?}", + author_events.iter().map(|e| &e.content).collect::<Vec<_>>() ); + author_client.disconnect().await.expect("disconnect author"); - // Assertion: kind:1059 is NOT indexed. - let has_kind1059 = hits - .iter() - .any(|h| h["document"]["kind"].as_i64() == Some(1059)); + // Outsider B — a different authenticated identity who never joined A's + // channel — searches with A's exact #h and exact token. Must get nothing.
+ let mut outsider_client = BuzzTestClient::connect(&url, &outsider) + .await + .expect("connect outsider"); + let outsider_sid = sub_id("isolation-outsider"); + outsider_client + .subscribe( + &outsider_sid, + vec![Filter::new() + .kind(Kind::Custom(9)) + .search(&unique_token) + .custom_tags(SingleLetterTag::lowercase(Alphabet::H), [channel.as_str()])], + ) + .await + .expect("outsider subscribe"); + let outsider_events = outsider_client + .collect_until_eose(&outsider_sid, Duration::from_secs(10)) + .await + .expect("outsider collect until EOSE"); assert!( - !has_kind1059, - "kind:1059 found in Typesense — gift wraps must NOT be indexed. hits: {hits:?}" + outsider_events.is_empty(), + "outsider received {} search hit(s) for a channel they are not a member of — \ + visibility widening. events: {:?}", + outsider_events.len(), + outsider_events + .iter() + .map(|e| (e.kind.as_u16(), &e.content)) + .collect::<Vec<_>>() ); + + outsider_client + .disconnect() + .await + .expect("disconnect outsider"); } /// Send 3 messages with varying relevance to a query, wait for indexing, then search.
From 4a771fd08ceae9982f46309a8a559228b5fe6def Mon Sep 17 00:00:00 2001 From: npub1mprnacetjua2xx3p5eddmhxyk6wv929ymm5py8kd2xfxurxahspqqlgyta Date: Wed, 24 Jun 2026 17:55:36 -0400 Subject: [PATCH 06/10] Wire Postgres search backend through chart Co-authored-by: npub1mprnacetjua2xx3p5eddmhxyk6wv929ymm5py8kd2xfxurxahspqqlgyta Signed-off-by: npub1mprnacetjua2xx3p5eddmhxyk6wv929ymm5py8kd2xfxurxahspqqlgyta (cherry picked from commit 86368981635df8b2f050069827b70c7ceea0e5fd) Co-authored-by: Max Co-authored-by: Tyler Longwell Signed-off-by: Tyler Longwell --- crates/buzz-relay/src/config.rs | 12 ++-- crates/buzz-relay/src/handlers/event.rs | 20 +++++-- .../src/handlers/identity_archive.rs | 20 +++++-- .../buzz-relay/src/handlers/mesh_signaling.rs | 20 +++++-- deploy/charts/buzz/README.md | 48 ++++++++------- deploy/charts/buzz/ci/quickstart-values.yaml | 8 +-- deploy/charts/buzz/examples/argocd-app.yaml | 5 +- .../buzz/examples/flux-helmrelease.yaml | 5 +- .../charts/buzz/examples/secret-sample.yaml | 1 + deploy/charts/buzz/templates/NOTES.txt | 6 +- deploy/charts/buzz/templates/_helpers.tpl | 6 ++ deploy/charts/buzz/templates/_validate.tpl | 6 +- deploy/charts/buzz/templates/deployment.yaml | 4 ++ .../buzz/templates/quickstart-typesense.yaml | 2 +- .../charts/buzz/templates/secret-chart.yaml | 6 +- deploy/charts/buzz/tests/networking_test.yaml | 8 --- .../buzz/tests/quickstart_bundled_test.yaml | 9 ++- .../buzz/tests/quickstart_guards_test.yaml | 3 +- deploy/charts/buzz/tests/render_test.yaml | 59 +++++++++++++++++-- deploy/charts/buzz/tests/secrets_test.yaml | 53 +++++++++++++---- deploy/charts/buzz/tests/validation_test.yaml | 19 +----- deploy/charts/buzz/values.schema.json | 7 +++ deploy/charts/buzz/values.yaml | 21 ++++--- 23 files changed, 232 insertions(+), 116 deletions(-) diff --git a/crates/buzz-relay/src/config.rs b/crates/buzz-relay/src/config.rs index df0056314..04d43a13d 100644 --- a/crates/buzz-relay/src/config.rs +++ b/crates/buzz-relay/src/config.rs @@ 
-159,7 +159,7 @@ impl Config { "BUZZ_SEARCH_BACKEND={bad:?} (expected `typesense`, `postgres`, or `disabled`)" )) })?, - Err(_) => buzz_search::SearchBackend::Typesense, + Err(_) => buzz_search::SearchBackend::Postgres, }; let relay_url = @@ -562,17 +562,21 @@ mod tests { } #[test] - fn search_backend_defaults_to_typesense() { + fn search_backend_defaults_to_postgres() { let _guard = ENV_MUTEX.lock().unwrap(); std::env::remove_var("BUZZ_SEARCH_BACKEND"); let config = Config::from_env().expect("default config"); - assert_eq!(config.search_backend, buzz_search::SearchBackend::Typesense); + assert_eq!(config.search_backend, buzz_search::SearchBackend::Postgres); } #[test] - fn search_backend_parses_postgres_and_disabled() { + fn search_backend_parses_typesense_postgres_and_disabled() { let _guard = ENV_MUTEX.lock().unwrap(); + std::env::set_var("BUZZ_SEARCH_BACKEND", "typesense"); + let config = Config::from_env().expect("config"); + assert_eq!(config.search_backend, buzz_search::SearchBackend::Typesense); + std::env::set_var("BUZZ_SEARCH_BACKEND", "postgres"); let config = Config::from_env().expect("config"); assert_eq!(config.search_backend, buzz_search::SearchBackend::Postgres); diff --git a/crates/buzz-relay/src/handlers/event.rs b/crates/buzz-relay/src/handlers/event.rs index 4cdcf8b2a..aa6595b00 100644 --- a/crates/buzz-relay/src/handlers/event.rs +++ b/crates/buzz-relay/src/handlers/event.rs @@ -1250,13 +1250,21 @@ mod tests { .await .expect("pubsub manager"), ); - let audit = buzz_audit::AuditService::new(pool); + let audit = buzz_audit::AuditService::new(pool.clone()); let auth = buzz_auth::AuthService::new(config.auth.clone()); - let search = buzz_search::SearchService::new(buzz_search::SearchConfig { - url: config.typesense_url.clone(), - api_key: config.typesense_key.clone(), - collection: "events".to_string(), - }); + let search = match config.search_backend { + buzz_search::SearchBackend::Typesense => { + 
buzz_search::SearchService::new(buzz_search::SearchConfig { + url: config.typesense_url.clone(), + api_key: config.typesense_key.clone(), + collection: "events".to_string(), + }) + } + buzz_search::SearchBackend::Postgres => { + buzz_search::SearchService::with_postgres(pool.clone()) + } + buzz_search::SearchBackend::Disabled => buzz_search::SearchService::disabled(), + }; let workflow_engine = Arc::new(buzz_workflow::WorkflowEngine::new( db.clone(), buzz_workflow::WorkflowConfig::default(), diff --git a/crates/buzz-relay/src/handlers/identity_archive.rs b/crates/buzz-relay/src/handlers/identity_archive.rs index 1e8ed3634..47fa24778 100644 --- a/crates/buzz-relay/src/handlers/identity_archive.rs +++ b/crates/buzz-relay/src/handlers/identity_archive.rs @@ -443,13 +443,21 @@ mod tests { .await .ok()?, ); - let audit = buzz_audit::AuditService::new(pool); + let audit = buzz_audit::AuditService::new(pool.clone()); let auth = buzz_auth::AuthService::new(config.auth.clone()); - let search = buzz_search::SearchService::new(buzz_search::SearchConfig { - url: config.typesense_url.clone(), - api_key: config.typesense_key.clone(), - collection: "events".to_string(), - }); + let search = match config.search_backend { + buzz_search::SearchBackend::Typesense => { + buzz_search::SearchService::new(buzz_search::SearchConfig { + url: config.typesense_url.clone(), + api_key: config.typesense_key.clone(), + collection: "events".to_string(), + }) + } + buzz_search::SearchBackend::Postgres => { + buzz_search::SearchService::with_postgres(pool.clone()) + } + buzz_search::SearchBackend::Disabled => buzz_search::SearchService::disabled(), + }; let workflow_engine = Arc::new(buzz_workflow::WorkflowEngine::new( db.clone(), buzz_workflow::WorkflowConfig::default(), diff --git a/crates/buzz-relay/src/handlers/mesh_signaling.rs b/crates/buzz-relay/src/handlers/mesh_signaling.rs index 0f3815169..f43d4c038 100644 --- a/crates/buzz-relay/src/handlers/mesh_signaling.rs +++ 
b/crates/buzz-relay/src/handlers/mesh_signaling.rs @@ -507,13 +507,21 @@ mod tests { .await .expect("pubsub manager"), ); - let audit = buzz_audit::AuditService::new(pool); + let audit = buzz_audit::AuditService::new(pool.clone()); let auth = buzz_auth::AuthService::new(config.auth.clone()); - let search = buzz_search::SearchService::new(buzz_search::SearchConfig { - url: config.typesense_url.clone(), - api_key: config.typesense_key.clone(), - collection: "events".to_string(), - }); + let search = match config.search_backend { + buzz_search::SearchBackend::Typesense => { + buzz_search::SearchService::new(buzz_search::SearchConfig { + url: config.typesense_url.clone(), + api_key: config.typesense_key.clone(), + collection: "events".to_string(), + }) + } + buzz_search::SearchBackend::Postgres => { + buzz_search::SearchService::with_postgres(pool.clone()) + } + buzz_search::SearchBackend::Disabled => buzz_search::SearchService::disabled(), + }; let workflow_engine = std::sync::Arc::new(buzz_workflow::WorkflowEngine::new( db.clone(), buzz_workflow::WorkflowConfig::default(), diff --git a/deploy/charts/buzz/README.md b/deploy/charts/buzz/README.md index ab08200b2..480e35af0 100644 --- a/deploy/charts/buzz/README.md +++ b/deploy/charts/buzz/README.md @@ -1,13 +1,13 @@ # Buzz Helm Chart -[Buzz](https://github.com/block/buzz) is a Nostr-based messaging platform for human–agent collaboration: a single relay binary serving WebSocket + REST + web UI, backed by PostgreSQL, Redis, Typesense, and S3-compatible object storage. +[Buzz](https://github.com/block/buzz) is a Nostr-based messaging platform for human–agent collaboration: a single relay binary serving WebSocket + REST + web UI, backed by PostgreSQL, Redis, and S3-compatible object storage. NIP-50 search defaults to Postgres full-text search, with Typesense available as a fallback backend. 
This chart has two operating profiles selected by values: | Profile | When | What you get | |---|---|---| -| **Production** (default) | Self-hosted multi-tenant, regulated, or GitOps-managed | External managed Postgres/Redis/Typesense/S3, `secrets.existingSecret:`, no chart-side autogen, HA-capable (`replicaCount ≥ 2`) | -| **Quickstart** (eval) | Eval, single-node, one-off demo | In-cluster Postgres + Redis + MinIO + Typesense subcharts/Deployments, chart auto-generates relay + service secrets, single replica | +| **Production** (default) | Self-hosted multi-tenant, regulated, or GitOps-managed | External managed Postgres/Redis/S3, Postgres FTS search by default, `secrets.existingSecret:`, no chart-side autogen, HA-capable (`replicaCount ≥ 2`) | +| **Quickstart** (eval) | Eval, single-node, one-off demo | In-cluster Postgres + Redis + MinIO, chart auto-generates relay + service secrets, single replica | ## Quickstart (eval only) @@ -18,18 +18,18 @@ helm install buzz oci://ghcr.io/block/buzz/charts/buzz --version 0.1.0 \ --set postgresql.enabled=true \ --set redis.enabled=true \ --set minio.enabled=true \ - --set typesense.enabled=true \ --set relayUrl=wss://buzz.example.com \ --set ownerPubkey=<64-char-hex-pubkey> ``` -This brings up **everything in-cluster** — Postgres, Redis, MinIO (with its -bucket created by a post-install Job), and Typesense — and composes the relay's -`BUZZ_S3_ENDPOINT` / `TYPESENSE_URL` plus autogenerated credentials -automatically. No external services required. The `quickstart=true` flag is an -intent marker surfaced in NOTES.txt; the bundled services are opted in via the -four `*.enabled` flags above (see `ci/quickstart-values.yaml` for the exact set -CI installs). Eval-only: every bundled service is a single replica with no HA. 
+This brings up the required quickstart dependencies in-cluster — Postgres, +Redis, and MinIO (with its bucket created by a post-install Job) — and composes +the relay's `BUZZ_S3_ENDPOINT` plus autogenerated credentials automatically. +Search defaults to Postgres FTS (`search.backend=postgres`), so no Typesense +service is required. The `quickstart=true` flag is an intent marker surfaced in +NOTES.txt; the bundled services are opted in via the per-service `*.enabled` +flags (see `ci/quickstart-values.yaml` for the exact set CI installs). +Eval-only: every bundled service is a single replica with no HA. ## Production (GitOps) @@ -50,7 +50,7 @@ See: | `relayUrl` | Public `wss://` URL clients connect to | Always | | `ownerPubkey` | 64-char lowercase hex Nostr pubkey of the relay operator | When `relay.requireRelayMembership=true` (default) | | `secrets.existingSecret` | Name of pre-created Secret | Production / GitOps | -| `externalPostgresql.url` / `externalRedis.url` / `typesense.url` / `s3.endpoint` | External service URLs | Production — when the matching bundled service is disabled (the default) | +| `externalPostgresql.url` / `externalRedis.url` / `s3.endpoint` | External service URLs | Production — when the matching bundled service is disabled (the default) | The chart fails at `helm install` / `helm template` time with a clear message if any of these are missing or malformed (see `templates/_validate.tpl`). @@ -81,17 +81,21 @@ Save these. Losing any of them is data loss. See NOTES.txt printed by `helm inst ## Honest limitations (v1) -- **Bundled MinIO + Typesense are eval-only.** The quickstart profile runs an - in-cluster MinIO and Typesense (single replica, no HA, `lookup`-autogenerated - credentials) so the relay starts with zero external services. Production - leaves `minio.enabled` / `typesense.enabled` off and points `s3.endpoint` + - `typesense.url` (or `BUZZ_S3_*` / `TYPESENSE_URL` in `existingSecret`) at - managed S3-compatible storage and Typesense. 
The bundled Deployments are not - GitOps-safe and are not intended for production traffic. +- **Bundled MinIO is eval-only.** The quickstart profile runs an in-cluster MinIO + (single replica, no HA, `lookup`-autogenerated credentials) so the relay starts + with zero external object-storage dependencies. Production leaves + `minio.enabled` off and points `s3.endpoint` (or `BUZZ_S3_*` in + `existingSecret`) at managed S3-compatible storage. The bundled Deployment is + not GitOps-safe and is not intended for production traffic. +- **Typesense is a fallback search backend.** Default search is Postgres FTS + (`search.backend=postgres`). To use Typesense instead, set + `search.backend=typesense` and provide either `typesense.url`/`apiKey`, + `TYPESENSE_URL`/`TYPESENSE_API_KEY` in `secrets.existingSecret`, or + `typesense.enabled=true` for eval-only in-cluster Typesense. - **Minimal-mode is not yet supported.** The relay's `BUZZ_PUBSUB=local` / - `BUZZ_SEARCH=pg` / filesystem media paths are upstream work in progress — - even quickstart currently stands up real Redis, Typesense, and S3 rather than - the relay's single-node fallbacks. + filesystem media paths are upstream work in progress — even quickstart + currently stands up real Redis and S3 rather than the relay's single-node + fallbacks. - **OCI publish to GHCR + cosign signing** is a follow-up PR. For now, install the chart from source: `helm install buzz ./deploy/charts/buzz` after cloning the repo. ## Development diff --git a/deploy/charts/buzz/ci/quickstart-values.yaml b/deploy/charts/buzz/ci/quickstart-values.yaml index 71d4e76a4..3c7ac4e66 100644 --- a/deploy/charts/buzz/ci/quickstart-values.yaml +++ b/deploy/charts/buzz/ci/quickstart-values.yaml @@ -1,7 +1,9 @@ # Quickstart / eval: subcharts on, autogen secrets, single replica. 
# This is the scenario `ct install` exercises against a kind cluster — it -# spins up postgres + redis + minio + typesense in-cluster so the relay can -# actually start and pass its S3 conformance probe. +# spins up postgres + redis + minio in-cluster so the relay can actually +# start and pass its S3 conformance probe. Search defaults to Postgres FTS, so +# the quickstart no longer needs in-cluster Typesense unless explicitly testing +# search.backend=typesense. quickstart: true postgresql: enabled: true @@ -9,8 +11,6 @@ redis: enabled: true minio: enabled: true -typesense: - enabled: true relayUrl: wss://buzz.test.local ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000001" relay: diff --git a/deploy/charts/buzz/examples/argocd-app.yaml b/deploy/charts/buzz/examples/argocd-app.yaml index 612f6e4b2..f56ad78fe 100644 --- a/deploy/charts/buzz/examples/argocd-app.yaml +++ b/deploy/charts/buzz/examples/argocd-app.yaml @@ -37,9 +37,8 @@ spec: externalRedis: url: "" - typesense: - url: "http://typesense.buzz.svc.cluster.local:8108" - # apiKey lives in buzz-secrets + search: + backend: postgres s3: endpoint: "https://s3.us-east-1.amazonaws.com" diff --git a/deploy/charts/buzz/examples/flux-helmrelease.yaml b/deploy/charts/buzz/examples/flux-helmrelease.yaml index a6b6276c5..251d25dec 100644 --- a/deploy/charts/buzz/examples/flux-helmrelease.yaml +++ b/deploy/charts/buzz/examples/flux-helmrelease.yaml @@ -37,9 +37,8 @@ spec: secrets: existingSecret: buzz-secrets - typesense: - url: "http://typesense.buzz.svc.cluster.local:8108" - # apiKey lives in buzz-secrets + search: + backend: postgres s3: endpoint: "https://s3.us-east-1.amazonaws.com" diff --git a/deploy/charts/buzz/examples/secret-sample.yaml b/deploy/charts/buzz/examples/secret-sample.yaml index c842038b4..968399436 100644 --- a/deploy/charts/buzz/examples/secret-sample.yaml +++ b/deploy/charts/buzz/examples/secret-sample.yaml @@ -11,6 +11,7 @@ # REDIS_URL — redis://... 
(required when replicaCount > 1) # TYPESENSE_URL # TYPESENSE_API_KEY +# Only required when Helm value search.backend=typesense. # BUZZ_S3_ACCESS_KEY # BUZZ_S3_SECRET_KEY apiVersion: v1 diff --git a/deploy/charts/buzz/templates/NOTES.txt b/deploy/charts/buzz/templates/NOTES.txt index 35a1080ad..5560d9d73 100644 --- a/deploy/charts/buzz/templates/NOTES.txt +++ b/deploy/charts/buzz/templates/NOTES.txt @@ -26,19 +26,19 @@ ────────────────────────────────────────────────────────────────────────────── Profile ────────────────────────────────────────────────────────────────────────────── -{{ if or .Values.postgresql.enabled .Values.redis.enabled .Values.minio.enabled .Values.typesense.enabled }} +{{ if or .Values.postgresql.enabled .Values.redis.enabled .Values.minio.enabled (and .Values.typesense.enabled (eq (include "buzz.searchBackend" .) "typesense")) }} ⚠ QUICKSTART / EVALUATION PROFILE {{ if .Values.postgresql.enabled }}- In-cluster Postgres subchart (CloudPirates){{ end }} {{ if .Values.redis.enabled }}- In-cluster Redis subchart (CloudPirates){{ end }} {{ if .Values.minio.enabled }}- In-cluster MinIO (eval-only, single replica; bucket "{{ .Values.s3.bucket }}" created by post-install Job){{ end }} - {{ if .Values.typesense.enabled }}- In-cluster Typesense (eval-only, single replica){{ end }} + {{ if and .Values.typesense.enabled (eq (include "buzz.searchBackend" .) "typesense") }}- In-cluster Typesense (eval-only, single replica){{ end }} - Chart auto-generates secrets via the `lookup` pattern. This is NOT GitOps-safe — secrets will silently rotate under ArgoCD/Flux. For production, see examples/argocd-app.yaml or examples/flux-helmrelease.yaml. {{ else }} ✓ PRODUCTION PROFILE - External Postgres, Redis (if enabled), Typesense, S3. + External Postgres, Redis (if enabled), S3; search backend: {{ include "buzz.searchBackend" . }}. 
{{ if .Values.secrets.existingSecret }}- Secrets sourced from: {{ .Values.secrets.existingSecret }}{{ end }} {{ end }} diff --git a/deploy/charts/buzz/templates/_helpers.tpl b/deploy/charts/buzz/templates/_helpers.tpl index e7ac0ed16..f40963f7d 100644 --- a/deploy/charts/buzz/templates/_helpers.tpl +++ b/deploy/charts/buzz/templates/_helpers.tpl @@ -116,6 +116,12 @@ secrets.existingSecret, use that. Otherwise use the chart-managed one. {{- end -}} {{- end -}} +{{/* Search backend flag. Defaults to Postgres FTS; Typesense remains opt-in fallback. */}} +{{- define "buzz.searchBackend" -}} +{{- $search := default dict .Values.search -}} +{{- default "postgres" $search.backend -}} +{{- end -}} + {{/* In-cluster Typesense URL, used when typesense.enabled and url unset. */}} {{- define "buzz.typesenseUrl" -}} {{- if .Values.typesense.url -}} diff --git a/deploy/charts/buzz/templates/_validate.tpl b/deploy/charts/buzz/templates/_validate.tpl index a6435f606..ad072a140 100644 --- a/deploy/charts/buzz/templates/_validate.tpl +++ b/deploy/charts/buzz/templates/_validate.tpl @@ -50,9 +50,9 @@ surface at template time regardless of which manifest helm renders first. {{- fail "Postgres source missing: enable postgresql.enabled=true, set externalPostgresql.url, or provide secrets.existingSecret with key DATABASE_URL." -}} {{- end -}} -{{/* Typesense source must exist somewhere */}} -{{- if not (or .Values.typesense.enabled .Values.typesense.url .Values.secrets.existingSecret) -}} - {{- fail "Typesense source missing: enable typesense.enabled=true (quickstart in-cluster), set typesense.url + typesense.apiKey, or provide secrets.existingSecret with keys TYPESENSE_URL + TYPESENSE_API_KEY." -}} +{{/* Typesense source is required only when search.backend=typesense. */}} +{{- if and (eq (include "buzz.searchBackend" .) 
"typesense") (not (or .Values.typesense.enabled .Values.typesense.url .Values.secrets.existingSecret)) -}} + {{- fail "Typesense source missing for search.backend=typesense: enable typesense.enabled=true (quickstart in-cluster), set typesense.url + typesense.apiKey, or provide secrets.existingSecret with keys TYPESENSE_URL + TYPESENSE_API_KEY." -}} {{- end -}} {{/* S3 / object-storage source must exist somewhere (relay hard-fails its diff --git a/deploy/charts/buzz/templates/deployment.yaml b/deploy/charts/buzz/templates/deployment.yaml index e234fb8e4..7c59c8294 100644 --- a/deploy/charts/buzz/templates/deployment.yaml +++ b/deploy/charts/buzz/templates/deployment.yaml @@ -1,4 +1,5 @@ {{- include "buzz.validate" . -}} +{{- $searchBackend := include "buzz.searchBackend" . -}} apiVersion: apps/v1 kind: Deployment metadata: @@ -80,6 +81,7 @@ spec: - { name: BUZZ_REQUIRE_RELAY_MEMBERSHIP, value: {{ .Values.relay.requireRelayMembership | quote }} } - { name: BUZZ_ALLOW_NIP_OA_AUTH, value: {{ .Values.relay.allowNipOaAuth | quote }} } - { name: BUZZ_PUBKEY_ALLOWLIST, value: {{ .Values.relay.pubkeyAllowlist | quote }} } + - { name: BUZZ_SEARCH_BACKEND, value: {{ $searchBackend | quote }} } {{- if .Values.relay.corsOrigins }} - { name: BUZZ_CORS_ORIGINS, value: {{ join "," .Values.relay.corsOrigins | quote }} } {{- end }} @@ -130,6 +132,7 @@ spec: name: {{ include "buzz.envSecretName" . }} key: REDIS_URL optional: {{ and (eq (.Values.replicaCount | int) 1) (not .Values.redis.enabled) (not .Values.externalRedis.url) }} + {{- if eq $searchBackend "typesense" }} - name: TYPESENSE_URL valueFrom: secretKeyRef: @@ -140,6 +143,7 @@ spec: secretKeyRef: name: {{ include "buzz.envSecretName" . 
}} key: TYPESENSE_API_KEY + {{- end }} - name: BUZZ_S3_ACCESS_KEY valueFrom: secretKeyRef: diff --git a/deploy/charts/buzz/templates/quickstart-typesense.yaml b/deploy/charts/buzz/templates/quickstart-typesense.yaml index 7184d5c5f..75f093edb 100644 --- a/deploy/charts/buzz/templates/quickstart-typesense.yaml +++ b/deploy/charts/buzz/templates/quickstart-typesense.yaml @@ -4,7 +4,7 @@ single replica, no TLS, API key from the chart-managed Secret. Production deploys leave typesense.enabled=false and point typesense.url/apiKey (or secrets.existingSecret) at a managed Typesense service. */ -}} -{{- if .Values.typesense.enabled -}} +{{- if and .Values.typesense.enabled (eq (include "buzz.searchBackend" .) "typesense") -}} {{- if .Values.secrets.existingSecret -}} {{- fail "typesense.enabled=true (quickstart) is incompatible with secrets.existingSecret. Quickstart autogenerates the Typesense key in the chart-managed Secret; for external Typesense set typesense.enabled=false and provide TYPESENSE_URL/TYPESENSE_API_KEY." -}} {{- end -}} diff --git a/deploy/charts/buzz/templates/secret-chart.yaml b/deploy/charts/buzz/templates/secret-chart.yaml index 0a7677bad..edf2ef842 100644 --- a/deploy/charts/buzz/templates/secret-chart.yaml +++ b/deploy/charts/buzz/templates/secret-chart.yaml @@ -73,8 +73,9 @@ data: REDIS_URL: {{ .Values.externalRedis.url | b64enc | quote }} {{- end }} - {{- /* Typesense — bundled (quickstart) composes URL + autogen key; else - pass through external values. */}} + {{- /* Typesense — only when search.backend=typesense. Bundled quickstart + composes URL + autogen key; else pass through external values. */}} + {{- if eq (include "buzz.searchBackend" .) 
"typesense" }} {{- if .Values.typesense.enabled }} {{- $tsKey := "" }} {{- if (index $existingData "TYPESENSE_API_KEY") }} @@ -94,6 +95,7 @@ data: TYPESENSE_API_KEY: {{ .Values.typesense.apiKey | b64enc | quote }} {{- end }} {{- end }} + {{- end }} {{- /* S3 creds — bundled MinIO (quickstart) autogenerates; else pass through external values. */}} diff --git a/deploy/charts/buzz/tests/networking_test.yaml b/deploy/charts/buzz/tests/networking_test.yaml index e925bbb2d..882679f2e 100644 --- a/deploy/charts/buzz/tests/networking_test.yaml +++ b/deploy/charts/buzz/tests/networking_test.yaml @@ -9,8 +9,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -29,8 +27,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -47,8 +43,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -68,8 +62,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s diff --git a/deploy/charts/buzz/tests/quickstart_bundled_test.yaml b/deploy/charts/buzz/tests/quickstart_bundled_test.yaml index 7acb8ab75..547e7125c 100644 --- 
a/deploy/charts/buzz/tests/quickstart_bundled_test.yaml +++ b/deploy/charts/buzz/tests/quickstart_bundled_test.yaml @@ -1,6 +1,6 @@ suite: quickstart bundled services -# The dev quickstart must stand up MinIO + Typesense in-cluster so the relay's -# startup S3 conformance probe passes with zero external dependencies. +# The dev quickstart must stand up MinIO in-cluster so the relay's startup S3 +# conformance probe passes. Typesense is only bundled when search.backend=typesense. templates: - templates/quickstart-minio.yaml - templates/quickstart-minio-init.yaml @@ -17,6 +17,7 @@ tests: ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" postgresql.enabled: true redis.enabled: true + search.backend: typesense typesense.enabled: true minio.enabled: true asserts: @@ -40,6 +41,7 @@ tests: ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" postgresql.enabled: true redis.enabled: true + search.backend: typesense typesense.enabled: true minio.enabled: true asserts: @@ -58,6 +60,7 @@ tests: ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" postgresql.enabled: true redis.enabled: true + search.backend: typesense typesense.enabled: true minio.enabled: true asserts: @@ -76,6 +79,7 @@ tests: ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" postgresql.enabled: true redis.enabled: true + search.backend: typesense typesense.enabled: true minio.enabled: true asserts: @@ -103,6 +107,7 @@ tests: ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" postgresql.enabled: true redis.enabled: true + search.backend: typesense typesense.enabled: true minio.enabled: true asserts: diff --git a/deploy/charts/buzz/tests/quickstart_guards_test.yaml b/deploy/charts/buzz/tests/quickstart_guards_test.yaml index 9054cf5be..7b490e9b8 100644 --- a/deploy/charts/buzz/tests/quickstart_guards_test.yaml +++ b/deploy/charts/buzz/tests/quickstart_guards_test.yaml 
@@ -10,8 +10,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" postgresql.enabled: true - typesense.url: http://ts:8108 - typesense.apiKey: k minio.enabled: true secrets.existingSecret: "buzz-secrets" asserts: @@ -28,6 +26,7 @@ tests: s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s + search.backend: typesense typesense.enabled: true secrets.existingSecret: "buzz-secrets" asserts: diff --git a/deploy/charts/buzz/tests/render_test.yaml b/deploy/charts/buzz/tests/render_test.yaml index 970aeeab8..353ce7c30 100644 --- a/deploy/charts/buzz/tests/render_test.yaml +++ b/deploy/charts/buzz/tests/render_test.yaml @@ -8,14 +8,12 @@ templates: - templates/service.yaml - templates/pvc-git.yaml tests: - - it: renders cleanly in production profile (external pg/redis/typesense) + - it: renders cleanly in production profile (external pg/redis/s3, postgres search) set: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d externalRedis.url: redis://h:6379 - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -39,8 +37,6 @@ tests: ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d externalRedis.url: redis://h:6379 - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -55,3 +51,56 @@ tests: path: spec.accessModes[0] value: ReadWriteMany template: templates/pvc-git.yaml + + - it: postgres backend does not require a Typesense source + set: + relayUrl: wss://buzz.example.com + ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" + externalPostgresql.url: postgres://u:p@h:5432/d + s3.endpoint: http://minio:9000 + s3.accessKey: a + s3.secretKey: s + asserts: + - 
contains: + path: spec.template.spec.containers[0].env + content: + name: BUZZ_SEARCH_BACKEND + value: "postgres" + template: templates/deployment.yaml + - notContains: + path: spec.template.spec.containers[0].env + content: + name: TYPESENSE_URL + template: templates/deployment.yaml + - notContains: + path: spec.template.spec.containers[0].env + content: + name: TYPESENSE_API_KEY + template: templates/deployment.yaml + + - it: disabled backend does not require a Typesense source + set: + relayUrl: wss://buzz.example.com + ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" + externalPostgresql.url: postgres://u:p@h:5432/d + search.backend: disabled + s3.endpoint: http://minio:9000 + s3.accessKey: a + s3.secretKey: s + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: BUZZ_SEARCH_BACKEND + value: "disabled" + template: templates/deployment.yaml + - notContains: + path: spec.template.spec.containers[0].env + content: + name: TYPESENSE_URL + template: templates/deployment.yaml + - notContains: + path: spec.template.spec.containers[0].env + content: + name: TYPESENSE_API_KEY + template: templates/deployment.yaml diff --git a/deploy/charts/buzz/tests/secrets_test.yaml b/deploy/charts/buzz/tests/secrets_test.yaml index 1a3690c1f..eaedf83ac 100644 --- a/deploy/charts/buzz/tests/secrets_test.yaml +++ b/deploy/charts/buzz/tests/secrets_test.yaml @@ -8,8 +8,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -31,8 +29,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 
s3.accessKey: a s3.secretKey: s @@ -47,8 +43,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -70,8 +64,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -93,8 +85,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s @@ -106,6 +96,47 @@ tests: value: "true" template: templates/deployment.yaml + - it: postgres search Secret omits Typesense keys + set: + relayUrl: wss://buzz.example.com + ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" + externalPostgresql.url: postgres://u:p@h:5432/d + s3.endpoint: http://minio:9000 + s3.accessKey: a + s3.secretKey: s + typesense.url: http://ts:8108 + typesense.apiKey: k + asserts: + - notExists: + path: data.TYPESENSE_URL + template: templates/secret-chart.yaml + - notExists: + path: data.TYPESENSE_API_KEY + template: templates/secret-chart.yaml + + - it: typesense search Secret includes Typesense keys + set: + relayUrl: wss://buzz.example.com + ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" + externalPostgresql.url: postgres://u:p@h:5432/d + search.backend: typesense + typesense.url: http://ts:8108 + typesense.apiKey: k + s3.endpoint: http://minio:9000 + s3.accessKey: a + s3.secretKey: s + asserts: + - equal: + path: data.TYPESENSE_URL + decodeBase64: true 
+ value: http://ts:8108 + template: templates/secret-chart.yaml + - equal: + path: data.TYPESENSE_API_KEY + decodeBase64: true + value: k + template: templates/secret-chart.yaml + - it: quickstart composes DATABASE_URL/REDIS_URL at the actual subchart Service hosts release: name: rel @@ -114,8 +145,6 @@ tests: ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" postgresql.enabled: true redis.enabled: true - typesense.url: http://ts:8108 - typesense.apiKey: k s3.endpoint: http://minio:9000 s3.accessKey: a s3.secretKey: s diff --git a/deploy/charts/buzz/tests/validation_test.yaml b/deploy/charts/buzz/tests/validation_test.yaml index 56e656ae8..1b9e503eb 100644 --- a/deploy/charts/buzz/tests/validation_test.yaml +++ b/deploy/charts/buzz/tests/validation_test.yaml @@ -7,8 +7,6 @@ tests: relayUrl: "" ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k asserts: - failedTemplate: errorMessage: "relayUrl is required: set --set relayUrl=wss://your.domain" @@ -18,8 +16,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k asserts: - failedTemplate: errorPattern: "ownerPubkey is required when relay.requireRelayMembership=true" @@ -29,8 +25,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "NOTAHEX" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k asserts: - failedTemplate: errorPattern: "ownerPubkey: Does not match pattern" @@ -40,8 +34,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k replicaCount: 3 asserts: - failedTemplate: @@ -53,8 +45,6 @@ tests: ownerPubkey: 
"0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d externalRedis.url: redis://h:6379 - typesense.url: http://ts:8108 - typesense.apiKey: k replicaCount: 3 persistence.git.accessMode: ReadWriteOnce asserts: @@ -66,8 +56,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k ingress.enabled: true httproute.enabled: true asserts: @@ -78,17 +66,16 @@ tests: set: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" - typesense.url: http://ts:8108 - typesense.apiKey: k asserts: - failedTemplate: errorPattern: "Postgres source missing" - - it: fails when Typesense source is missing + - it: fails when Typesense source is missing for typesense backend set: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d + search.backend: typesense asserts: - failedTemplate: errorPattern: "Typesense source missing" @@ -98,8 +85,6 @@ tests: relayUrl: wss://buzz.example.com ownerPubkey: "0000000000000000000000000000000000000000000000000000000000000000" externalPostgresql.url: postgres://u:p@h:5432/d - typesense.url: http://ts:8108 - typesense.apiKey: k asserts: - failedTemplate: errorPattern: "S3/object-storage source missing" diff --git a/deploy/charts/buzz/values.schema.json b/deploy/charts/buzz/values.schema.json index 38f7b204d..cc4aecf25 100644 --- a/deploy/charts/buzz/values.schema.json +++ b/deploy/charts/buzz/values.schema.json @@ -162,6 +162,13 @@ "url": { "type": "string", "pattern": "^(rediss?://.+)?$" } } }, + "search": { + "type": "object", + "additionalProperties": false, + "properties": { + "backend": { "type": "string", "enum": ["postgres", "typesense", "disabled"] } + } + }, 
"typesense": { "type": "object", "additionalProperties": false, diff --git a/deploy/charts/buzz/values.yaml b/deploy/charts/buzz/values.yaml index 15a70ec28..143457b6e 100644 --- a/deploy/charts/buzz/values.yaml +++ b/deploy/charts/buzz/values.yaml @@ -6,11 +6,11 @@ # refs everywhere, no chart-side autogeneration, GitOps-safe (ArgoCD/Flux). # HA-ready: replicaCount >= 2 (requires Redis and RWX storage for git). # -# QUICKSTART — bundles in-cluster Postgres + Redis + MinIO + Typesense and -# auto-generates relay secrets via the `lookup` pattern (NOT GitOps-safe — -# see README), single replica, evaluation only. Opt in by enabling each -# bundled service: postgresql.enabled, redis.enabled, minio.enabled, -# typesense.enabled. See ci/quickstart-values.yaml and the README. +# QUICKSTART — bundles in-cluster Postgres + Redis + MinIO and auto-generates +# relay secrets via the `lookup` pattern (NOT GitOps-safe — see README), +# single replica, evaluation only. Opt in by enabling each bundled service: +# postgresql.enabled, redis.enabled, minio.enabled. Typesense is needed only +# when search.backend=typesense. See ci/quickstart-values.yaml and the README. # # See examples/argocd-app.yaml and examples/flux-helmrelease.yaml for the # canonical GitOps configurations.
@@ -56,8 +56,8 @@ ownerPubkey: "" # BUZZ_GIT_HOOK_HMAC_SECRET — 32+ chars; required when replicaCount > 1 # DATABASE_URL — full Postgres URL (preferred over externalPostgresql.url) # REDIS_URL — full Redis URL with auth -# TYPESENSE_URL — Typesense base URL -# TYPESENSE_API_KEY — Typesense API key +# TYPESENSE_URL — Typesense base URL (only when search.backend=typesense) +# TYPESENSE_API_KEY — Typesense API key (only when search.backend=typesense) # BUZZ_S3_ACCESS_KEY — S3 access key # BUZZ_S3_SECRET_KEY — S3 secret key secrets: @@ -211,6 +211,13 @@ redis: externalRedis: url: "" # redis://:pass@host:6379 +# ── Search ─────────────────────────────────────────────────────────────────── +# Postgres FTS is the default. Set backend=typesense only while using the +# fallback Typesense index; set backend=disabled to fail NIP-50 search closed +# (queries return no hits, without weakening auth/channel filtering). +search: + backend: postgres # postgres | typesense | disabled + # ── Typesense ──────────────────────────────────────────────────────────────── # Production: point url/apiKey at an external Typesense service (or supply # TYPESENSE_URL/TYPESENSE_API_KEY via secrets.existingSecret). From 5dfeb7fbe00a123f69419030256afb166d6575bc Mon Sep 17 00:00:00 2001 From: npub1qyvc0c5kl4gqv2fd97fsk46tu378sqgy35vc83rvgfwne90sel7s0ed67d <011987e296fd5006292d2f930b574be47c7801048d1983c46c425d3c95f0cffd@sprout-oss.stage.blox.sqprod.co> Date: Wed, 24 Jun 2026 19:22:28 -0400 Subject: [PATCH 07/10] test(e2e): use private channel for cross-author search isolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cross-author isolation test was vacuously failing on BOTH backends: it created an `open` channel, which is searchable by anyone by design (get_accessible_channel_ids unions member channels with ALL open channels), so the outsider legitimately found the author's message. Switch the test to create_private_test_channel. 
In a private channel the creator is bootstrapped as a member (so the author still finds their own post — the non-vacuous control), while the outsider is not a member and gets zero hits. This makes the test a true visibility-widening guard, backend-independent by construction. Adds create_private_test_channel / create_channel_with_visibility helpers; create_test_channel now delegates to the open variant (no behavior change for existing callers). Co-authored-by: Tyler Longwell Signed-off-by: Tyler Longwell --- .../tests/e2e_nostr_interop.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/crates/buzz-test-client/tests/e2e_nostr_interop.rs b/crates/buzz-test-client/tests/e2e_nostr_interop.rs index 9d0c275e8..008e4c1ce 100644 --- a/crates/buzz-test-client/tests/e2e_nostr_interop.rs +++ b/crates/buzz-test-client/tests/e2e_nostr_interop.rs @@ -44,6 +44,17 @@ fn sub_id(name: &str) -> String { /// Create a real channel in the DB via REST so the relay accepts events for it. async fn create_test_channel(keys: &Keys) -> String { + create_channel_with_visibility(keys, "open").await +} + +/// Like `create_test_channel` but creates a `private` (invite-only, non-searchable +/// by non-members) channel. Used by the cross-author search-isolation test, where +/// an *open* channel would be visible to outsiders by design. 
+async fn create_private_test_channel(keys: &Keys) -> String { + create_channel_with_visibility(keys, "private").await +} + +async fn create_channel_with_visibility(keys: &Keys, visibility: &str) -> String { let client = reqwest::Client::new(); let pubkey_hex = keys.public_key().to_hex(); let channel_uuid = uuid::Uuid::new_v4(); @@ -54,7 +65,7 @@ async fn create_test_channel(keys: &Keys) -> String { Tag::parse(["h", &channel_uuid.to_string()]).unwrap(), Tag::parse(["name", &channel_name]).unwrap(), Tag::parse(["channel_type", "stream"]).unwrap(), - Tag::parse(["visibility", "open"]).unwrap(), + Tag::parse(["visibility", visibility]).unwrap(), ]) .sign_with_keys(keys) .unwrap(); @@ -1107,8 +1118,10 @@ async fn test_nip50_search_cross_author_isolation() { let author = Keys::generate(); let outsider = Keys::generate(); - // Author A owns a private-by-default working channel and posts a token. - let channel = create_test_channel(&author).await; + // Author A owns a PRIVATE (invite-only) working channel and posts a token. + // Must be private: open channels are searchable by anyone by design, so the + // outsider would legitimately find the message and this test would be vacuous. 
+ let channel = create_private_test_channel(&author).await; let unique_token = format!("isolation_{}", uuid::Uuid::new_v4().simple()); let content = format!("secret in A's channel {unique_token}"); From bba6d20bb20da6e2ac3e64bd3f01785233e75eb0 Mon Sep 17 00:00:00 2001 From: npub1qyvc0c5kl4gqv2fd97fsk46tu378sqgy35vc83rvgfwne90sel7s0ed67d <011987e296fd5006292d2f930b574be47c7801048d1983c46c425d3c95f0cffd@sprout-oss.stage.blox.sqprod.co> Date: Wed, 24 Jun 2026 19:25:54 -0400 Subject: [PATCH 08/10] test(e2e): add disabled-backend fail-closed search test (gate #2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds test_nip50_search_disabled_fails_closed: posts a message that the postgres/typesense backends provably return, then asserts the relay delivers EOSE with zero events. Proves BUZZ_SEARCH_BACKEND=disabled fails closed — NIP-50 search leaks nothing regardless of how well content would otherwise match. Introduces a test_backend() helper reading BUZZ_TEST_BACKEND; the disabled test early-returns (skips) unless the relay-under-test is the disabled backend, so the full suite stays green against all three backends. Matrix verified green: typesense 9/9, postgres 9/9 (identical parity), disabled 6/6 incl. the fail-closed assertion. Co-authored-by: Tyler Longwell Signed-off-by: Tyler Longwell --- .../tests/e2e_nostr_interop.rs | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/crates/buzz-test-client/tests/e2e_nostr_interop.rs b/crates/buzz-test-client/tests/e2e_nostr_interop.rs index 008e4c1ce..2ac692af9 100644 --- a/crates/buzz-test-client/tests/e2e_nostr_interop.rs +++ b/crates/buzz-test-client/tests/e2e_nostr_interop.rs @@ -42,6 +42,15 @@ fn sub_id(name: &str) -> String { format!("e2e-{name}-{}", uuid::Uuid::new_v4()) } +/// The search backend the relay-under-test is configured with, surfaced to the +/// test process via `BUZZ_TEST_BACKEND` (`typesense` | `postgres` | `disabled`). 
+/// The matrix runner sets this to match the relay's `BUZZ_SEARCH_BACKEND`. +/// Backend-specific tests early-return when it does not match, so the same +/// suite is safe to run against any backend. +fn test_backend() -> String { + std::env::var("BUZZ_TEST_BACKEND").unwrap_or_else(|_| "postgres".to_string()) +} + /// Create a real channel in the DB via REST so the relay accepts events for it. async fn create_test_channel(keys: &Keys) -> String { create_channel_with_visibility(keys, "open").await @@ -365,6 +374,75 @@ async fn test_nip50_search_returns_results_and_eose() { client.disconnect().await.expect("disconnect"); } +/// Gate #2 — `BUZZ_SEARCH_BACKEND=disabled` fails closed. +/// +/// Posts a message that the postgres/typesense backends provably return (it is +/// the exact same setup as `test_nip50_search_returns_results_and_eose`), then +/// asserts the relay delivers EOSE with **zero** events. A disabled backend must +/// never leak content: NIP-50 search returns empty for every query, regardless +/// of how well it would otherwise match. +/// +/// Only meaningful against a relay configured with the disabled backend, so it +/// early-returns (skips) unless `BUZZ_TEST_BACKEND=disabled`. The matrix runner +/// sets that env to match the relay's `BUZZ_SEARCH_BACKEND`. +#[tokio::test] +#[ignore] +async fn test_nip50_search_disabled_fails_closed() { + if test_backend() != "disabled" { + eprintln!( + "skipping test_nip50_search_disabled_fails_closed: BUZZ_TEST_BACKEND={} (need `disabled`)", + test_backend() + ); + return; + } + + let url = relay_url(); + let keys = Keys::generate(); + let channel = create_test_channel(&keys).await; + + // Post a message that WOULD match under a real backend — same shape as the + // positive "returns results" test, so a non-empty result here would prove a + // working index, and an empty result proves the disabled backend held closed. 
+ let unique_token = format!("searchtoken_{}", uuid::Uuid::new_v4().simple()); + let content = format!("Hello world {unique_token}"); + + let mut client = BuzzTestClient::connect(&url, &keys).await.expect("connect"); + + let ok = client + .send_text_message(&keys, &channel, &content, 9) + .await + .expect("send message"); + assert!(ok.accepted, "relay rejected message: {}", ok.message); + + // Generous settle window — under a real backend this is when indexing lands. + tokio::time::sleep(Duration::from_millis(800)).await; + + let sid = sub_id("nip50-disabled"); + let filter = Filter::new() + .kind(Kind::Custom(9)) + .search(&unique_token) + .custom_tags(SingleLetterTag::lowercase(Alphabet::H), [channel.as_str()]); + + client + .subscribe(&sid, vec![filter]) + .await + .expect("subscribe"); + + let events = client + .collect_until_eose(&sid, Duration::from_secs(10)) + .await + .expect("collect until EOSE"); + + assert!( + events.is_empty(), + "disabled backend returned {} search hit(s) — search did NOT fail closed. events: {:?}", + events.len(), + events.iter().map(|e| &e.content).collect::<Vec<_>>() + ); + + client.disconnect().await.expect("disconnect"); +} + /// Subscribe with mixed search + non-search filters. /// Verify: relay sends CLOSED with error message containing "mixed". #[tokio::test] From 2088922a062e58cf5872b94b53642fa701c25cc7 Mon Sep 17 00:00:00 2001 From: npub1qyvc0c5kl4gqv2fd97fsk46tu378sqgy35vc83rvgfwne90sel7s0ed67d <011987e296fd5006292d2f930b574be47c7801048d1983c46c425d3c95f0cffd@sprout-oss.stage.blox.sqprod.co> Date: Wed, 24 Jun 2026 19:26:42 -0400 Subject: [PATCH 09/10] docs(testing): document the search-backend test matrix Adds a "Search Backend Test Matrix" section to TESTING.md covering the BUZZ_SEARCH_BACKEND flag, the two gates (no visibility widening; disabled fails closed), the per-backend test table with skip rationale, and how to run the suite with BUZZ_TEST_BACKEND. Adds BUZZ_SEARCH_BACKEND to the config reference.
Co-authored-by: Tyler Longwell Signed-off-by: Tyler Longwell --- TESTING.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/TESTING.md b/TESTING.md index 2e3a1bdf5..76e9236f2 100644 --- a/TESTING.md +++ b/TESTING.md @@ -18,6 +18,55 @@ cargo test -p buzz-test-client -- --ignored --- +## Search Backend Test Matrix + +NIP-50 search runs behind the `BUZZ_SEARCH_BACKEND` flag (`typesense` | +`postgres` | `disabled`, **default `postgres`**). The relay enforces two +non-negotiable gates that must hold *identically across all three backends*: + +- **Gate #1 — no visibility widening.** A search must never return an event + the caller couldn't otherwise read. The auth/`#p` gates in `handle_req` run + *before* the backend call, and `handle_search_req` re-applies `filters_match` + to every hit, so the post-filter is backend-independent by construction. +- **Gate #2 — `disabled` fails closed.** With `BUZZ_SEARCH_BACKEND=disabled`, + every NIP-50 query returns empty — no content leaks regardless of how well it + would otherwise match. + +The e2e search suite lives in +[`crates/buzz-test-client/tests/e2e_nostr_interop.rs`](crates/buzz-test-client/tests/e2e_nostr_interop.rs) +(all `#[ignore]`, require a running relay). The relay's backend is surfaced to +the test process via `BUZZ_TEST_BACKEND`; backend-specific tests early-return +(skip) when it doesn't match, so the same suite is safe against any backend. 
+ +| Test | typesense | postgres | disabled | Proves | +|------|:---------:|:--------:|:--------:|--------| +| `test_nip50_search_returns_results_and_eose` | ✅ | ✅ | skip¹ | search finds a matching message; one-shot (no live events post-EOSE) | +| `test_nip50_search_relevance_order` | ✅ | ✅ | skip¹ | rank-based ordering (proximity beats recency) | +| `test_nip50_search_cross_author_isolation` | ✅ | ✅ | skip¹ | **gate #1**: outsider gets 0 hits from a *private* channel | +| `test_nip17_gift_wrap_not_searchable` | ✅ | ✅ | skip¹ | **gate #1**: kind:1059 never surfaces via search; kind:9 control does | +| `test_nip50_search_disabled_fails_closed` | skip² | skip² | ✅ | **gate #2**: a would-match query returns empty under `disabled` | +| `test_nip50_search_empty_results` | ✅ | ✅ | ✅ | a non-matching query yields EOSE with no events | +| `test_nip50_search_mixed_filters_rejected` | ✅ | ✅ | ✅ | mixed search + non-search filters → CLOSED | +| `test_nip17_gift_wrap_accepted` / `_requires_p_filter` / `_recipient_receives` | ✅ | ✅ | ✅ | NIP-17 accept/auth paths (backend-independent) | + +¹ Hit-dependent — asserts a non-empty result, so it is only run against a real +backend. ² Asserts empty — only meaningful, and only run, under `disabled`. + +To exercise the matrix, launch a relay per backend (set `BUZZ_SEARCH_BACKEND`) +and run the suite with `BUZZ_TEST_BACKEND` set to match. For a real backend: + +```bash +BUZZ_SEARCH_BACKEND=postgres buzz-relay & # or typesense / disabled +RELAY_URL=ws://localhost:3000 BUZZ_TEST_BACKEND=postgres \ + cargo test -p buzz-test-client --test e2e_nostr_interop -- --ignored +``` + +For `disabled`, run only the fail-closed + result-independent tests — the +hit-dependent ones skip themselves, so a full `--ignored` run is also safe but +exercises fewer assertions. + +--- + ## Live Local Relay The fastest way to exercise the relay end-to-end is to build the release @@ -267,7 +316,8 @@ out of the box with `just setup` or `just relay`. 
Common overrides: | `RELAY_URL` | `ws://localhost:3000` | Advertised in NIP-11 / NIP-42 challenges. **Note: no `BUZZ_` prefix.** | | `DATABASE_URL` | `postgres://buzz:buzz_dev@localhost:5432/buzz` | | | `REDIS_URL` | `redis://localhost:6379` | | -| `TYPESENSE_URL` | `http://localhost:8108` | | +| `TYPESENSE_URL` | `http://localhost:8108` | Only used when `BUZZ_SEARCH_BACKEND=typesense` | +| `BUZZ_SEARCH_BACKEND` | `postgres` | NIP-50 search backend: `typesense`, `postgres`, or `disabled` (fails closed) | | `BUZZ_REQUIRE_AUTH_TOKEN` | `false` | When true, REST requires NIP-98 (no `X-Pubkey` fallback) | | `BUZZ_REQUIRE_RELAY_MEMBERSHIP` | `false` | When true, only pubkeys in `relay_members` can connect | | `BUZZ_AUTO_MIGRATE` | `false` | Opt in with `true`/`1`/`yes`/`on` to run embedded SQLx migrations on relay startup | From f39237557a3e4f4cac5b53ef1986a6ba57b7b9a6 Mon Sep 17 00:00:00 2001 From: npub1qyvc0c5kl4gqv2fd97fsk46tu378sqgy35vc83rvgfwne90sel7s0ed67d <011987e296fd5006292d2f930b574be47c7801048d1983c46c425d3c95f0cffd@sprout-oss.stage.blox.sqprod.co> Date: Wed, 24 Jun 2026 19:38:25 -0400 Subject: [PATCH 10/10] fix(search): use clamp for per_page (clippy -D warnings) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI's Rust Lint and Windows Rust jobs run `cargo clippy --all-targets -- -D warnings`; the manual `min(MAX_PER_PAGE).max(1)` clamp pattern in the Postgres backend tripped clippy::manual_clamp and failed both. Replace with `.clamp(1, MAX_PER_PAGE)` — identical result (1 <= 250). Co-authored-by: Tyler Longwell Signed-off-by: Tyler Longwell --- crates/buzz-search/src/postgres.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/buzz-search/src/postgres.rs b/crates/buzz-search/src/postgres.rs index e4e376407..d7a4edbb0 100644 --- a/crates/buzz-search/src/postgres.rs +++ b/crates/buzz-search/src/postgres.rs @@ -47,7 +47,7 @@ pub async fn search(pool: &PgPool, query: &SearchQuery) -> Result