From 8af2a8d31c1468a57deb5a7b1e0b50608d447d28 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 21:58:24 +1000
Subject: [PATCH 001/172] Enable foreign_keys pragma on SQLite native pool

---
 src/db/mod.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/db/mod.rs b/src/db/mod.rs
index dca5d31..b06072b 100644
--- a/src/db/mod.rs
+++ b/src/db/mod.rs
@@ -327,6 +327,7 @@ impl DbPool {
                 sqlx::sqlite::SqliteConnectOptions::new()
                     .filename(&cfg.path)
                     .create_if_missing(cfg.create_if_missing)
+                    .foreign_keys(true)
                     .journal_mode(if cfg.wal_mode {
                         sqlx::sqlite::SqliteJournalMode::Wal
                     } else {
@@ -336,6 +337,13 @@ impl DbPool {
                 )
                 .await?;
 
+            let fk_check: i64 = sqlx::query_scalar("PRAGMA foreign_keys")
+                .fetch_one(&pool)
+                .await?;
+            if fk_check != 1 {
+                return Err(DbError::NotConfigured);
+            }
+
             let repos = CachedRepos {
                 organizations: Arc::new(sqlite::SqliteOrganizationRepo::new(pool.clone())),
                 projects: Arc::new(sqlite::SqliteProjectRepo::new(pool.clone())),

From f81bb8c543d153777cfff41011614c134f9f4ef4 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:00:17 +1000
Subject: [PATCH 002/172] Wire Postgres pool config (timeouts and ssl mode)

---
 src/db/mod.rs | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/db/mod.rs b/src/db/mod.rs
index b06072b..d7fd087 100644
--- a/src/db/mod.rs
+++ b/src/db/mod.rs
@@ -393,21 +393,33 @@ impl DbPool {
             }
             #[cfg(feature = "database-postgres")]
             DatabaseConfig::Postgres(cfg) => {
-                let write_pool = sqlx::postgres::PgPoolOptions::new()
-                    .min_connections(cfg.min_connections)
-                    .max_connections(cfg.max_connections)
-                    .connect(&cfg.url)
-                    .await?;
+                let ssl_mode = match cfg.ssl_mode {
+                    crate::config::PostgresSslMode::Disable => sqlx::postgres::PgSslMode::Disable,
+                    crate::config::PostgresSslMode::Prefer => sqlx::postgres::PgSslMode::Prefer,
+                    crate::config::PostgresSslMode::Require => sqlx::postgres::PgSslMode::Require,
+                    crate::config::PostgresSslMode::VerifyCa => sqlx::postgres::PgSslMode::VerifyCa,
+                    crate::config::PostgresSslMode::VerifyFull => {
+                        sqlx::postgres::PgSslMode::VerifyFull
+                    }
+                };
+                let connect_opts = |url: &str| -> Result<sqlx::postgres::PgConnectOptions, DbError> {
+                    let opts: sqlx::postgres::PgConnectOptions = url.parse().map_err(|e| {
+                        DbError::Validation(format!("Invalid Postgres URL: {e}"))
+                    })?;
+                    Ok(opts.ssl_mode(ssl_mode))
+                };
+                let pool_opts = || {
+                    sqlx::postgres::PgPoolOptions::new()
+                        .min_connections(cfg.min_connections)
+                        .max_connections(cfg.max_connections)
+                        .acquire_timeout(std::time::Duration::from_secs(cfg.connect_timeout_secs))
+                        .idle_timeout(std::time::Duration::from_secs(cfg.idle_timeout_secs))
+                };
+                let write_pool = pool_opts().connect_with(connect_opts(&cfg.url)?).await?;
                 let read_pool = if let Some(read_url) = &cfg.read_url {
                     tracing::info!("Configuring read replica pool");
-                    Some(
-                        sqlx::postgres::PgPoolOptions::new()
-                            .min_connections(cfg.min_connections)
-                            .max_connections(cfg.max_connections)
-                            .connect(read_url)
-                            .await?,
-                    )
+                    Some(pool_opts().connect_with(connect_opts(read_url)?).await?)
                } else {
                    None
                };

From 84ee9867ccce68fae0dc4bee13a81d2e62a5d170 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:00:49 +1000
Subject: [PATCH 003/172] Attach ConnectInfo to axum service for client IP extraction

---
 src/cli/server.rs | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/cli/server.rs b/src/cli/server.rs
index 8e570d2..33ed357 100644
--- a/src/cli/server.rs
+++ b/src/cli/server.rs
@@ -400,11 +400,17 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
     #[cfg(not(feature = "wizard"))]
     let _ = no_browser;
 
-    // Graceful shutdown: wait for SIGINT/SIGTERM, then wait for all background tasks
-    axum::serve(listener, app)
-        .with_graceful_shutdown(shutdown_signal(task_tracker, usage_buffer_handle))
-        .await
-        .unwrap();
+    // Graceful shutdown: wait for SIGINT/SIGTERM, then wait for all background tasks.
+    // `into_make_service_with_connect_info` is required so middleware can read the
+    // connecting peer address via `ConnectInfo` for IP-based rate limits,
+    // API-key IP allowlists, and audit logging.
+    axum::serve(
+        listener,
+        app.into_make_service_with_connect_info::<std::net::SocketAddr>(),
+    )
+    .with_graceful_shutdown(shutdown_signal(task_tracker, usage_buffer_handle))
+    .await
+    .unwrap();
 }
 
 async fn shutdown_signal(

From 9883bb306d2dd2e808f88cbaeb07f1a184aad5cf Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:04:31 +1000
Subject: [PATCH 004/172] Use safe prefix strip helper for Anthropic stream IDs

---
 src/providers/anthropic/convert.rs | 10 +++++--
 src/providers/anthropic/stream.rs  | 43 +++++++++++++++---------------
 2 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/src/providers/anthropic/convert.rs b/src/providers/anthropic/convert.rs
index 3650f99..d2309fc 100644
--- a/src/providers/anthropic/convert.rs
+++ b/src/providers/anthropic/convert.rs
@@ -954,7 +954,10 @@ pub fn convert_anthropic_to_responses_response(
                 type_: ResponsesReasoningType::Reasoning,
                 id: format!(
                     "rs_{}",
-                    &anthropic.id[4..].chars().take(24).collect::<String>()
+                    crate::providers::anthropic::stream::strip_anthropic_prefix(
+                        &anthropic.id,
+                        "msg_"
+                    )
                 ),
                 content: None, // Anthropic doesn't provide structured reasoning content
                 summary: vec![], // Would need to generate summary
@@ -996,7 +999,10 @@ pub fn convert_anthropic_to_responses_response(
             ResponsesOutputItem::Message(OutputMessage {
                 id: format!(
                     "msg_{}",
-                    &anthropic.id[4..].chars().take(24).collect::<String>()
+                    crate::providers::anthropic::stream::strip_anthropic_prefix(
+                        &anthropic.id,
+                        "msg_"
+                    )
                 ),
                 type_: MessageType::Message,
                 role: "assistant".to_string(),
diff --git a/src/providers/anthropic/stream.rs b/src/providers/anthropic/stream.rs
index 533bd6a..2da21a5 100644
--- a/src/providers/anthropic/stream.rs
+++ b/src/providers/anthropic/stream.rs
@@ -14,6 +14,18 @@ use serde::{Deserialize, Serialize};
 
 use crate::config::StreamingBufferConfig;
 
+/// Strip a known Anthropic ID prefix (`msg_`, `toolu_`, …) and return up to 24
+/// chars of the remainder. Falls back to the whole id if the prefix isn't
+/// present, which protects against panics on short ids or multibyte
+/// boundaries inside the prefix.
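+/// For example, `strip_anthropic_prefix("msg_abc123xyz", "msg_")` yields
+/// `"abc123xyz"`, while an id without the expected prefix is returned
+/// truncated to 24 chars rather than sliced at a fixed byte offset.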
+pub(crate) fn strip_anthropic_prefix(id: &str, prefix: &str) -> String {
+    id.strip_prefix(prefix)
+        .unwrap_or(id)
+        .chars()
+        .take(24)
+        .collect()
+}
+
 // ============================================================================
 // Anthropic Streaming Event Types
 // ============================================================================
@@ -820,10 +832,8 @@ impl<S> AnthropicToResponsesStream<S> {
             match event {
                 AnthropicStreamEvent::MessageStart { message } => {
                     self.state.response_id = message.id.clone();
-                    self.state.message_id = format!(
-                        "msg_{}",
-                        &message.id[4..].chars().take(24).collect::<String>()
-                    );
+                    self.state.message_id =
+                        format!("msg_{}", strip_anthropic_prefix(&message.id, "msg_"));
                     self.state.model = message.model;
                     if let Some(usage) = message.usage {
                         self.state.input_tokens = usage.input_tokens;
@@ -904,7 +914,7 @@
                     "output_index": output_index,
                     "item": {
                         "type": "function_call",
-                        "id": format!("fc_{}", &id[6..].chars().take(24).collect::<String>()),
+                        "id": format!("fc_{}", strip_anthropic_prefix(&id, "toolu_")),
                         "call_id": id,
                         "name": name,
                         "arguments": "",
@@ -927,7 +937,7 @@
                     "output_index": 0,
                     "item": {
                         "type": "reasoning",
-                        "id": format!("rs_{}", &self.state.response_id[4..].chars().take(24).collect::<String>()),
+                        "id": format!("rs_{}", strip_anthropic_prefix(&self.state.response_id, "msg_")),
                         "summary": []
                     }
                 }),
@@ -977,7 +987,7 @@
 
                         // Emit function call arguments delta
                         let fc_id =
-                            format!("fc_{}", &tool_id[6..].chars().take(24).collect::<String>());
+                            format!("fc_{}", strip_anthropic_prefix(&tool_id, "toolu_"));
                         self.emit_event(
                             "response.function_call_arguments.delta",
                             serde_json::json!({
@@ -996,10 +1006,7 @@
                         // Emit reasoning summary delta
                         let reasoning_id = format!(
                             "rs_{}",
-                            &self.state.response_id[4..]
-                                .chars()
-                                .take(24)
-                                .collect::<String>()
+                            strip_anthropic_prefix(&self.state.response_id, "msg_")
                         );
                         self.emit_event(
                             "response.reasoning_summary_text.delta",
@@ -1036,10 +1043,7 @@
                 if self.state.emitted_reasoning_added {
                     let reasoning_id = format!(
                         "rs_{}",
-                        &self.state.response_id[4..]
-                            .chars()
-                            .take(24)
-                            .collect::<String>()
+                        strip_anthropic_prefix(&self.state.response_id, "msg_")
                     );
 
                     // Emit reasoning summary done
@@ -1142,7 +1146,7 @@
             for (i, tool_id, tool_name, arguments) in tool_calls {
                 let output_index = self.tool_output_index(i);
                 let fc_id =
-                    format!("fc_{}", &tool_id[6..].chars().take(24).collect::<String>());
+                    format!("fc_{}", strip_anthropic_prefix(tool_id.as_str(), "toolu_"));
 
                 self.emit_event(
                     "response.function_call_arguments.done",
@@ -1176,10 +1180,7 @@
                 if self.state.emitted_reasoning_added {
                     let reasoning_id = format!(
                         "rs_{}",
-                        &self.state.response_id[4..]
-                            .chars()
-                            .take(24)
-                            .collect::<String>()
+                        strip_anthropic_prefix(&self.state.response_id, "msg_")
                     );
                     let mut reasoning_item = serde_json::json!({
                         "type": "reasoning",
@@ -1215,7 +1216,7 @@
             // Tool calls come last
             for (_, tool_id, tool_name, arguments) in &self.state.tool_calls {
                 let fc_id =
-                    format!("fc_{}", &tool_id[6..].chars().take(24).collect::<String>());
+                    format!("fc_{}", strip_anthropic_prefix(tool_id.as_str(), "toolu_"));
                 output.push(serde_json::json!({
                     "type": "function_call",
                     "id": fc_id,

From c58d4406de5ff6f880fe76408a9deb28e832b235 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:04:52 +1000
Subject: [PATCH 005/172] Use /health/live for liveness and /health/ready for readiness

---
 Dockerfile               | 2 +-
 helm/hadrian/values.yaml | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 46361a6..1bd24b5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -158,6 +158,6 @@ EXPOSE 8080
 
 # Health check
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:8080/health || exit 1
+    CMD curl -f http://localhost:8080/health/live || exit 1
 
 CMD ["/app/hadrian", "--config", "/app/config/hadrian.toml"]
diff --git a/helm/hadrian/values.yaml b/helm/hadrian/values.yaml
index 21c2671..40e614e 100644
--- a/helm/hadrian/values.yaml
+++ b/helm/hadrian/values.yaml
@@ -269,9 +269,12 @@ resources:
     memory: 256Mi
 
 # -- Liveness probe configuration
+# `/health/live` is a cheap "process is up" check. The full `/health` aggregates
+# downstream subsystems (DB, cache, providers) and would cause every pod to
+# restart on any transient downstream blip — never use it for liveness.
 livenessProbe:
   httpGet:
-    path: /health
+    path: /health/live
     port: http
   initialDelaySeconds: 10
   periodSeconds: 30
   failureThreshold: 3
 
 # -- Readiness probe configuration
+# `/health/ready` checks DB connectivity, which is the right gate for accepting
+# traffic.
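+# A failing readiness probe only removes the pod from Service endpoints until
+# it passes again; unlike liveness, it never restarts the container.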
 readinessProbe:
   httpGet:
-    path: /health
+    path: /health/ready
     port: http
   initialDelaySeconds: 5
   periodSeconds: 10

From 1b81caea5e7c79256056d4fe7ee7450a85708851 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:05:40 +1000
Subject: [PATCH 006/172] Tighten OAuth callback loopback check and strip duplicate code param

---
 src/routes/admin/oauth.rs | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/routes/admin/oauth.rs b/src/routes/admin/oauth.rs
index 58348bb..ab7f866 100644
--- a/src/routes/admin/oauth.rs
+++ b/src/routes/admin/oauth.rs
@@ -51,7 +51,17 @@ fn validate_callback_url(callback_url: &str, pkce: &OAuthPkceConfig) -> Result<(), AdminError> {
+    let is_loopback = match parsed.host() {
+        Some(url::Host::Domain(d)) => d.eq_ignore_ascii_case("localhost"),
+        Some(url::Host::Ipv4(ip)) => ip.is_loopback(),
+        Some(url::Host::Ipv6(ip)) => {
+            ip.is_loopback() || ip.to_ipv4_mapped().map(|v4| v4.is_loopback()).unwrap_or(false)
+        }
+        None => false,
+    };
     if scheme != "https" && !(scheme == "http" && is_loopback) {
         return Err(AdminError::Validation(
             "callback_url must use https (http is allowed only for loopback hosts)".to_string(),
         ));
@@ -68,11 +78,26 @@ fn validate_callback_url(callback_url: &str, pkce: &OAuthPkceConfig) -> Result<(), AdminError> {
 fn build_redirect(callback_url: &str, code: &str) -> Result<String, AdminError> {
     let mut redirect = url::Url::parse(callback_url)
         .map_err(|_| AdminError::Validation("callback_url must be a valid URL".to_string()))?;
-    redirect.query_pairs_mut().append_pair("code", code);
+    let preserved: Vec<(String, String)> = redirect
+        .query_pairs()
+        .filter(|(k, _)| k != "code")
+        .map(|(k, v)| (k.into_owned(), v.into_owned()))
+        .collect();
+    {
+        let mut pairs = redirect.query_pairs_mut();
+        pairs.clear();
+        for (k, v) in &preserved {
+            pairs.append_pair(k, v);
+        }
+        pairs.append_pair("code", code);
+    }
     Ok(redirect.to_string())
 }

From 932f17f6af54438dd388531368c8f22bce64450d Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:06:01 +1000
Subject: [PATCH 007/172] Pin OpenAPI info.version to CARGO_PKG_VERSION

---
 src/openapi.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/openapi.rs b/src/openapi.rs
index e4ea858..841e78f 100644
--- a/src/openapi.rs
+++ b/src/openapi.rs
@@ -14,7 +14,7 @@ use crate::{
 #[openapi(
     info(
         title = "Hadrian Gateway API",
-        version = "0.1.0",
+        version = env!("CARGO_PKG_VERSION"),
         description = r#"**Hadrian Gateway** is an AI Gateway providing a unified OpenAI-compatible API for routing requests to multiple LLM providers.
 
 ## Overview

From afc03c3e0f64799b49393484128d9537f10adb2a Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:06:26 +1000
Subject: [PATCH 008/172] Stop swallowing cargo audit failures in CI scripts

---
 scripts/ci-backend.sh | 7 +++++--
 scripts/ci.sh         | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/scripts/ci-backend.sh b/scripts/ci-backend.sh
index 9441b7a..a608984 100755
--- a/scripts/ci-backend.sh
+++ b/scripts/ci-backend.sh
@@ -86,10 +86,13 @@ else
     FAILED=1
 fi
 
-# Security audit (non-blocking)
+# Security audit
 step "Security audit"
 if command -v cargo-audit &> /dev/null; then
-    cargo audit || echo -e "${YELLOW}!${NC} Audit warnings (non-blocking)"
+    if ! cargo audit; then
+        echo -e "${RED}✗${NC} Security audit failed"
+        FAILED=1
+    fi
 else
     echo "  cargo-audit not installed, skipping"
 fi
diff --git a/scripts/ci.sh b/scripts/ci.sh
index 210b539..bb35150 100755
--- a/scripts/ci.sh
+++ b/scripts/ci.sh
@@ -127,7 +127,7 @@ if [ "$RUN_BACKEND" = true ]; then
 
     run_check "Tests (unit + integration)" cargo test -- --include-ignored
 
-    run_check "Security audit" cargo audit || true # Don't fail on audit warnings
+    run_check "Security audit" cargo audit
 fi
 
 # Frontend checks

From 550e583d5a5473b9bbf6d310bb0ce9c8f23bfcb9 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:12:09 +1000
Subject: [PATCH 009/172] Reject empty JWT and proxy audience values at config load

---
 src/auth/jwt.rs    | 38 ++++++++++++++++++++++++++++----------
 src/config/auth.rs | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/src/auth/jwt.rs b/src/auth/jwt.rs
index 519b1bf..e2556c7 100644
--- a/src/auth/jwt.rs
+++ b/src/auth/jwt.rs
@@ -115,11 +115,7 @@ impl JwtValidator {
     /// Create a new JWT validator.
     #[allow(dead_code)] // Auth infrastructure
     pub fn new(config: JwtAuthConfig) -> Result<Self, AuthError> {
-        if config.allowed_algorithms.is_empty() {
-            return Err(AuthError::Internal(
-                "JWT allowed_algorithms must not be empty".into(),
-            ));
-        }
+        Self::check_config(&config)?;
         Ok(Self {
             config,
             http_client: reqwest::Client::new(),
@@ -132,11 +128,7 @@ impl JwtValidator {
         config: JwtAuthConfig,
         http_client: reqwest::Client,
     ) -> Result<Self, AuthError> {
-        if config.allowed_algorithms.is_empty() {
-            return Err(AuthError::Internal(
-                "JWT allowed_algorithms must not be empty".into(),
-            ));
-        }
+        Self::check_config(&config)?;
         Ok(Self {
             config,
             http_client,
         })
     }
 
+    fn check_config(config: &JwtAuthConfig) -> Result<(), AuthError> {
+        if config.allowed_algorithms.is_empty() {
+            return Err(AuthError::Internal(
+                "JWT allowed_algorithms must not be empty".into(),
+            ));
+        }
+        // `jsonwebtoken::Validation::set_audience(&[""])` accepts a token whose
+        // `aud` claim equals the empty string, silently disabling the audience
+        // check. Reject empty entries here so the validator always enforces a
+        // real expected audience.
+        let entries = config.audience.to_vec();
+        if entries.is_empty() {
+            return Err(AuthError::Internal(
+                "JWT audience must not be empty".into(),
+            ));
+        }
+        for entry in entries {
+            if entry.trim().is_empty() {
+                return Err(AuthError::Internal(
+                    "JWT audience entries must not be empty".into(),
+                ));
+            }
+        }
+        Ok(())
+    }
+
     /// Validate a JWT and return the claims.
     pub async fn validate(&self, token: &str) -> Result<Claims, AuthError> {
         // Decode header to get the key ID and algorithm
diff --git a/src/config/auth.rs b/src/config/auth.rs
index 963284d..0a69cb1 100644
--- a/src/config/auth.rs
+++ b/src/config/auth.rs
@@ -586,10 +586,48 @@ impl IapConfig {
                 "IAP identity header cannot be empty".into(),
             ));
         }
+        if let Some(jwt) = &self.jwt_assertion {
+            jwt.validate()?;
+        }
+        Ok(())
+    }
+}
+
+impl ProxyAuthJwtConfig {
+    fn validate(&self) -> Result<(), ConfigError> {
+        validate_jwt_audience("auth.iap.jwt_assertion", &self.audience)?;
+        if self.issuer.is_empty() {
+            return Err(ConfigError::Validation(
+                "auth.iap.jwt_assertion.issuer cannot be empty".into(),
+            ));
+        }
         Ok(())
     }
 }
 
+/// Reject empty audience values. `jsonwebtoken` accepts an empty string as a
+/// valid audience match, so an empty entry would silently disable the audience
+/// check.
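+/// e.g. `audience = []` and `audience = [""]` are both rejected when the
+/// config is loaded, before any token ever reaches the validator.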
+fn validate_jwt_audience(
+    field: &str,
+    audience: &OneOrMany<String>,
+) -> Result<(), ConfigError> {
+    let entries = audience.to_vec();
+    if entries.is_empty() {
+        return Err(ConfigError::Validation(format!(
+            "{field}.audience must not be empty"
+        )));
+    }
+    for entry in &entries {
+        if entry.trim().is_empty() {
+            return Err(ConfigError::Validation(format!(
+                "{field}.audience entries must not be empty"
+            )));
+        }
+    }
+    Ok(())
+}
+
 /// API key authentication configuration.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]

From 947fc5a791d3655cb8d0fc645bc0593bba7f9b40 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:15:01 +1000
Subject: [PATCH 010/172] Disallow space character in model string validation

---
 src/routing/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/routing/mod.rs b/src/routing/mod.rs
index baf7f22..77bbf6b 100644
--- a/src/routing/mod.rs
+++ b/src/routing/mod.rs
@@ -101,7 +101,7 @@ fn validate_model_string(model: &str) -> Result<(), RoutingError> {
     }
     if !model
         .chars()
-        .all(|c| c.is_alphanumeric() || "-._/:@ ".contains(c))
+        .all(|c| c.is_alphanumeric() || "-._/:@".contains(c))
     {
         return Err(RoutingError::InvalidModelFormat(
            "Model string contains invalid characters".to_string(),

From cba1be4661e06a900bf3f246f5e5f16ee676ee13 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:15:29 +1000
Subject: [PATCH 011/172] Return first routing error rather than last on fallback failure

---
 src/routing/mod.rs | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/routing/mod.rs b/src/routing/mod.rs
index 77bbf6b..15684e0 100644
--- a/src/routing/mod.rs
+++ b/src/routing/mod.rs
@@ -345,28 +345,30 @@ pub fn route_models_extended<'a>(
     model: Option<&'a str>,
     models: Option<&'a [String]>,
     providers: &'a ProvidersConfig,
 ) -> Result<RoutedModel<'a>, RoutingError> {
-    let mut last_error = None;
+    // Surface the *first* error if every candidate fails. The primary model's
+    // failure is the most actionable for the caller — fallback errors are a
+    // secondary signal.
+    let mut first_error: Option<RoutingError> = None;
 
-    // First, try the primary model
     if let Some(m) = model {
         match route_model_extended(Some(m), providers) {
             Ok(routed) => return Ok(routed),
-            Err(e) => last_error = Some(e),
-        }
+            Err(e) => first_error.get_or_insert(e),
+        };
     }
 
-    // Then try fallback models
     if let Some(model_list) = models {
         for m in model_list {
             match route_model_extended(Some(m.as_str()), providers) {
                 Ok(routed) => return Ok(routed),
-                Err(e) => last_error = Some(e),
+                Err(e) => {
+                    first_error.get_or_insert(e);
+                }
             }
         }
     }
 
-    // Return the last error, or NoModel if no models were tried
-    Err(last_error.unwrap_or(RoutingError::NoModel))
+    Err(first_error.unwrap_or(RoutingError::NoModel))
 }
 
 #[cfg(test)]

From e3b394a30c74c6f9a225dd5cabe76a7f143e7847 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:17:26 +1000
Subject: [PATCH 012/172] Match HADRIAN_TEST_DEBUG on value not env presence

---
 src/tests/provider_e2e.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/tests/provider_e2e.rs b/src/tests/provider_e2e.rs
index 11fc1c8..6fdd058 100644
--- a/src/tests/provider_e2e.rs
+++ b/src/tests/provider_e2e.rs
@@ -483,8 +483,16 @@ pub static OLLAMA_SPEC: ProviderTestSpec = ProviderTestSpec {
 // =============================================================================
 
 /// Check if debug output is enabled via HADRIAN_TEST_DEBUG env var.
+/// Only `1`/`true` (case-insensitive) count — `HADRIAN_TEST_DEBUG=0` should +/// not turn debug on. fn is_debug_enabled() -> bool { - std::env::var("HADRIAN_TEST_DEBUG").is_ok() + matches!( + std::env::var("HADRIAN_TEST_DEBUG") + .ok() + .as_deref() + .map(|v| v.trim().to_ascii_lowercase()), + Some(ref s) if s == "1" || s == "true" + ) } /// Save a debug response to the debug output directory. From e2e13b121c87583f8523e1263310f9ee80ced3bd Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:18:51 +1000 Subject: [PATCH 013/172] Validate SAML metadata URL against SSRF in parse endpoint --- src/routes/admin/org_sso_configs.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/routes/admin/org_sso_configs.rs b/src/routes/admin/org_sso_configs.rs index 3a0f3cc..3bd55ba 100644 --- a/src/routes/admin/org_sso_configs.rs +++ b/src/routes/admin/org_sso_configs.rs @@ -759,6 +759,12 @@ pub async fn parse_saml_metadata( crate::validation::require_https(&input.metadata_url) .map_err(|e| AdminError::Validation(format!("SAML metadata URL must use HTTPS: {e}")))?; + // Block private/loopback/cloud-metadata addresses with DNS rebinding + // protection — the same gate that `SamlAuthenticator::get_metadata` uses. + crate::validation::validate_base_url(&input.metadata_url, false).map_err(|e| { + AdminError::Validation(format!("SAML metadata URL is not permitted: {e}")) + })?; + // Fetch and parse the metadata let client = reqwest::Client::new(); tracing::debug!(url = %input.metadata_url, "Fetching SAML IdP metadata"); From 20a676f7bd0aa75b40f3ede89d9418c3b317385a Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:20:27 +1000 Subject: [PATCH 014/172] Validate image URL against SSRF before fetching --- src/providers/image.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/providers/image.rs b/src/providers/image.rs index 0480f3a..765d231 100644 --- a/src/providers/image.rs +++ b/src/providers/image.rs @@ -58,6 +58,8 @@ pub enum ImageError { TooLarge { size: usize, limit: usize }, #[error("Unsupported content type: {0}")] UnsupportedContentType(String), + #[error("Image URL is not permitted: {0}")] + BlockedUrl(String), #[error("Failed to fetch image: {0}")] FetchError(String), #[error("Image URL timeout after {0:?}")] @@ -177,6 +179,13 @@ pub async fn fetch_image_url( ))); } + // SSRF guard: reject loopback/private/cloud-metadata/RFC1918 addresses and + // resolve hostnames so DNS rebinding can't redirect us to a blocked range + // between this check and the actual HTTP request below. We deliberately do + // not enable `allow_loopback` — image URLs from chat content are untrusted. + crate::validation::validate_base_url(url, false) + .map_err(|e| ImageError::BlockedUrl(e.to_string()))?; + // Build request with timeout let response = client .get(url) From 6c07a760cb7aec699842921d36b1c1d65c0bb4b3 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:21:22 +1000 Subject: [PATCH 015/172] Strip reserved underscore-prefixed roles from bearer and proxy auth --- src/middleware/layers/admin.rs | 44 +++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/middleware/layers/admin.rs b/src/middleware/layers/admin.rs index b3f7463..e97315b 100644 --- a/src/middleware/layers/admin.rs +++ b/src/middleware/layers/admin.rs @@ -181,6 +181,16 @@ pub const BOOTSTRAP_ROLE: &str = "_system_bootstrap"; /// Roles starting with `_` are reserved for internal use and cannot be assigned by IdPs. 
 pub const EMERGENCY_ADMIN_ROLE: &str = "_emergency_admin";
 
+/// Drop any role with the reserved `_` prefix from a list. IdPs and proxy
+/// headers must never be able to claim these roles, since the gateway grants
+/// extra trust to them (bootstrap / emergency break-glass).
+pub(crate) fn strip_reserved_roles(roles: Vec<String>) -> Vec<String> {
+    roles
+        .into_iter()
+        .filter(|r| !r.starts_with('_'))
+        .collect()
+}
+
 /// Try to authenticate via bootstrap API key.
 ///
 /// Bootstrap authentication is only valid when:
@@ -860,8 +870,9 @@ async fn try_bearer_token_auth(
         (None, Vec::new(), Vec::new(), Vec::new())
     };
 
-    // Extract roles from token
-    let roles = claims.roles.clone().unwrap_or_default();
+    // Extract roles from token, stripping any `_`-prefixed reserved roles
+    // (bootstrap/emergency) — IdPs must never be able to claim these.
+    let roles = strip_reserved_roles(claims.roles.clone().unwrap_or_default());
 
     tracing::debug!(
         sub = %claims.sub,
@@ -1144,18 +1155,23 @@ async fn try_proxy_auth_auth(
         None
     };
 
-    // Extract roles from groups header if configured
-    let roles = config
-        .groups_header
-        .as_ref()
-        .and_then(|h| headers.get(h))
-        .and_then(|v| v.to_str().ok())
-        .map(|v| {
-            // Try JSON array first, then comma-separated
-            serde_json::from_str::<Vec<String>>(v)
-                .unwrap_or_else(|_| v.split(',').map(|s| s.trim().to_string()).collect())
-        })
-        .unwrap_or_default();
+    // Extract roles from groups header if configured. Strip any `_`-prefixed
+    // reserved roles — proxy headers can be spoofed if `trusted_proxies` is
+    // misconfigured, so even with that gate we never want to honour a claim
+    // for `_emergency_admin`/`_system_bootstrap`.
+    let roles = strip_reserved_roles(
+        config
+            .groups_header
+            .as_ref()
+            .and_then(|h| headers.get(h))
+            .and_then(|v| v.to_str().ok())
+            .map(|v| {
+                // Try JSON array first, then comma-separated
+                serde_json::from_str::<Vec<String>>(v)
+                    .unwrap_or_else(|_| v.split(',').map(|s| s.trim().to_string()).collect())
+            })
+            .unwrap_or_default(),
+    );
 
     // For proxy auth, the groups header contains both roles and raw groups
     // Store them in both fields for backwards compatibility and debugging

From 9c34a1b693631ae9a7f58eba70b106a5c5706150 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:21:56 +1000
Subject: [PATCH 016/172] Hide cross-user session existence in delete endpoint

---
 src/routes/admin/me_sessions.rs | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/routes/admin/me_sessions.rs b/src/routes/admin/me_sessions.rs
index ea88b46..1968244 100644
--- a/src/routes/admin/me_sessions.rs
+++ b/src/routes/admin/me_sessions.rs
@@ -127,15 +127,22 @@ pub async fn delete_one(
 
     let session_store = get_session_store(&state)?;
 
-    // Verify session belongs to the current user
+    // Verify session belongs to the current user. Both "session does not exist"
+    // and "session belongs to a different user" return 200 with
+    // `sessions_revoked: 0` so an attacker can't probe arbitrary session IDs to
+    // confirm they exist. The mismatch is logged at warn for forensics.
let session_existed = match session_store.get_session(session_id).await { Ok(Some(session)) => { if session.external_id != *external_id { - return Err(AdminError::BadRequest( - "Session does not belong to current user".to_string(), - )); + tracing::warn!( + session_id = %session_id, + actor_external_id = %external_id, + "Attempt to revoke a session that belongs to a different user" + ); + false + } else { + true } - true } Ok(None) => false, Err(e) => { From b63092fe9d0706f461946ad49799999e1c21e905 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:23:24 +1000 Subject: [PATCH 017/172] Send Vertex API key via header instead of URL query --- src/providers/vertex/mod.rs | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/src/providers/vertex/mod.rs b/src/providers/vertex/mod.rs index 1f70653..5c06dba 100644 --- a/src/providers/vertex/mod.rs +++ b/src/providers/vertex/mod.rs @@ -168,27 +168,15 @@ impl VertexProvider { } } - /// Build the full URL for a model endpoint. + /// Build the full URL for a model endpoint. The API key (when present) is + /// passed as the `x-goog-api-key` header in [`build_request`], not in the + /// query string — query parameters end up in HTTP access logs and tracing + /// span attributes. fn model_url(&self, model: &str, endpoint: &str, stream: bool) -> String { let base = self.base_url(); let mut url = format!("{}/{}:{}", base, model, endpoint); - - match &self.auth_mode { - AuthMode::ApiKey(api_key) => { - // Add API key as query parameter - if stream { - url.push_str("?alt=sse&key="); - } else { - url.push_str("?key="); - } - url.push_str(api_key); - } - AuthMode::OAuth { .. } => { - // OAuth uses header auth, just add SSE param if streaming - if stream { - url.push_str("?alt=sse"); - } - } + if stream { + url.push_str("?alt=sse"); } url } @@ -316,8 +304,14 @@ impl VertexProvider { .header("Content-Type", "application/json") .timeout(self.timeout); - if let Some(token) = token { - req = req.header("Authorization", format!("Bearer {}", token)); + match (&self.auth_mode, token) { + (AuthMode::ApiKey(api_key), _) => { + req = req.header("x-goog-api-key", api_key.as_str()); + } + (AuthMode::OAuth { .. }, Some(token)) => { + req = req.header("Authorization", format!("Bearer {}", token)); + } + (AuthMode::OAuth { .. 
}, None) => {}
         }
 
         req

From 47a3d0acf39d288699fecfe7c5852232a175f8c3 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:26:36 +1000
Subject: [PATCH 018/172] Use VecDeque for SSE stream output buffers to avoid O(n) shifts

---
 src/providers/anthropic/stream.rs | 46 +++++++++++++++++++++----------
 src/providers/bedrock/stream.rs   | 46 +++++++++++++++++++++----------
 src/providers/vertex/mod.rs       |  3 +-
 src/providers/vertex/stream.rs    | 46 +++++++++++++++++++++----------
 4 files changed, 98 insertions(+), 43 deletions(-)

diff --git a/src/providers/anthropic/stream.rs b/src/providers/anthropic/stream.rs
index 2da21a5..3d2e512 100644
--- a/src/providers/anthropic/stream.rs
+++ b/src/providers/anthropic/stream.rs
@@ -246,7 +246,7 @@ pub struct AnthropicToOpenAIStream<S> {
     inner: S,
     state: StreamState,
     /// Output buffer for generated SSE chunks
-    output_buffer: Vec<Bytes>,
+    output_buffer: std::collections::VecDeque<Bytes>,
     /// Maximum input buffer size in bytes
     max_input_buffer_bytes: usize,
     /// Maximum output buffer chunks
@@ -258,7 +258,7 @@ impl<S> AnthropicToOpenAIStream<S> {
         Self {
             inner,
             state: StreamState::default(),
-            output_buffer: Vec::new(),
+            output_buffer: std::collections::VecDeque::new(),
             max_input_buffer_bytes: streaming_buffer.max_input_buffer_bytes,
             max_output_buffer_chunks: streaming_buffer.max_output_buffer_chunks,
         }
@@ -541,7 +541,7 @@
                 self.emit_chunk(&chunk);
 
                 // Emit [DONE]
-                self.output_buffer.push(Bytes::from("data: [DONE]\n\n"));
+                self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n"));
             }
 
             AnthropicStreamEvent::Ping => {
@@ -562,7 +562,7 @@
     fn emit_chunk(&mut self, chunk: &OpenAIStreamChunk) {
         if let Ok(json) = serde_json::to_string(chunk) {
             let sse = format!("data: {}\n\n", json);
-            self.output_buffer.push(Bytes::from(sse));
+            self.output_buffer.push_back(Bytes::from(sse));
         }
     }
 
@@ -632,7 +632,10 @@ where
 
         // First, return any buffered output
        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self.output_buffer.remove(0))));
+            return Poll::Ready(Some(Ok(self
+                .output_buffer
+                .pop_front()
+                .expect("non-empty checked above"))));
         }
 
         // Poll the inner stream
@@ -652,7 +655,10 @@
 
                 // Return first buffered output if any
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     // No output yet, need to poll again
                     cx.waker().wake_by_ref();
@@ -663,7 +669,10 @@
             Poll::Ready(None) => {
                 // Stream ended - flush any remaining buffer
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     Poll::Ready(None)
                 }
@@ -724,7 +733,7 @@ pub struct AnthropicToResponsesStream<S> {
     inner: S,
     state: ResponsesStreamState,
     /// Output buffer for generated SSE chunks
-    output_buffer: Vec<Bytes>,
+    output_buffer: std::collections::VecDeque<Bytes>,
     /// Maximum input buffer size in bytes
     max_input_buffer_bytes: usize,
     /// Maximum output buffer chunks
@@ -743,7 +752,7 @@ impl<S> AnthropicToResponsesStream<S> {
                 echo_fields,
                 ..ResponsesStreamState::default()
             },
-            output_buffer: Vec::new(),
+            output_buffer: std::collections::VecDeque::new(),
             max_input_buffer_bytes: streaming_buffer.max_input_buffer_bytes,
             max_output_buffer_chunks: streaming_buffer.max_output_buffer_chunks,
         }
@@ -1256,7 +1265,7 @@
                 );
 
                 // Emit [DONE] to signal end of stream (OpenAI Responses API convention)
-                self.output_buffer.push(Bytes::from("data: [DONE]\n\n"));
+                self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n"));
             }
 
             AnthropicStreamEvent::Ping => {
@@ -1302,7 +1311,7 @@
         }
         if let Ok(json) = serde_json::to_string(&serde_json::Value::Object(event_obj)) {
             let sse = format!("data: {}\n\n", json);
-            self.output_buffer.push(Bytes::from(sse));
+            self.output_buffer.push_back(Bytes::from(sse));
         }
     }
 
@@ -1369,7 +1378,10 @@
 
         // First, return any buffered output
        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self.output_buffer.remove(0))));
+            return Poll::Ready(Some(Ok(self
+                .output_buffer
+                .pop_front()
+                .expect("non-empty checked above"))));
         }
 
         // Poll the inner stream
@@ -1389,7 +1401,10 @@
 
                 // Return buffered output or wake for more
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     cx.waker().wake_by_ref();
                     Poll::Pending
@@ -1399,7 +1414,10 @@
             Poll::Ready(None) => {
                 // Stream ended - flush any remaining buffer
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     Poll::Ready(None)
                 }
diff --git a/src/providers/bedrock/stream.rs b/src/providers/bedrock/stream.rs
index 989ffc4..575fe9c 100644
--- a/src/providers/bedrock/stream.rs
+++ b/src/providers/bedrock/stream.rs
@@ -48,7 +48,7 @@ pub(super) struct BedrockToOpenAIStream<S> {
     pub inner: S,
     pub state: StreamState,
     /// Output buffer for generated SSE chunks
-    pub output_buffer: Vec<Bytes>,
+    pub output_buffer: std::collections::VecDeque<Bytes>,
     /// Maximum input buffer size in bytes
     pub max_input_buffer_bytes: usize,
     /// Maximum output buffer chunks
@@ -66,7 +66,7 @@ impl<S> BedrockToOpenAIStream<S> {
                 buffer: bytes::BytesMut::new(),
                 ..StreamState::default()
             },
-            output_buffer: Vec::new(),
+            output_buffer: std::collections::VecDeque::new(),
             max_input_buffer_bytes: streaming_buffer.max_input_buffer_bytes,
             max_output_buffer_chunks: streaming_buffer.max_output_buffer_chunks,
         }
@@ -362,7 +362,7 @@
                     self.emit_chunk(&usage_chunk);
 
                     // Emit [DONE]
-                    self.output_buffer.push(Bytes::from("data: [DONE]\n\n"));
+                    self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n"));
                 }
             }
             _ => {
@@ -374,7 +374,7 @@
     pub fn emit_chunk(&mut self, chunk: &OpenAIStreamChunk) {
         if let Ok(json) = serde_json::to_string(chunk) {
             let sse = format!("data: {}\n\n", json);
-            self.output_buffer.push(Bytes::from(sse));
+            self.output_buffer.push_back(Bytes::from(sse));
         }
     }
 
@@ -446,7 +446,10 @@ where
 
         // First, return any buffered output
        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self.output_buffer.remove(0))));
+            return Poll::Ready(Some(Ok(self
+                .output_buffer
+                .pop_front()
+                .expect("non-empty checked above"))));
         }
 
         // Poll the inner stream
@@ -466,7 +469,10 @@
 
                 // Return first buffered output if any
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     // No output yet, need to poll again
                     cx.waker().wake_by_ref();
@@ -477,7 +483,10 @@
             Poll::Ready(None) => {
                 // Stream ended, return any remaining buffered output
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     Poll::Ready(None)
                 }
@@ -540,7 +549,7 @@ pub struct BedrockToResponsesStream<S> {
     pub inner: S,
     pub state: ResponsesStreamState,
     /// Output buffer for generated SSE chunks
-    pub output_buffer: Vec<Bytes>,
+    pub output_buffer: std::collections::VecDeque<Bytes>,
     /// Maximum input buffer size in bytes
     pub max_input_buffer_bytes: usize,
     /// Maximum output buffer chunks
@@ -574,7 +583,7 @@ impl<S> BedrockToResponsesStream<S> {
                 echo_fields,
                 ..ResponsesStreamState::default()
             },
-            output_buffer: Vec::new(),
+            output_buffer: std::collections::VecDeque::new(),
             max_input_buffer_bytes: streaming_buffer.max_input_buffer_bytes,
             max_output_buffer_chunks: streaming_buffer.max_output_buffer_chunks,
         }
@@ -1099,7 +1108,7 @@
                 );
 
                 // Emit [DONE] to signal end of stream
-                self.output_buffer.push(Bytes::from("data: [DONE]\n\n"));
+                self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n"));
             }
         }
         _ => {
@@ -1129,7 +1138,7 @@
         }
         if let Ok(json) = serde_json::to_string(&serde_json::Value::Object(event_obj)) {
             let sse = format!("data: {}\n\n", json);
-            self.output_buffer.push(Bytes::from(sse));
+            self.output_buffer.push_back(Bytes::from(sse));
         }
     }
 
@@ -1200,7 +1209,10 @@ where
 
         // First, return any buffered output
        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self.output_buffer.remove(0))));
+            return Poll::Ready(Some(Ok(self
+                .output_buffer
+                .pop_front()
+                .expect("non-empty checked above"))));
         }
 
         // Poll the inner stream
@@ -1220,7 +1232,10 @@
 
                 // Return first buffered output if any
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     // No output yet, need to poll again
                     cx.waker().wake_by_ref();
@@ -1231,7 +1246,10 @@
             Poll::Ready(None) => {
                 // Stream ended, return any remaining buffered output
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     Poll::Ready(None)
                 }
diff --git a/src/providers/vertex/mod.rs b/src/providers/vertex/mod.rs
index 5c06dba..1283647 100644
--- a/src/providers/vertex/mod.rs
+++ b/src/providers/vertex/mod.rs
@@ -874,7 +874,8 @@ mod streaming_tests {
         transformer.handle_response(response);
 
         // Should emit [DONE] at the end
-        let last_chunk = std::str::from_utf8(transformer.output_buffer.last().unwrap()).unwrap();
+        let last_chunk =
+            std::str::from_utf8(transformer.output_buffer.back().unwrap()).unwrap();
         assert_eq!(last_chunk, "data: [DONE]\n\n");
 
         // Should have usage in second-to-last chunk
diff --git a/src/providers/vertex/stream.rs b/src/providers/vertex/stream.rs
index 4acd7ee..cf735e3 100644
--- a/src/providers/vertex/stream.rs
+++ b/src/providers/vertex/stream.rs
@@ -121,7 +121,7 @@ pub struct VertexToOpenAIStream<S> {
     pub inner: S,
     pub state: StreamState,
     /// Output buffer for generated SSE chunks
-    pub output_buffer: Vec<Bytes>,
+    pub output_buffer: std::collections::VecDeque<Bytes>,
     /// Maximum input buffer size in bytes
     pub max_input_buffer_bytes: usize,
     /// Maximum output buffer chunks
@@ -136,7 +136,7 @@ impl<S> VertexToOpenAIStream<S> {
                 model,
                 ..StreamState::default()
             },
-            output_buffer: Vec::new(),
+            output_buffer: std::collections::VecDeque::new(),
             max_input_buffer_bytes: streaming_buffer.max_input_buffer_bytes,
             max_output_buffer_chunks: streaming_buffer.max_output_buffer_chunks,
         }
@@ -353,7 +353,7 @@
                 self.emit_chunk(&usage_chunk);
 
                 // Emit [DONE]
-                self.output_buffer.push(Bytes::from("data: [DONE]\n\n"));
+                self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n"));
             }
         }
     }
@@ -361,7 +361,7 @@
     fn emit_chunk(&mut self, chunk: &OpenAIStreamChunk) {
         if let Ok(json) = serde_json::to_string(chunk) {
             let sse = format!("data: {}\n\n", json);
-            self.output_buffer.push(Bytes::from(sse));
+            self.output_buffer.push_back(Bytes::from(sse));
        }
     }
 
@@ -431,7 +431,10 @@ where
 
         // First, return any buffered output
        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self.output_buffer.remove(0))));
+            return Poll::Ready(Some(Ok(self
+                .output_buffer
+                .pop_front()
+                .expect("non-empty checked above"))));
         }
 
         // Poll the inner stream
@@ -451,7 +454,10 @@
 
                 // Return first buffered output if any
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     // No output yet, need to poll again
                     cx.waker().wake_by_ref();
@@ -462,7 +468,10 @@
             Poll::Ready(None) => {
                 // Stream ended - flush any remaining buffer
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     Poll::Ready(None)
                 }
@@ -517,7 +526,7 @@ pub struct VertexToResponsesStream<S> {
     inner: S,
     state: ResponsesStreamState,
     /// Output buffer for generated SSE chunks
-    output_buffer: Vec<Bytes>,
+    output_buffer: std::collections::VecDeque<Bytes>,
     /// Maximum input buffer size in bytes
     max_input_buffer_bytes: usize,
     /// Maximum output buffer chunks
@@ -541,7 +550,7 @@ impl<S> VertexToResponsesStream<S> {
                 echo_fields,
                 ..ResponsesStreamState::default()
             },
-            output_buffer: Vec::new(),
+            output_buffer: std::collections::VecDeque::new(),
             max_input_buffer_bytes: streaming_buffer.max_input_buffer_bytes,
             max_output_buffer_chunks: streaming_buffer.max_output_buffer_chunks,
         }
@@ -583,7 +592,7 @@
 
         // Pass through [DONE] marker
         if json_str == "[DONE]" {
-            self.output_buffer.push(Bytes::from("data: [DONE]\n\n"));
+            self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n"));
             return;
         }
 
@@ -1021,7 +1030,7 @@
         }
         if let Ok(json) = serde_json::to_string(&serde_json::Value::Object(event_obj)) {
             let sse = format!("data: {}\n\n", json);
-            self.output_buffer.push(Bytes::from(sse));
+            self.output_buffer.push_back(Bytes::from(sse));
         }
     }
 
@@ -1088,7 +1097,10 @@ where
 
         // First, return any buffered output
        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self.output_buffer.remove(0))));
+            return Poll::Ready(Some(Ok(self
+                .output_buffer
+                .pop_front()
+                .expect("non-empty checked above"))));
         }
 
         // Poll the inner stream
@@ -1108,7 +1120,10 @@
 
                 // Return buffered output or wake for more
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     cx.waker().wake_by_ref();
                     Poll::Pending
@@ -1118,7 +1133,10 @@
             Poll::Ready(None) => {
                 // Stream ended - flush any remaining buffer
                 if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self.output_buffer.remove(0))))
+                    Poll::Ready(Some(Ok(self
+                        .output_buffer
+                        .pop_front()
+                        .expect("non-empty checked above"))))
                 } else {
                     Poll::Ready(None)
                 }

From 28c8664c1b73a36ac0373ea4c15d53dc9f73d0ee Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:27:18 +1000
Subject: [PATCH 019/172] Use parking_lot RwLock in CircuitBreakerRegistry to drop poison panic

---
 src/providers/registry.rs | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/providers/registry.rs b/src/providers/registry.rs
index ee14016..0e1ae4c 100644
--- a/src/providers/registry.rs
+++ b/src/providers/registry.rs
@@ -4,11 +4,9 @@
 //! and protect against unhealthy providers. This module provides a
 //! registry that stores circuit breakers keyed by provider name.
 
-use std::{
-    collections::HashMap,
-    sync::{Arc, RwLock},
-};
+use std::{collections::HashMap, sync::Arc};
 
+use parking_lot::RwLock;
 use serde::Serialize;
 
 use super::circuit_breaker::{CircuitBreaker, CircuitState};
@@ -74,7 +72,7 @@ impl CircuitBreakerRegistry {
     /// Register a circuit breaker for a provider.
     pub fn register(&self, provider_name: &str, breaker: CircuitBreaker) {
-        let mut breakers = self.breakers.write().expect("RwLock poisoned");
+        let mut breakers = self.breakers.write();
         breakers.insert(provider_name.to_string(), Arc::new(breaker));
     }
 
@@ -93,14 +91,14 @@
 
         // Try read lock first
         {
-            let breakers = self.breakers.read().expect("RwLock poisoned");
+            let breakers = self.breakers.read();
             if let Some(breaker) = breakers.get(provider_name) {
                 return Some(breaker.clone());
             }
         }
 
         // Need to create - upgrade to write lock
-        let mut breakers = self.breakers.write().expect("RwLock poisoned");
+        let mut breakers = self.breakers.write();
         // Double-check after acquiring write lock
         if let Some(breaker) = breakers.get(provider_name) {
             return Some(breaker.clone());
@@ -121,13 +119,13 @@
     /// Get a circuit breaker by name if it exists.
     pub fn get(&self, provider_name: &str) -> Option<Arc<CircuitBreaker>> {
-        let breakers = self.breakers.read().expect("RwLock poisoned");
+        let breakers = self.breakers.read();
         breakers.get(provider_name).cloned()
     }
 
     /// Get the status of all circuit breakers.
     pub fn status(&self) -> Vec<CircuitBreakerStatus> {
-        let breakers = self.breakers.read().expect("RwLock poisoned");
+        let breakers = self.breakers.read();
         breakers
             .iter()
             .map(
@@ -142,7 +140,7 @@
     /// Get the status of a specific circuit breaker.
     pub fn status_for(&self, provider_name: &str) -> Option<CircuitBreakerStatus> {
-        let breakers = self.breakers.read().expect("RwLock poisoned");
+        let breakers = self.breakers.read();
         breakers
             .get(provider_name)
             .map(|breaker: &Arc<CircuitBreaker>| CircuitBreakerStatus {

From ab947cc4be13f480ad40d07d2fba77e73cf112ab Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:27:58 +1000
Subject: [PATCH 020/172] Add noopener to OpenRouter OAuth iframe escape window.open

---
 ui/src/components/WasmSetup/openrouter-oauth.ts | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ui/src/components/WasmSetup/openrouter-oauth.ts b/ui/src/components/WasmSetup/openrouter-oauth.ts
index e235870..9566252 100644
--- a/ui/src/components/WasmSetup/openrouter-oauth.ts
+++ b/ui/src/components/WasmSetup/openrouter-oauth.ts
@@ -43,7 +43,11 @@ export function isInIframe(): boolean {
  */
 export async function startOpenRouterOAuth() {
   if (isInIframe()) {
-    window.open(window.location.origin + window.location.pathname, "_blank");
+    window.open(
+      window.location.origin + window.location.pathname,
+      "_blank",
+      "noopener,noreferrer",
+    );
     return;
   }

From 9f6305ab4a5d126e25d5ed51e047e9bfecf67fc9 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:28:23 +1000
Subject: [PATCH 021/172] Reject protocol-relative return_to values on login redirect

---
 ui/src/pages/LoginPage.tsx | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/ui/src/pages/LoginPage.tsx b/ui/src/pages/LoginPage.tsx
index 863764d..ee317cf 100644
--- a/ui/src/pages/LoginPage.tsx
+++ b/ui/src/pages/LoginPage.tsx
@@ -63,11 +63,20 @@ export default function LoginPage() {
   // a full URL (path + search, e.g. /oauth/authorize?callback_url=...) survive
   // the round-trip through login. Falls back to the in-app `state.from` set by
   // RequireAuth.
+  //
+  // `startsWith("/")` alone is not enough: `//evil.com/...` and `/\evil.com`
+  // are treated as same-origin by `Navigate`/`startsWith` but resolve to a
+  // cross-origin URL in the browser. Reject anything whose second character
+  // makes it protocol-relative or backslash-prefixed.
+  const isSafeReturnTo = (value: string | null): value is string =>
+    !!value &&
+    value.startsWith("/") &&
+    !value.startsWith("//") &&
+    !value.startsWith("/\\");
   const returnToParam = new URLSearchParams(location.search).get("return_to");
-  const from =
-    returnToParam && returnToParam.startsWith("/")
-      ? returnToParam
-      : location.state?.from?.pathname || "/";
+  const from = isSafeReturnTo(returnToParam)
+    ? returnToParam
+    : location.state?.from?.pathname || "/";
 
   if (configLoading || authLoading) {
     return (

From 3ba6e8dcb7ce47a8af471b033cc20aac434c3899 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:28:40 +1000
Subject: [PATCH 022/172] Redact auth token from AccountPage data export

---
 ui/src/pages/AccountPage.tsx | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/ui/src/pages/AccountPage.tsx b/ui/src/pages/AccountPage.tsx
index 7e0141b..c823868 100644
--- a/ui/src/pages/AccountPage.tsx
+++ b/ui/src/pages/AccountPage.tsx
@@ -22,17 +22,32 @@ import { exportAllIndexedDBData, deleteIndexedDBDatabase } from "@/hooks/useInde
 // localStorage keys used by the app
 const LOCAL_STORAGE_KEYS = ["hadrian-auth", "hadrian-mcp-servers", "hadrian-preferences"] as const;
 
-/** Export all localStorage data for Hadrian keys */
+/** Sanitize a stored auth blob so the export doesn't ship the bearer token.
+ */
+function sanitizeForExport(key: string, value: unknown): unknown {
+  if (!value || typeof value !== "object" || Array.isArray(value)) {
+    return value;
+  }
+  if (key === "hadrian-auth") {
+    const { token: _token, ...rest } = value as Record<string, unknown>;
+    return { ...rest, token: "[redacted]" };
+  }
+  return value;
+}
+
+/** Export all localStorage data for Hadrian keys.
+ * Auth tokens are redacted: a user emailing this export "for support"
+ * shouldn't be shipping their gateway credential. */
 function exportLocalStorageData(): Record<string, unknown> {
   const result: Record<string, unknown> = {};
   for (const key of LOCAL_STORAGE_KEYS) {
     try {
       const value = localStorage.getItem(key);
       if (value) {
-        result[key] = JSON.parse(value);
+        result[key] = sanitizeForExport(key, JSON.parse(value));
       }
     } catch {
-      // If parsing fails, store as raw string
+      // If parsing fails, store as raw string (auth blob always parses, so
+      // raw strings reaching here aren't credentials we know about)
       const value = localStorage.getItem(key);
       if (value) {
         result[key] = value;

From c3f47e062bf9686a32c3ea15121d8625894a797a Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:29:13 +1000
Subject: [PATCH 023/172] Clamp ListQuery limit to a hard maximum of 1000

---
 src/routes/admin/organizations.rs | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/routes/admin/organizations.rs b/src/routes/admin/organizations.rs
index 7ea7fed..a2229bc 100644
--- a/src/routes/admin/organizations.rs
+++ b/src/routes/admin/organizations.rs
@@ -37,11 +37,19 @@ pub struct ListQuery {
     pub include_deleted: Option<bool>,
 }
 
+/// Hard upper bound on `limit` for any admin list endpoint. A client passing
+/// `limit=999999999` would otherwise scan an entire table and DoS the gateway.
+pub const MAX_LIST_LIMIT: i64 = 1000;
+
+fn clamp_limit(limit: Option<i64>) -> Option<i64> {
+    limit.map(|n| n.clamp(1, MAX_LIST_LIMIT))
+}
+
 /// Simple conversion that requires using try_into_with_cursor() for cursor validation.
 impl From<ListQuery> for ListParams {
     fn from(q: ListQuery) -> Self {
         ListParams {
-            limit: q.limit,
+            limit: clamp_limit(q.limit),
             cursor: None,
             direction: CursorDirection::Forward,
             sort_order: Default::default(),
@@ -73,7 +81,7 @@ impl ListQuery {
         };
 
         Ok(ListParams {
-            limit: self.limit,
+            limit: clamp_limit(self.limit),
             cursor,
             direction,
             sort_order: Default::default(),

From fbf0c476e1b84afd57ea4779208db7fd33f3b3ad Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sat, 25 Apr 2026 22:34:25 +1000
Subject: [PATCH 024/172] Sanitize CSV export cells to defang formula injection

---
 src/routes/admin/csv_export.rs | 83 ++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 35 deletions(-)

diff --git a/src/routes/admin/csv_export.rs b/src/routes/admin/csv_export.rs
index 963ed4b..e6ac221 100644
--- a/src/routes/admin/csv_export.rs
+++ b/src/routes/admin/csv_export.rs
@@ -15,6 +15,19 @@ use crate::models::{
     UserAccessInventoryEntry, UserAccessSummaryResponse,
 };
 
+/// Defang any cell whose first character would be interpreted as a formula by
+/// Excel/Sheets/Numbers (`= + - @ \t \r`). The auditor-friendly format means
+/// a malicious user-controlled email or org name should never become a live
+/// formula or `HYPERLINK()` exfiltration vector.
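+/// e.g. `=HYPERLINK("http://evil.example","x")` becomes
+/// `'=HYPERLINK("http://evil.example","x")`, which spreadsheets treat as text.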
+fn sanitize_csv_cell(value: String) -> String { + match value.chars().next() { + Some('=') | Some('+') | Some('-') | Some('@') | Some('\t') | Some('\r') => { + format!("'{}", value) + } + _ => value, + } +} + /// Error type for CSV export operations #[derive(Debug)] pub struct CsvExportError(String); @@ -98,9 +111,9 @@ pub fn export_access_inventory_csv( for org in &user.organizations { let mut row = base_row.clone(); row.org_id = org.org_id.to_string(); - row.org_slug = org.org_slug.clone(); - row.org_name = org.org_name.clone(); - row.org_role = org.role.clone(); + row.org_slug = sanitize_csv_cell(org.org_slug.clone()); + row.org_name = sanitize_csv_cell(org.org_name.clone()); + row.org_role = sanitize_csv_cell(org.role.clone()); row.org_granted_at = org.granted_at.to_rfc3339(); wtr.serialize(&row) .map_err(|e| CsvExportError(e.to_string()))?; @@ -110,10 +123,10 @@ pub fn export_access_inventory_csv( for project in &user.projects { let mut row = base_row.clone(); row.project_id = project.project_id.to_string(); - row.project_slug = project.project_slug.clone(); - row.project_name = project.project_name.clone(); + row.project_slug = sanitize_csv_cell(project.project_slug.clone()); + row.project_name = sanitize_csv_cell(project.project_name.clone()); row.project_org_id = project.org_id.to_string(); - row.project_role = project.role.clone(); + row.project_role = sanitize_csv_cell(project.role.clone()); row.project_granted_at = project.granted_at.to_rfc3339(); wtr.serialize(&row) .map_err(|e| CsvExportError(e.to_string()))?; @@ -127,9 +140,9 @@ pub fn export_access_inventory_csv( fn create_base_inventory_row(user: &UserAccessInventoryEntry) -> AccessInventoryRow { AccessInventoryRow { user_id: user.user_id.to_string(), - external_id: user.external_id.clone(), - email: user.email.clone().unwrap_or_default(), - name: user.name.clone().unwrap_or_default(), + external_id: sanitize_csv_cell(user.external_id.clone()), + email: sanitize_csv_cell(user.email.clone().unwrap_or_default()), + name: sanitize_csv_cell(user.name.clone().unwrap_or_default()), created_at: user.created_at.to_rfc3339(), org_id: String::new(), org_slug: String::new(), @@ -181,10 +194,10 @@ pub fn export_org_access_report_csv( for member in &response.members { let base_row = OrgAccessReportRow { user_id: member.user_id.to_string(), - external_id: member.external_id.clone(), - email: member.email.clone().unwrap_or_default(), - name: member.name.clone().unwrap_or_default(), - org_role: member.role.clone(), + external_id: sanitize_csv_cell(member.external_id.clone()), + email: sanitize_csv_cell(member.email.clone().unwrap_or_default()), + name: sanitize_csv_cell(member.name.clone().unwrap_or_default()), + org_role: sanitize_csv_cell(member.role.clone()), org_granted_at: member.granted_at.to_rfc3339(), project_id: String::new(), project_slug: String::new(), @@ -207,9 +220,9 @@ pub fn export_org_access_report_csv( for project in &member.project_access { let mut row = base_row.clone(); row.project_id = project.project_id.to_string(); - row.project_slug = project.project_slug.clone(); - row.project_name = project.project_name.clone(); - row.project_role = project.role.clone(); + row.project_slug = sanitize_csv_cell(project.project_slug.clone()); + row.project_name = sanitize_csv_cell(project.project_name.clone()); + row.project_role = sanitize_csv_cell(project.role.clone()); row.project_granted_at = project.granted_at.to_rfc3339(); wtr.serialize(&row) .map_err(|e| CsvExportError(e.to_string()))?; @@ -247,9 +260,9 @@ pub fn 
export_user_access_summary_csv( let base = |resource_type: &str| UserAccessSummaryRow { user_id: response.user_id.to_string(), - external_id: response.external_id.clone(), - email: response.email.clone().unwrap_or_default(), - name: response.name.clone().unwrap_or_default(), + external_id: sanitize_csv_cell(response.external_id.clone()), + email: sanitize_csv_cell(response.email.clone().unwrap_or_default()), + name: sanitize_csv_cell(response.name.clone().unwrap_or_default()), created_at: response.created_at.to_rfc3339(), resource_type: resource_type.to_string(), resource_id: String::new(), @@ -269,9 +282,9 @@ pub fn export_user_access_summary_csv( for org in &response.organizations { let mut row = base("organization"); row.resource_id = org.org_id.to_string(); - row.resource_slug = org.org_slug.clone(); - row.resource_name = org.org_name.clone(); - row.role = org.role.clone(); + row.resource_slug = sanitize_csv_cell(org.org_slug.clone()); + row.resource_name = sanitize_csv_cell(org.org_name.clone()); + row.role = sanitize_csv_cell(org.role.clone()); row.granted_at = org.granted_at.to_rfc3339(); row.last_activity_at = org .last_activity_at @@ -285,9 +298,9 @@ pub fn export_user_access_summary_csv( for project in &response.projects { let mut row = base("project"); row.resource_id = project.project_id.to_string(); - row.resource_slug = project.project_slug.clone(); - row.resource_name = project.project_name.clone(); - row.role = project.role.clone(); + row.resource_slug = sanitize_csv_cell(project.project_slug.clone()); + row.resource_name = sanitize_csv_cell(project.project_name.clone()); + row.role = sanitize_csv_cell(project.role.clone()); row.granted_at = project.granted_at.to_rfc3339(); row.last_activity_at = project .last_activity_at @@ -301,7 +314,7 @@ pub fn export_user_access_summary_csv( for api_key in &response.api_keys { let mut row = base("api_key"); row.resource_id = api_key.key_id.to_string(); - row.resource_name = api_key.name.clone(); + row.resource_name = sanitize_csv_cell(api_key.name.clone()); row.is_active = api_key.is_active.to_string(); row.granted_at = api_key.created_at.to_rfc3339(); row.last_used_at = api_key @@ -356,9 +369,9 @@ pub fn export_stale_access_csv(response: &StaleAccessResponse) -> Result let row = StaleAccessRow { category: "stale_user".to_string(), user_id: user.user_id.to_string(), - external_id: user.external_id.clone(), - email: user.email.clone().unwrap_or_default(), - name: user.name.clone().unwrap_or_default(), + external_id: sanitize_csv_cell(user.external_id.clone()), + email: sanitize_csv_cell(user.email.clone().unwrap_or_default()), + name: sanitize_csv_cell(user.name.clone().unwrap_or_default()), created_at: user.created_at.to_rfc3339(), last_activity_at: user .last_activity_at @@ -384,9 +397,9 @@ pub fn export_stale_access_csv(response: &StaleAccessResponse) -> Result let row = StaleAccessRow { category: "never_active_user".to_string(), user_id: user.user_id.to_string(), - external_id: user.external_id.clone(), - email: user.email.clone().unwrap_or_default(), - name: user.name.clone().unwrap_or_default(), + external_id: sanitize_csv_cell(user.external_id.clone()), + email: sanitize_csv_cell(user.email.clone().unwrap_or_default()), + name: sanitize_csv_cell(user.name.clone().unwrap_or_default()), created_at: user.created_at.to_rfc3339(), last_activity_at: String::new(), days_inactive: user.days_since_creation, @@ -419,9 +432,9 @@ pub fn export_stale_access_csv(response: &StaleAccessResponse) -> Result project_count: 0, active_api_keys: 0, 
key_id: key.key_id.to_string(), - key_name: key.name.clone(), - key_prefix: key.key_prefix.clone(), - owner_type: key.owner_type.clone(), + key_name: sanitize_csv_cell(key.name.clone()), + key_prefix: sanitize_csv_cell(key.key_prefix.clone()), + owner_type: sanitize_csv_cell(key.owner_type.clone()), owner_id: key.owner_id.to_string(), never_used: key.never_used.to_string(), }; From 0708ea46acba5099cd87426dfc11e0c31f6b8ae1 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:35:07 +1000 Subject: [PATCH 025/172] Route citation links through parent URL handler for safety modal --- .../components/CitationList/CitationList.tsx | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/ui/src/components/CitationList/CitationList.tsx b/ui/src/components/CitationList/CitationList.tsx index 46c56f6..c603ab7 100644 --- a/ui/src/components/CitationList/CitationList.tsx +++ b/ui/src/components/CitationList/CitationList.tsx @@ -162,14 +162,23 @@ const CitationItem = memo(function CitationItem({
)} {citation.type === "url" && ( - { + // Route through the parent's URL handler so the same trusted- + // domain confirmation modal that markdown links use applies + // here. Citations are model-supplied — a citation that + // displays "Wikipedia" can link to attacker.example. + if (onUrlClick) { + onUrlClick(citation.url); + } else { + window.open(citation.url, "_blank", "noopener,noreferrer"); + } + }} className="text-xs text-primary hover:underline mt-1 inline-flex items-center gap-1" > Open source - + )} )} From 2a2edceb730ecad0d78574a6117d5baa46421825 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:36:01 +1000 Subject: [PATCH 026/172] Default audit log list to last 7 days when no range given --- src/routes/admin/audit_logs.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/routes/admin/audit_logs.rs b/src/routes/admin/audit_logs.rs index 3d8cc50..34e5de2 100644 --- a/src/routes/admin/audit_logs.rs +++ b/src/routes/admin/audit_logs.rs @@ -61,6 +61,14 @@ pub async fn list( ))); } + // Cap unbounded scans: when no time range is supplied, default to the last + // 7 days. The audit log is append-only and grows fast; an unfiltered list + // hits the entire table with `ORDER BY ts DESC` which can DoS the gateway. + let mut query = query; + if query.from.is_none() && query.to.is_none() { + query.from = Some(chrono::Utc::now() - chrono::Duration::days(7)); + } + let result = services.audit_logs.list(query).await?; let pagination = PaginationMeta::with_cursors( From 9cf736baf6dcd6bce1ec658cba8c0fdd0a8c0627 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:36:32 +1000 Subject: [PATCH 027/172] Validate DLQ table_name as identifier before interpolating --- src/dlq/mod.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/dlq/mod.rs b/src/dlq/mod.rs index 5bbd80f..ba35c26 100644 --- a/src/dlq/mod.rs +++ b/src/dlq/mod.rs @@ -74,6 +74,25 @@ pub async fn create_dlq( ttl_secs, .. } => { + // The table name is interpolated as raw SQL throughout + // `dlq::database`, so we validate it against an identifier shape + // here rather than trusting it. Mistyped/templated config values + // would otherwise become an injection surface. + let valid_ident = !table_name.is_empty() + && table_name.len() <= 63 + && table_name + .chars() + .next() + .map(|c| c.is_ascii_alphabetic() || c == '_') + .unwrap_or(false) + && table_name + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_'); + if !valid_ident { + return Err(DlqError::Internal(format!( + "Invalid DLQ table_name '{table_name}': must match [A-Za-z_][A-Za-z0-9_]{{0,62}}" + ))); + } let db = db.ok_or_else(|| { DlqError::Internal( "Database DLQ configured but no database connection available".to_string(), From 0d175c35a1ac4129308f6bbc48a355e79d0b8ad6 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:37:09 +1000 Subject: [PATCH 028/172] Mark selected conversation with aria-current for screen readers --- ui/src/components/ConversationList/ConversationList.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/ui/src/components/ConversationList/ConversationList.tsx b/ui/src/components/ConversationList/ConversationList.tsx index 7abb522..fbe46cd 100644 --- a/ui/src/components/ConversationList/ConversationList.tsx +++ b/ui/src/components/ConversationList/ConversationList.tsx @@ -151,6 +151,7 @@ const ConversationItem = memo( type="button" className="flex min-w-0 flex-1 items-center gap-2 text-left" onClick={() => onSelect(conv.id)} + aria-current={isSelected ? 
"page" : undefined} > Date: Sat, 25 Apr 2026 22:37:31 +1000 Subject: [PATCH 029/172] Log SSE event parse errors instead of silently swallowing --- ui/src/pages/chat/useChat.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ui/src/pages/chat/useChat.ts b/ui/src/pages/chat/useChat.ts index b96b911..2b7331f 100644 --- a/ui/src/pages/chat/useChat.ts +++ b/ui/src/pages/chat/useChat.ts @@ -1211,8 +1211,12 @@ export function useChat({ } } } - } catch { - // Ignore parse errors for partial JSON + } catch (err) { + // Per-line `data:` payloads should always be complete JSON + // (we already split on `\n` and the last partial line stays + // in `buffer`). Surface the error at debug so producer/spec + // drift doesn't silently drop tool calls or citations. + console.debug("Failed to parse SSE event payload", { data, err }); } } } From 4cdb3b8396b2787842a0d0233506204d692da7ac Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:38:07 +1000 Subject: [PATCH 030/172] Wrap clipboard writes in try/catch to surface failures --- ui/src/components/ChatMessage/ChatMessage.tsx | 10 +++++++--- .../MultiModelResponse/MultiModelResponse.tsx | 10 +++++++--- .../components/ResponseActions/ResponseActions.tsx | 13 ++++++++++--- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/ui/src/components/ChatMessage/ChatMessage.tsx b/ui/src/components/ChatMessage/ChatMessage.tsx index b022080..8603569 100644 --- a/ui/src/components/ChatMessage/ChatMessage.tsx +++ b/ui/src/components/ChatMessage/ChatMessage.tsx @@ -141,9 +141,13 @@ function ChatMessageComponent({ ); const handleCopy = async () => { - await navigator.clipboard.writeText(message.content); - setCopied(true); - setTimeout(() => setCopied(false), 2000); + try { + await navigator.clipboard.writeText(message.content); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + } catch (err) { + console.debug("Clipboard write failed", err); + } }; // Quote selection state diff --git a/ui/src/components/MultiModelResponse/MultiModelResponse.tsx b/ui/src/components/MultiModelResponse/MultiModelResponse.tsx index d99e11a..77d7d0a 100644 --- a/ui/src/components/MultiModelResponse/MultiModelResponse.tsx +++ b/ui/src/components/MultiModelResponse/MultiModelResponse.tsx @@ -417,9 +417,13 @@ function CollapsedActionsMenu({ const [copied, setCopied] = useState(false); const handleCopy = async () => { - await navigator.clipboard.writeText(content); - setCopied(true); - setTimeout(() => setCopied(false), 2000); + try { + await navigator.clipboard.writeText(content); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + } catch (err) { + console.debug("Clipboard write failed", err); + } }; const isSpeaking = speakingState === "playing"; diff --git a/ui/src/components/ResponseActions/ResponseActions.tsx b/ui/src/components/ResponseActions/ResponseActions.tsx index 06e2586..4101e88 100644 --- a/ui/src/components/ResponseActions/ResponseActions.tsx +++ b/ui/src/components/ResponseActions/ResponseActions.tsx @@ -102,9 +102,16 @@ export function ResponseActions({ const [copied, setCopied] = useState(false); const handleCopy = async () => { - await navigator.clipboard.writeText(content); - setCopied(true); - setTimeout(() => setCopied(false), 2000); + // `clipboard.writeText` rejects on permission denial, lack of focus, or + // non-secure context. Without try/catch the rejection becomes an + // unhandled promise rejection and `setCopied(true)` silently never runs. 
+ try { + await navigator.clipboard.writeText(content); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + } catch (err) { + console.debug("Clipboard write failed", err); + } }; // Primary actions - always visible From 92bc583c370fa3e389433bd829e9204146b8ea18 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:38:52 +1000 Subject: [PATCH 031/172] Debounce and memoise conversation list filter to avoid O(N*M) hitches --- .../ConversationList/ConversationList.tsx | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/ui/src/components/ConversationList/ConversationList.tsx b/ui/src/components/ConversationList/ConversationList.tsx index fbe46cd..b4b4c39 100644 --- a/ui/src/components/ConversationList/ConversationList.tsx +++ b/ui/src/components/ConversationList/ConversationList.tsx @@ -9,9 +9,10 @@ import { Trash2, X, } from "lucide-react"; -import { memo, useCallback, useState } from "react"; +import { memo, useCallback, useMemo, useState } from "react"; import { Button } from "@/components/Button/Button"; +import { useDebouncedValue } from "@/hooks/useDebouncedValue"; import { Dropdown, DropdownContent, @@ -244,13 +245,19 @@ export function ConversationList({ const [editingId, setEditingId] = useState(null); const [editTitle, setEditTitle] = useState(""); - const filteredConversations = searchQuery - ? conversations.filter( - (c) => - c.title.toLowerCase().includes(searchQuery.toLowerCase()) || - c.messages.some((m) => m.content.toLowerCase().includes(searchQuery.toLowerCase())) - ) - : conversations; + // Debounce + memoise the filter. Without this every keystroke walks every + // message body lowercased — O(N×M) on each character. With many long + // conversations this is a measurable hitch. + const debouncedQuery = useDebouncedValue(searchQuery, 150); + const filteredConversations = useMemo(() => { + if (!debouncedQuery) return conversations; + const needle = debouncedQuery.toLowerCase(); + return conversations.filter( + (c) => + c.title.toLowerCase().includes(needle) || + c.messages.some((m) => m.content.toLowerCase().includes(needle)) + ); + }, [conversations, debouncedQuery]); const groups = groupConversations(filteredConversations); From 786ac9e817dc2d74d69f3b3e64a640f1c9fb5010 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:39:12 +1000 Subject: [PATCH 032/172] Stop forcing inflated virtualizer height in ChatMessageList --- ui/src/components/ChatMessageList/ChatMessageList.tsx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ui/src/components/ChatMessageList/ChatMessageList.tsx b/ui/src/components/ChatMessageList/ChatMessageList.tsx index c877799..176e761 100644 --- a/ui/src/components/ChatMessageList/ChatMessageList.tsx +++ b/ui/src/components/ChatMessageList/ChatMessageList.tsx @@ -351,9 +351,12 @@ export function ChatMessageList({ ) : (
{virtualizer.getVirtualItems().map((virtualItem) => { const group = messageGroups[virtualItem.index]; From 9bdb3db588a9b9111e5f9ec215dc94f01326a2b7 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:39:47 +1000 Subject: [PATCH 033/172] Broadcast post-update conversation snapshot to other tabs --- .../ConversationsProvider/ConversationsProvider.tsx | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx index 159b8a0..93764a8 100644 --- a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx +++ b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx @@ -382,8 +382,12 @@ export function ConversationsProvider({ children }: ConversationsProviderProps) } } - // Apply all updates atomically via React state + // Apply all updates atomically via React state, and broadcast the + // *post-update* snapshot so other tabs see the new remoteId/syncedAt. + // Reading the closed-over `storedConversations` here would broadcast + // the pre-update state, leaving other tabs out of sync. if (updates.length > 0) { + let merged: StoredConversation[] = storedConversationsRef.current; setStoredConversations((prev) => { const updated = [...prev]; for (const update of updates) { @@ -396,13 +400,13 @@ export function ConversationsProvider({ children }: ConversationsProviderProps) }; } } + merged = updated; return updated; }); - // Broadcast to other tabs broadcastChannelRef.current?.postMessage({ type: "sync", - conversations: storedConversations, + conversations: merged, } satisfies SyncMessage); } } finally { From 88de4db2c0aac5a1a110225583584cfe41a5ae37 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:40:14 +1000 Subject: [PATCH 034/172] Compare feedback historyMode and modeMetadata in memo equality --- .../MultiModelResponse/MultiModelResponse.tsx | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ui/src/components/MultiModelResponse/MultiModelResponse.tsx b/ui/src/components/MultiModelResponse/MultiModelResponse.tsx index 77d7d0a..2ec3907 100644 --- a/ui/src/components/MultiModelResponse/MultiModelResponse.tsx +++ b/ui/src/components/MultiModelResponse/MultiModelResponse.tsx @@ -1535,6 +1535,8 @@ function areMultiModelResponsePropsEqual( if (prev.groupId !== next.groupId) return false; if (prev.selectedBest !== next.selectedBest) return false; if (prev.timestamp.getTime() !== next.timestamp.getTime()) return false; + if (prev.historyMode !== next.historyMode) return false; + if (prev.forceStacked !== next.forceStacked) return false; // Check callback identity - parent MUST use useCallback for stable refs if (prev.onSelectBest !== next.onSelectBest) return false; @@ -1576,6 +1578,14 @@ function areMultiModelResponsePropsEqual( if (prevR.error !== nextR.error) return false; if (prevR.usage?.totalTokens !== nextR.usage?.totalTokens) return false; if (prevR.usage?.reasoningTokens !== nextR.usage?.reasoningTokens) return false; + // Feedback flips (rating, "select as best") — these change badges in the + // header; without a check the user has to scroll/click to see the new + // state. + if (prevR.feedback?.rating !== nextR.feedback?.rating) return false; + if (prevR.feedback?.selectedAsBest !== nextR.feedback?.selectedAsBest) return false; + // Mode metadata (e.g., router model swap on regenerate) drives the + // routing badge. 
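+    // Reference inequality is the signal here; this assumes the provider +    // swaps in a fresh metadata object on change rather than mutating in place.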
+ if (prevR.modeMetadata !== nextR.modeMetadata) return false; // Check citations (compare length as a quick check) if ((prevR.citations?.length ?? 0) !== (nextR.citations?.length ?? 0)) return false; // Check artifacts (compare length as a quick check) From e2b5593b3464899ebfd3ad4aba04a399baaee69f Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:47:22 +1000 Subject: [PATCH 035/172] Apply nightly rustfmt to review-fixes changes --- src/auth/jwt.rs | 4 +--- src/config/auth.rs | 5 +---- src/db/mod.rs | 13 +++++++------ src/middleware/layers/admin.rs | 5 +---- src/providers/anthropic/convert.rs | 5 +---- src/providers/anthropic/stream.rs | 9 +++++---- src/providers/bedrock/stream.rs | 6 ++++-- src/providers/vertex/mod.rs | 3 +-- src/providers/vertex/stream.rs | 6 ++++-- src/routes/admin/oauth.rs | 6 +++++- src/routes/admin/org_sso_configs.rs | 5 ++--- 11 files changed, 32 insertions(+), 35 deletions(-) diff --git a/src/auth/jwt.rs b/src/auth/jwt.rs index e2556c7..5176cc8 100644 --- a/src/auth/jwt.rs +++ b/src/auth/jwt.rs @@ -148,9 +148,7 @@ impl JwtValidator { // real expected audience. let entries = config.audience.to_vec(); if entries.is_empty() { - return Err(AuthError::Internal( - "JWT audience must not be empty".into(), - )); + return Err(AuthError::Internal("JWT audience must not be empty".into())); } for entry in entries { if entry.trim().is_empty() { diff --git a/src/config/auth.rs b/src/config/auth.rs index 0a69cb1..db87cd4 100644 --- a/src/config/auth.rs +++ b/src/config/auth.rs @@ -608,10 +608,7 @@ impl ProxyAuthJwtConfig { /// Reject empty audience values. `jsonwebtoken` accepts an empty string as a /// valid audience match, so an empty entry would silently disable the audience /// check. -fn validate_jwt_audience( - field: &str, - audience: &OneOrMany, -) -> Result<(), ConfigError> { +fn validate_jwt_audience(field: &str, audience: &OneOrMany) -> Result<(), ConfigError> { let entries = audience.to_vec(); if entries.is_empty() { return Err(ConfigError::Validation(format!( diff --git a/src/db/mod.rs b/src/db/mod.rs index d7fd087..b13bb01 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -402,12 +402,13 @@ impl DbPool { sqlx::postgres::PgSslMode::VerifyFull } }; - let connect_opts = |url: &str| -> Result { - let opts: sqlx::postgres::PgConnectOptions = url.parse().map_err(|e| { - DbError::Validation(format!("Invalid Postgres URL: {e}")) - })?; - Ok(opts.ssl_mode(ssl_mode)) - }; + let connect_opts = + |url: &str| -> Result { + let opts: sqlx::postgres::PgConnectOptions = url.parse().map_err(|e| { + DbError::Validation(format!("Invalid Postgres URL: {e}")) + })?; + Ok(opts.ssl_mode(ssl_mode)) + }; let pool_opts = || { sqlx::postgres::PgPoolOptions::new() .min_connections(cfg.min_connections) diff --git a/src/middleware/layers/admin.rs b/src/middleware/layers/admin.rs index e97315b..76129f7 100644 --- a/src/middleware/layers/admin.rs +++ b/src/middleware/layers/admin.rs @@ -185,10 +185,7 @@ pub const EMERGENCY_ADMIN_ROLE: &str = "_emergency_admin"; /// headers must never be able to claim these roles, since the gateway grants /// extra trust to them (bootstrap / emergency break-glass). pub(crate) fn strip_reserved_roles(roles: Vec) -> Vec { - roles - .into_iter() - .filter(|r| !r.starts_with('_')) - .collect() + roles.into_iter().filter(|r| !r.starts_with('_')).collect() } /// Try to authenticate via bootstrap API key. 
diff --git a/src/providers/anthropic/convert.rs b/src/providers/anthropic/convert.rs index d2309fc..0766b2f 100644 --- a/src/providers/anthropic/convert.rs +++ b/src/providers/anthropic/convert.rs @@ -954,10 +954,7 @@ pub fn convert_anthropic_to_responses_response( type_: ResponsesReasoningType::Reasoning, id: format!( "rs_{}", - crate::providers::anthropic::stream::strip_anthropic_prefix( - &anthropic.id, - "msg_" - ) + crate::providers::anthropic::stream::strip_anthropic_prefix(&anthropic.id, "msg_") ), content: None, // Anthropic doesn't provide structured reasoning content summary: vec![], // Would need to generate summary diff --git a/src/providers/anthropic/stream.rs b/src/providers/anthropic/stream.rs index 3d2e512..497069a 100644 --- a/src/providers/anthropic/stream.rs +++ b/src/providers/anthropic/stream.rs @@ -541,7 +541,8 @@ impl AnthropicToOpenAIStream { self.emit_chunk(&chunk); // Emit [DONE] - self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n")); + self.output_buffer + .push_back(Bytes::from("data: [DONE]\n\n")); } AnthropicStreamEvent::Ping => { @@ -995,8 +996,7 @@ impl AnthropicToResponsesStream { let output_index = self.tool_output_index(tool_index); // Emit function call arguments delta - let fc_id = - format!("fc_{}", strip_anthropic_prefix(&tool_id, "toolu_")); + let fc_id = format!("fc_{}", strip_anthropic_prefix(&tool_id, "toolu_")); self.emit_event( "response.function_call_arguments.delta", serde_json::json!({ @@ -1265,7 +1265,8 @@ impl AnthropicToResponsesStream { ); // Emit [DONE] to signal end of stream (OpenAI Responses API convention) - self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n")); + self.output_buffer + .push_back(Bytes::from("data: [DONE]\n\n")); } AnthropicStreamEvent::Ping => { diff --git a/src/providers/bedrock/stream.rs b/src/providers/bedrock/stream.rs index 575fe9c..c58c0bd 100644 --- a/src/providers/bedrock/stream.rs +++ b/src/providers/bedrock/stream.rs @@ -362,7 +362,8 @@ impl BedrockToOpenAIStream { self.emit_chunk(&usage_chunk); // Emit [DONE] - self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n")); + self.output_buffer + .push_back(Bytes::from("data: [DONE]\n\n")); } } _ => { @@ -1108,7 +1109,8 @@ impl BedrockToResponsesStream { ); // Emit [DONE] to signal end of stream - self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n")); + self.output_buffer + .push_back(Bytes::from("data: [DONE]\n\n")); } } _ => { diff --git a/src/providers/vertex/mod.rs b/src/providers/vertex/mod.rs index 1283647..9def374 100644 --- a/src/providers/vertex/mod.rs +++ b/src/providers/vertex/mod.rs @@ -874,8 +874,7 @@ mod streaming_tests { transformer.handle_response(response); // Should emit [DONE] at the end - let last_chunk = - std::str::from_utf8(transformer.output_buffer.back().unwrap()).unwrap(); + let last_chunk = std::str::from_utf8(transformer.output_buffer.back().unwrap()).unwrap(); assert_eq!(last_chunk, "data: [DONE]\n\n"); // Should have usage in second-to-last chunk diff --git a/src/providers/vertex/stream.rs b/src/providers/vertex/stream.rs index cf735e3..1837263 100644 --- a/src/providers/vertex/stream.rs +++ b/src/providers/vertex/stream.rs @@ -353,7 +353,8 @@ impl VertexToOpenAIStream { self.emit_chunk(&usage_chunk); // Emit [DONE] - self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n")); + self.output_buffer + .push_back(Bytes::from("data: [DONE]\n\n")); } } } @@ -592,7 +593,8 @@ impl VertexToResponsesStream { // Pass through [DONE] marker if json_str == "[DONE]" { - 
self.output_buffer.push_back(Bytes::from("data: [DONE]\n\n")); + self.output_buffer + .push_back(Bytes::from("data: [DONE]\n\n")); return; } diff --git a/src/routes/admin/oauth.rs b/src/routes/admin/oauth.rs index ab7f866..14f0abb 100644 --- a/src/routes/admin/oauth.rs +++ b/src/routes/admin/oauth.rs @@ -58,7 +58,11 @@ fn validate_callback_url(callback_url: &str, pkce: &OAuthPkceConfig) -> Result d.eq_ignore_ascii_case("localhost"), Some(url::Host::Ipv4(ip)) => ip.is_loopback(), Some(url::Host::Ipv6(ip)) => { - ip.is_loopback() || ip.to_ipv4_mapped().map(|v4| v4.is_loopback()).unwrap_or(false) + ip.is_loopback() + || ip + .to_ipv4_mapped() + .map(|v4| v4.is_loopback()) + .unwrap_or(false) } None => false, }; diff --git a/src/routes/admin/org_sso_configs.rs b/src/routes/admin/org_sso_configs.rs index 3bd55ba..031c303 100644 --- a/src/routes/admin/org_sso_configs.rs +++ b/src/routes/admin/org_sso_configs.rs @@ -761,9 +761,8 @@ pub async fn parse_saml_metadata( // Block private/loopback/cloud-metadata addresses with DNS rebinding // protection — the same gate that `SamlAuthenticator::get_metadata` uses. - crate::validation::validate_base_url(&input.metadata_url, false).map_err(|e| { - AdminError::Validation(format!("SAML metadata URL is not permitted: {e}")) - })?; + crate::validation::validate_base_url(&input.metadata_url, false) + .map_err(|e| AdminError::Validation(format!("SAML metadata URL is not permitted: {e}")))?; // Fetch and parse the metadata let client = reqwest::Client::new(); From f2edc67a0cc2143c920936476ec5372439f457f8 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 22:59:41 +1000 Subject: [PATCH 036/172] Reject session cookie secure=false with SameSite=None --- src/config/auth.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/config/auth.rs b/src/config/auth.rs index db87cd4..97367e4 100644 --- a/src/config/auth.rs +++ b/src/config/auth.rs @@ -1203,6 +1203,13 @@ impl SessionConfig { "Session duration cannot be zero".into(), )); } + // Browsers require the Secure attribute when SameSite=None; otherwise + // the cookie is silently rejected in cross-site contexts. 
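+        // Chrome has enforced Secure-with-SameSite=None since v80 and the other +        // engines followed, so failing at config load beats debugging a login +        // flow that only breaks cross-site in production.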
+ if matches!(self.same_site, SameSite::None) && !self.secure { + return Err(ConfigError::Validation( + "Session cookie with same_site = \"none\" requires secure = true".into(), + )); + } Ok(()) } } @@ -1767,6 +1774,39 @@ mod tests { ); } + #[cfg(feature = "sso")] + #[test] + fn test_session_config_rejects_insecure_samesite_none() { + let config = SessionConfig { + cookie_name: "__gw_session".to_string(), + duration_secs: 86400, + secure: false, + same_site: SameSite::None, + secret: None, + enhanced: EnhancedSessionConfig::default(), + }; + let err = config.validate().expect_err("must reject insecure None"); + let msg = format!("{}", err); + assert!( + msg.contains("same_site") && msg.contains("secure"), + "error must mention same_site/secure: {msg}" + ); + } + + #[cfg(feature = "sso")] + #[test] + fn test_session_config_allows_insecure_lax() { + let config = SessionConfig { + cookie_name: "__gw_session".to_string(), + duration_secs: 86400, + secure: false, + same_site: SameSite::Lax, + secret: None, + enhanced: EnhancedSessionConfig::default(), + }; + config.validate().expect("Lax + insecure must validate"); + } + #[cfg(feature = "sso")] #[test] fn test_session_config_debug_no_secret() { From 899b3fe829bd75f1c329ad98ce3445166867928f Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:02:17 +1000 Subject: [PATCH 037/172] Preserve SSE event terminator when injecting cost --- src/streaming/mod.rs | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/streaming/mod.rs b/src/streaming/mod.rs index 2af4fb2..6764645 100644 --- a/src/streaming/mod.rs +++ b/src/streaming/mod.rs @@ -276,7 +276,11 @@ fn inject_cost_into_sse_chunk(chunk: &[u8], cost_dollars: f64) -> Bytes { }; let mut output = String::with_capacity(chunk_str.len() + 32); - for line in chunk_str.split('\n') { + for raw in chunk_str.split_inclusive('\n') { + let (line, terminator) = match raw.strip_suffix('\n') { + Some(without) => (without, "\n"), + None => (raw, ""), + }; if let Some(json_str) = line.strip_prefix("data: ") { if let Ok(mut json) = serde_json::from_str::(json_str) { // Try root-level usage (Chat Completions format) @@ -308,13 +312,7 @@ fn inject_cost_into_sse_chunk(chunk: &[u8], cost_dollars: f64) -> Bytes { } else { output.push_str(line); } - output.push('\n'); - } - - // The split('\n') + push('\n') loop adds one extra trailing newline; - // remove it to match original chunk ending - if !chunk_str.ends_with('\n') { - output.pop(); + output.push_str(terminator); } Bytes::from(output) @@ -1024,6 +1022,25 @@ mod tests { } } + #[test] + fn test_inject_cost_preserves_double_newline_terminator() { + let chunk = b"data: {\"usage\":{\"prompt_tokens\":1,\"completion_tokens\":2}}\n\n"; + let injected = inject_cost_into_sse_chunk(chunk, 0.0042); + let s = std::str::from_utf8(&injected).unwrap(); + assert!(s.ends_with("\n\n"), "must preserve SSE event terminator"); + assert!(!s.ends_with("\n\n\n"), "must not add extra newline"); + assert!(s.contains("\"cost\":0.0042")); + } + + #[test] + fn test_inject_cost_no_trailing_newline() { + let chunk = b"data: {\"usage\":{\"prompt_tokens\":1,\"completion_tokens\":2}}"; + let injected = inject_cost_into_sse_chunk(chunk, 0.0042); + let s = std::str::from_utf8(&injected).unwrap(); + assert!(!s.ends_with('\n'), "must preserve absent terminator"); + assert!(s.contains("\"cost\":0.0042")); + } + #[test] fn test_parse_sse_done() { let chunk = b"data: [DONE]\n\n"; From d3af79f4a512cecbf1f1a814c134bd8793da8a7b Mon Sep 17 
00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:04:00 +1000 Subject: [PATCH 038/172] Estimate SSE delta tokens by char count, not byte len --- src/streaming/mod.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/streaming/mod.rs b/src/streaming/mod.rs index 6764645..726ed3c 100644 --- a/src/streaming/mod.rs +++ b/src/streaming/mod.rs @@ -229,8 +229,10 @@ impl SseParser { .and_then(|delta| delta.get("content")) .and_then(|c| c.as_str()) { - // Rough approximation: 1 token ≈ 4 characters - let estimated_tokens = (content.len() as i64 + 3) / 4; + // Rough approximation: 1 token ≈ 4 characters. + // Use chars() instead of len() so multibyte content + // (CJK, emoji) isn't over-counted at a token per byte. + let estimated_tokens = (content.chars().count() as i64 + 3) / 4; return Some(SseChunk::Delta { tokens: estimated_tokens, }); @@ -1022,6 +1024,23 @@ mod tests { } } + #[test] + fn test_parse_sse_delta_multibyte_content() { + // Four chars (three CJK + one emoji) = 13 bytes. (len() + 3)/4 would + // estimate 4 tokens; (chars().count() + 3)/4 estimates 1. + let chunk = r#"data: {"choices":[{"delta":{"content":"日本語😀"}}]}"#; + let result = SseParser::parse_chunk(chunk.as_bytes()); + match result { + Some(SseChunk::Delta { tokens }) => { + assert_eq!( + tokens, 1, + "4 chars should estimate to 1 token, got {tokens}" + ); + } + _ => panic!("Expected Delta chunk"), + } + } + #[test] fn test_inject_cost_preserves_double_newline_terminator() { let chunk = b"data: {\"usage\":{\"prompt_tokens\":1,\"completion_tokens\":2}}\n\n"; From 4850102c288463da3e19ed9aa114ea614f84b33e Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:04:27 +1000 Subject: [PATCH 039/172] Pin React Query mutations retry to 0 --- ui/src/App.tsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ui/src/App.tsx b/ui/src/App.tsx index 0eb36c7..5bcfa98 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -18,6 +18,9 @@ const queryClient = new QueryClient({ staleTime: 1000 * 60, // 1 minute retry: 1, }, + mutations: { + retry: 0, + }, }, }); From 140c5c17aaab4b21df0bb6f40c199754601147e6 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:05:07 +1000 Subject: [PATCH 040/172] Use form's isSubmitting on LoginPage to prevent double-submit --- ui/src/pages/LoginPage.tsx | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ui/src/pages/LoginPage.tsx b/ui/src/pages/LoginPage.tsx index ee317cf..e930f01 100644 --- a/ui/src/pages/LoginPage.tsx +++ b/ui/src/pages/LoginPage.tsx @@ -32,7 +32,6 @@ export default function LoginPage() { const discoverSso = useDiscoverSso(); const [error, setError] = useState(null); - const [isSubmitting, setIsSubmitting] = useState(false); const [discoveredOrg, setDiscoveredOrg] = useState(null); const [discoveryEmail, setDiscoveryEmail] = useState(""); @@ -98,17 +97,15 @@ export default function LoginPage() { const onApiKeySubmit = async (data: LoginForm) => { setError(null); - setIsSubmitting(true); - try { await login("api_key", { apiKey: data.apiKey }); } catch (err) { setError(err instanceof Error ? err.message : "Authentication failed"); - } finally { - setIsSubmitting(false); } }; + const isSubmitting = apiKeyForm.formState.isSubmitting; + const handleOidcLogin = (orgId?: string) => { login("oidc", orgId ?
{ orgId } : undefined); }; From aeb5aa85c8f1a8c3a925ddf9c0189a2e9a53ed80 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:09:52 +1000 Subject: [PATCH 041/172] Make OIDC/SAML auth_state TTL configurable via SessionConfig --- src/auth/oidc.rs | 5 +++-- src/auth/saml.rs | 5 +++-- src/config/auth.rs | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/auth/oidc.rs b/src/auth/oidc.rs index 6e7217d..079b9e4 100644 --- a/src/auth/oidc.rs +++ b/src/auth/oidc.rs @@ -347,9 +347,10 @@ impl OidcAuthenticator { .map_err(|e| AuthError::Internal(format!("Failed to retrieve auth state: {}", e)))? .ok_or(AuthError::InvalidToken)?; - // Check if state is too old (10 minute limit) + // Reject states older than the configured TTL. + let ttl = chrono::Duration::seconds(self.config.session.auth_state_ttl_secs as i64); let age = Utc::now() - auth_state.created_at; - if age > chrono::Duration::minutes(10) { + if age > ttl { return Err(AuthError::ExpiredToken); } diff --git a/src/auth/saml.rs b/src/auth/saml.rs index d15c95a..6627562 100644 --- a/src/auth/saml.rs +++ b/src/auth/saml.rs @@ -329,9 +329,10 @@ impl SamlAuthenticator { .map_err(|e| AuthError::Internal(format!("Failed to retrieve auth state: {}", e)))? .ok_or(AuthError::InvalidToken)?; - // Check if state is too old (10 minute limit) + // Reject states older than the configured TTL. + let ttl = chrono::Duration::seconds(self.config.session.auth_state_ttl_secs as i64); let age = Utc::now() - auth_state.created_at; - if age > chrono::Duration::minutes(10) { + if age > ttl { return Err(AuthError::ExpiredToken); } diff --git a/src/config/auth.rs b/src/config/auth.rs index 97367e4..9d13d1c 100644 --- a/src/config/auth.rs +++ b/src/config/auth.rs @@ -1097,6 +1097,12 @@ pub struct SessionConfig { #[serde(default = "default_session_duration")] pub duration_secs: u64, + /// How long an in-flight authorization request (PKCE state, SAML + /// `relay_state`) remains valid, in seconds. Once exceeded, the user must + /// restart the login. Defaults to 10 minutes. + #[serde(default = "default_auth_state_ttl")] + pub auth_state_ttl_secs: u64, + /// Secure cookie (HTTPS only). #[serde(default = "default_true")] pub secure: bool, @@ -1167,6 +1173,7 @@ impl std::fmt::Debug for SessionConfig { f.debug_struct("SessionConfig") .field("cookie_name", &self.cookie_name) .field("duration_secs", &self.duration_secs) + .field("auth_state_ttl_secs", &self.auth_state_ttl_secs) .field("secure", &self.secure) .field("same_site", &self.same_site) .field("secret", &self.secret.as_ref().map(|_| "****")) @@ -1181,6 +1188,7 @@ impl Default for SessionConfig { Self { cookie_name: default_session_cookie(), duration_secs: default_session_duration(), + auth_state_ttl_secs: default_auth_state_ttl(), secure: true, same_site: SameSite::default(), secret: None, @@ -1203,6 +1211,11 @@ impl SessionConfig { "Session duration cannot be zero".into(), )); } + if self.auth_state_ttl_secs == 0 { + return Err(ConfigError::Validation( + "Session auth_state_ttl_secs cannot be zero".into(), + )); + } // Browsers require the Secure attribute when SameSite=None; otherwise // the cookie is silently rejected in cross-site contexts. 
if matches!(self.same_site, SameSite::None) && !self.secure { @@ -1224,6 +1237,11 @@ fn default_session_duration() -> u64 { 86400 * 7 // 7 days } +#[cfg(feature = "sso")] +fn default_auth_state_ttl() -> u64 { + 600 // 10 minutes +} + #[cfg(feature = "sso")] #[derive(Debug, Clone, Default, Serialize, Deserialize)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] @@ -1752,6 +1770,7 @@ mod tests { let config = SessionConfig { cookie_name: "__gw_session".to_string(), duration_secs: 86400, + auth_state_ttl_secs: 600, secure: true, same_site: SameSite::Lax, secret: Some("my-super-secret-session-key".to_string()), @@ -1780,6 +1799,7 @@ mod tests { let config = SessionConfig { cookie_name: "__gw_session".to_string(), duration_secs: 86400, + auth_state_ttl_secs: 600, secure: false, same_site: SameSite::None, secret: None, @@ -1799,6 +1819,7 @@ mod tests { let config = SessionConfig { cookie_name: "__gw_session".to_string(), duration_secs: 86400, + auth_state_ttl_secs: 600, secure: false, same_site: SameSite::Lax, secret: None, @@ -1814,6 +1835,7 @@ mod tests { let config = SessionConfig { cookie_name: "__gw_session".to_string(), duration_secs: 86400, + auth_state_ttl_secs: 600, secure: true, same_site: SameSite::Lax, secret: None, From 2dcd8084efc6c38ce79476af045df5f7d992a226 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:11:40 +1000 Subject: [PATCH 042/172] Only strip Content-Length when cost injection rewrites body --- src/providers/mod.rs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/providers/mod.rs b/src/providers/mod.rs index ce4c639..e9db46b 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -797,16 +797,24 @@ pub async fn inject_cost_into_response(params: CostInjectionParams<'_>) -> Respo .map(|(_, s)| s) .unwrap_or(crate::pricing::CostPricingSource::None); - // Inject cost (in dollars) into the usage object in the response body + // Inject cost (in dollars) into the usage object in the response body. + // Only re-serialize when we actually mutate the JSON; otherwise we'd + // change the body length (whitespace, key order) and have to strip + // Content-Length unnecessarily. 
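+    // (serde_json::to_vec always emits compact JSON, so re-serializing a +    // pretty-printed upstream body would change its length even when nothing +    // in it was touched.)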
+ let mut body_modified = false; if let Some(cost) = cost_microcents { let cost_dollars = crate::pricing::microcents_to_dollars(cost); if let Some(usage_obj) = json.get_mut("usage").and_then(|u| u.as_object_mut()) { usage_obj.insert("cost".to_string(), serde_json::Value::from(cost_dollars)); + body_modified = true; } } - // Re-serialize the (possibly modified) JSON - let body_bytes = serde_json::to_vec(&json).unwrap_or_else(|_| bytes.to_vec()); + let body_bytes = if body_modified { + serde_json::to_vec(&json).unwrap_or_else(|_| bytes.to_vec()) + } else { + bytes.to_vec() + }; ( Some(input), @@ -817,6 +825,7 @@ pub async fn inject_cost_into_response(params: CostInjectionParams<'_>) -> Respo finish_reason, body_bytes, pricing_source, + body_modified, ) } Err(_) => ( @@ -828,6 +837,7 @@ pub async fn inject_cost_into_response(params: CostInjectionParams<'_>) -> Respo None, bytes.to_vec(), crate::pricing::CostPricingSource::None, + false, ), }; @@ -840,6 +850,7 @@ pub async fn inject_cost_into_response(params: CostInjectionParams<'_>) -> Respo finish_reason, body_bytes, pricing_source, + body_modified, ) = extracted; // Rebuild response with headers @@ -880,8 +891,11 @@ pub async fn inject_cost_into_response(params: CostInjectionParams<'_>) -> Respo new_parts.headers.insert("X-Pricing-Source", value); } - // Remove Content-Length since body size may have changed after cost injection - new_parts.headers.remove(CONTENT_LENGTH); + // Only strip Content-Length when we re-serialized the body. If the body is + // passed through untouched, the upstream length is still authoritative. + if body_modified { + new_parts.headers.remove(CONTENT_LENGTH); + } Response::from_parts(new_parts, Body::from(body_bytes)) } From 443c85a9948625157f24cf69984c6ba89b61670a Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:12:12 +1000 Subject: [PATCH 043/172] Surface unknown OAuth owner kind instead of coercing to user --- ui/src/pages/OAuthAuthorizePage.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/src/pages/OAuthAuthorizePage.tsx b/ui/src/pages/OAuthAuthorizePage.tsx index eaf6fea..2322622 100644 --- a/ui/src/pages/OAuthAuthorizePage.tsx +++ b/ui/src/pages/OAuthAuthorizePage.tsx @@ -125,7 +125,7 @@ function ownerKeyToApiKeyOwner(key: string, userId: string): ApiKeyOwner { case "project": return { type: "project", project_id: id }; default: - return { type: "user", user_id: userId }; + throw new Error(`Unsupported owner kind: ${kind}`); } } From 7212f401335e1063ff9416fffd8db6e504762313 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:12:55 +1000 Subject: [PATCH 044/172] Cancel useAutoScroll rAF on unmount to avoid stale scheduled callback --- ui/src/hooks/useAutoScroll.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ui/src/hooks/useAutoScroll.ts b/ui/src/hooks/useAutoScroll.ts index 5f8f996..4d35eb5 100644 --- a/ui/src/hooks/useAutoScroll.ts +++ b/ui/src/hooks/useAutoScroll.ts @@ -140,7 +140,7 @@ export function useAutoScroll(options: UseAutoScrollOptions = {}): UseAutoScroll }; // Use requestAnimationFrame to ensure layout is complete - requestAnimationFrame(checkInitialPosition); + const rafId = requestAnimationFrame(checkInitialPosition); // Also check when container resizes (content loaded) // Skip during streaming - content height changes constantly during streaming, @@ -153,7 +153,10 @@ export function useAutoScroll(options: UseAutoScrollOptions = {}): UseAutoScroll }); resizeObserver.observe(container); - return () => 
resizeObserver.disconnect(); + return () => { + cancelAnimationFrame(rafId); + resizeObserver.disconnect(); + }; }, [checkIfAtBottom]); return { From 128ccf39ccec341da965184aeb78b362bbb28f7e Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:13:31 +1000 Subject: [PATCH 045/172] Roll back optimistic pin reorder on sync failure --- .../ConversationsProvider.tsx | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx index 93764a8..eae2f28 100644 --- a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx +++ b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx @@ -714,6 +714,11 @@ export function ConversationsProvider({ children }: ConversationsProviderProps) const reorderPinned = useCallback( (orderedIds: string[]) => { + // Snapshot current pin orders so we can roll back if any sync fails. + const previousOrders = new Map( + storedConversations.map((c) => [c.id, c.pinOrder] as const) + ); + // Update local state with new pin orders setStoredConversations((prev) => { const updated = prev.map((c) => { @@ -731,7 +736,20 @@ export function ConversationsProvider({ children }: ConversationsProviderProps) orderedIds.forEach((id, index) => { const conv = storedConversations.find((c) => c.id === id); if (conv?.remoteId) { - pinMutation.mutate({ remoteId: conv.remoteId, pinOrder: index }); + pinMutation.mutate( + { remoteId: conv.remoteId, pinOrder: index }, + { + onError: () => { + setStoredConversations((prev) => + prev.map((c) => + previousOrders.has(c.id) + ? { ...c, pinOrder: previousOrders.get(c.id) } + : c + ) + ); + }, + } + ); } }); } From 55fc3e5babb3c2e1f401b9c8ae96a0d143201a6e Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:14:50 +1000 Subject: [PATCH 046/172] Cache shiki HTML so theme toggles reuse prior highlights --- .../HighlightedCode/HighlightedCode.tsx | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/ui/src/components/HighlightedCode/HighlightedCode.tsx b/ui/src/components/HighlightedCode/HighlightedCode.tsx index a930837..7fa2f79 100644 --- a/ui/src/components/HighlightedCode/HighlightedCode.tsx +++ b/ui/src/components/HighlightedCode/HighlightedCode.tsx @@ -31,6 +31,33 @@ function getHighlighter(): Promise { return highlighterPromise; } +// Bounded LRU-ish cache so toggling themes back and forth on the same blocks +// doesn't trigger a re-highlight every time. Keyed on (theme, lang, code). +const HIGHLIGHT_CACHE_LIMIT = 256; +const highlightCache = new Map(); + +function cacheKey(theme: string, lang: string, code: string): string { + return `${theme}|${lang}|${code}`; +} + +function readHighlightCache(key: string): string | undefined { + const cached = highlightCache.get(key); + if (cached !== undefined) { + // Move to most-recent slot + highlightCache.delete(key); + highlightCache.set(key, cached); + } + return cached; +} + +function writeHighlightCache(key: string, value: string): void { + if (highlightCache.size >= HIGHLIGHT_CACHE_LIMIT) { + const oldest = highlightCache.keys().next().value; + if (oldest !== undefined) highlightCache.delete(oldest); + } + highlightCache.set(key, value); +} + export interface HighlightedCodeProps { code: string; language?: string; @@ -64,10 +91,17 @@ function HighlightedCodeComponent({ useEffect(() => { let cancelled = false; + const lang = (language?.toLowerCase() ?? 
"text") || "text"; + const key = cacheKey(theme, lang, code); + const cached = readHighlightCache(key); + if (cached !== undefined) { + setHtml(cached); + return; + } + getHighlighter().then((highlighter) => { if (cancelled) return; - const lang = language?.toLowerCase() ?? "text"; const loadedLangs = highlighter.getLoadedLanguages(); // Use plain text for unknown languages @@ -77,6 +111,7 @@ function HighlightedCodeComponent({ lang: effectiveLang, theme, }); + writeHighlightCache(key, result); setHtml(result); }); From 4e82ec073fb59832ba4eab140a0b1cf851c4fc09 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:15:16 +1000 Subject: [PATCH 047/172] Replace 50-char prefix sync hash with djb2 over full content --- .../ConversationsProvider.tsx | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx index eae2f28..3ab8851 100644 --- a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx +++ b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx @@ -121,15 +121,25 @@ function localToApiMessage(m: StoredConversation["messages"][0]): Message { }; } +// djb2 string hash. Plenty for content-change detection: collisions are +// vanishingly rare in practice and we don't need cryptographic guarantees. +function hashContent(s: string): string { + let h = 5381; + for (let i = 0; i < s.length; i++) { + h = (((h << 5) + h) ^ s.charCodeAt(i)) | 0; + } + return (h >>> 0).toString(36); +} + // Compute a sync hash that includes actual content changes function computeSyncHash(conversations: StoredConversation[]): string { return JSON.stringify( conversations.map((c) => ({ id: c.id, title: c.title, - // Include message content hash for detecting content changes + // Hash full content so edits past character 50 still invalidate the hash. msgHash: c.messages - .map((m) => `${m.role}:${m.content.length}:${m.content.slice(0, 50)}`) + .map((m) => `${m.role}:${m.content.length}:${hashContent(m.content)}`) .join("|"), models: c.models.join(","), updatedAt: c.updatedAt, From c5c45db0cc15471775650e84a6b75ff0b15ce031 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:15:46 +1000 Subject: [PATCH 048/172] Accept pasted images in ChatInput textarea --- ui/src/components/ChatInput/ChatInput.tsx | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ui/src/components/ChatInput/ChatInput.tsx b/ui/src/components/ChatInput/ChatInput.tsx index 76c3e9a..da9f1ae 100644 --- a/ui/src/components/ChatInput/ChatInput.tsx +++ b/ui/src/components/ChatInput/ChatInput.tsx @@ -437,6 +437,18 @@ export function ChatInput({ [handleFileSelect] ); + const handlePaste = useCallback( + (event: React.ClipboardEvent) => { + const pastedFiles = event.clipboardData?.files; + if (pastedFiles && pastedFiles.length > 0) { + // Prevent the textarea from inserting an image filename or data URL. + event.preventDefault(); + handleFileSelect(pastedFiles); + } + }, + [handleFileSelect] + ); + const handleDragOver = useCallback((event: React.DragEvent) => { event.preventDefault(); setIsDragging(true); @@ -549,6 +561,7 @@ export function ChatInput({ updateSlashState(target.value, target.selectionStart ?? 
0); }} onKeyDown={handleKeyDown} + onPaste={handlePaste} placeholder={placeholder} className="min-h-[56px] w-full resize-none border-0 bg-transparent px-4 pt-3 pb-1 text-base focus-visible:ring-0 focus-visible:ring-offset-0" autoResize From d5c95924bdd164c71a509fbf477c545e87313c6c Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:16:14 +1000 Subject: [PATCH 049/172] Broadcast useLocalStorage writes to same-tab hook instances --- ui/src/hooks/useLocalStorage.ts | 45 ++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/ui/src/hooks/useLocalStorage.ts b/ui/src/hooks/useLocalStorage.ts index 9216965..16abe7a 100644 --- a/ui/src/hooks/useLocalStorage.ts +++ b/ui/src/hooks/useLocalStorage.ts @@ -1,5 +1,15 @@ import { useState, useEffect, useCallback } from "react"; +// `storage` events only fire in *other* tabs. To keep multiple hook instances +// of the same key inside the same tab in sync, mirror writes onto a custom +// event we dispatch ourselves. +const SAME_TAB_EVENT = "hadrian:local-storage"; + +interface SameTabPayload { + key: string; + newValue: string | null; +} + export function useLocalStorage( key: string, initialValue: T @@ -21,7 +31,13 @@ export function useLocalStorage( setStoredValue((prev) => { const valueToStore = value instanceof Function ? value(prev) : value; if (typeof window !== "undefined") { - window.localStorage.setItem(key, JSON.stringify(valueToStore)); + const serialized = JSON.stringify(valueToStore); + window.localStorage.setItem(key, serialized); + window.dispatchEvent( + new CustomEvent(SAME_TAB_EVENT, { + detail: { key, newValue: serialized }, + }) + ); } return valueToStore; }); @@ -30,18 +46,29 @@ export function useLocalStorage( ); useEffect(() => { - const handleStorageChange = (e: StorageEvent) => { - if (e.key === key && e.newValue) { - try { - setStoredValue(JSON.parse(e.newValue) as T); - } catch { - // Ignore parse errors - } + const apply = (newValue: string | null) => { + if (newValue === null) return; + try { + setStoredValue(JSON.parse(newValue) as T); + } catch { + // Ignore parse errors } }; + const handleStorageChange = (e: StorageEvent) => { + if (e.key === key) apply(e.newValue); + }; + const handleSameTabChange = (e: Event) => { + const detail = (e as CustomEvent).detail; + if (detail?.key === key) apply(detail.newValue); + }; + window.addEventListener("storage", handleStorageChange); - return () => window.removeEventListener("storage", handleStorageChange); + window.addEventListener(SAME_TAB_EVENT, handleSameTabChange); + return () => { + window.removeEventListener("storage", handleStorageChange); + window.removeEventListener(SAME_TAB_EVENT, handleSameTabChange); + }; }, [key]); return [storedValue, setValue]; From 0a7423106fb222222d31b0da337d3d3d4b4f5cc8 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:18:36 +1000 Subject: [PATCH 050/172] SSRF-validate per-org OIDC redirect_uri on create and update --- src/routes/admin/org_sso_configs.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/routes/admin/org_sso_configs.rs b/src/routes/admin/org_sso_configs.rs index 031c303..0cfe64b 100644 --- a/src/routes/admin/org_sso_configs.rs +++ b/src/routes/admin/org_sso_configs.rs @@ -334,6 +334,10 @@ pub async fn create( crate::validation::validate_base_url_opts(discovery_url, url_opts) .map_err(|e| AdminError::Validation(format!("Invalid discovery URL: {e}")))?; } + if let Some(ref redirect_uri) = input.redirect_uri { + 
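+            // Apply the same gate as the discovery URL above: this field is +            // org-admin input, so it gets identical validation.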
crate::validation::validate_base_url_opts(redirect_uri, url_opts) + .map_err(|e| AdminError::Validation(format!("Invalid redirect URI: {e}")))?; + } } // Create the SSO config @@ -550,6 +554,10 @@ pub async fn update( crate::validation::validate_base_url_opts(discovery_url, url_opts) .map_err(|e| AdminError::Validation(format!("Invalid discovery URL: {e}")))?; } + if let Some(Some(ref redirect_uri)) = input.redirect_uri { + crate::validation::validate_base_url_opts(redirect_uri, url_opts) + .map_err(|e| AdminError::Validation(format!("Invalid redirect URI: {e}")))?; + } } // Update the SSO config From 33991a9ad2a188c6b89512e1c2db0429a90299b6 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:20:01 +1000 Subject: [PATCH 051/172] Abort in-flight title generation when ConversationsProvider unmounts --- .../ConversationsProvider/ConversationsProvider.tsx | 9 ++++++++- ui/src/utils/generateTitle.ts | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx index 3ab8851..d550eb1 100644 --- a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx +++ b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx @@ -527,6 +527,13 @@ export function ConversationsProvider({ children }: ConversationsProviderProps) // Track conversations that are pending LLM title generation to avoid duplicate calls const pendingTitleGenRef = useRef>(new Set()); + // AbortController used to cancel any in-flight title generations on unmount. + const titleGenAbortRef = useRef(new AbortController()); + useEffect(() => { + return () => { + titleGenAbortRef.current.abort(); + }; + }, []); const updateConversation = useCallback( (id: string, messages: ChatMessage[], models?: string[]) => { @@ -564,7 +571,7 @@ export function ConversationsProvider({ children }: ConversationsProviderProps) const titleModel = preferences.titleGenerationModel; if (needsLLMTitle && firstUserMessage && titleModel) { pendingTitleGenRef.current.add(id); - generateTitleWithLLM(firstUserMessage, titleModel) + generateTitleWithLLM(firstUserMessage, titleModel, titleGenAbortRef.current.signal) .then((result) => { // Only update if the title is different and better setConversations((prev) => diff --git a/ui/src/utils/generateTitle.ts b/ui/src/utils/generateTitle.ts index 1453035..ff49dd3 100644 --- a/ui/src/utils/generateTitle.ts +++ b/ui/src/utils/generateTitle.ts @@ -27,7 +27,8 @@ export function generateSimpleTitle(userMessage: string): string { */ export async function generateTitleWithLLM( userMessage: string, - model: string + model: string, + signal?: AbortSignal ): Promise { try { const response = await apiV1ChatCompletions({ @@ -46,6 +47,7 @@ export async function generateTitleWithLLM( }, ], }, + signal, throwOnError: true, }); From 5de4df678d384f0fbc6eea238e2a5676bcdd8c8b Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:25:32 +1000 Subject: [PATCH 052/172] Scope API-key revoke/rotate authz by owner org/team/project --- src/routes/admin/api_keys.rs | 123 ++++++++++++++++++++++++++++------- 1 file changed, 101 insertions(+), 22 deletions(-) diff --git a/src/routes/admin/api_keys.rs b/src/routes/admin/api_keys.rs index af51c7e..63086b3 100644 --- a/src/routes/admin/api_keys.rs +++ b/src/routes/admin/api_keys.rs @@ -189,6 +189,88 @@ pub(super) async fn check_owner_create_authz( Ok(()) } +/// Run the owner-scoped RBAC check that gates modification 
of an existing key +/// (revoke, rotate, etc). Mirrors `check_owner_create_authz` but for an +/// already-known key with a concrete id, so authorisation is scoped to the +/// owner's org/team/project rather than checking only the bare resource id. +pub(super) async fn check_owner_modify_authz( + services: &crate::services::Services, + authz: &crate::middleware::AuthzContext, + action: &str, + key_id: uuid::Uuid, + owner: &crate::models::ApiKeyOwner, +) -> Result<(), AdminError> { + let resource_id = key_id.to_string(); + match owner { + crate::models::ApiKeyOwner::Organization { org_id } => { + authz.require( + "api_key", + action, + Some(&resource_id), + Some(&org_id.to_string()), + None, + None, + )?; + } + crate::models::ApiKeyOwner::Team { team_id } => { + let team = services + .teams + .get_by_id(*team_id) + .await? + .ok_or_else(|| AdminError::NotFound(format!("Team '{}' not found", team_id)))?; + authz.require( + "api_key", + action, + Some(&resource_id), + Some(&team.org_id.to_string()), + Some(&team_id.to_string()), + None, + )?; + } + crate::models::ApiKeyOwner::Project { project_id } => { + let project = services + .projects + .get_by_id(*project_id) + .await? + .ok_or_else(|| { + AdminError::NotFound(format!("Project '{}' not found", project_id)) + })?; + authz.require( + "api_key", + action, + Some(&resource_id), + Some(&project.org_id.to_string()), + None, + Some(&project_id.to_string()), + )?; + } + crate::models::ApiKeyOwner::User { .. } => { + authz.require("api_key", action, Some(&resource_id), None, None, None)?; + } + crate::models::ApiKeyOwner::ServiceAccount { service_account_id } => { + let sa = services + .service_accounts + .get_by_id(*service_account_id) + .await? + .ok_or_else(|| { + AdminError::NotFound(format!( + "Service account '{}' not found", + service_account_id + )) + })?; + authz.require( + "api_key", + action, + Some(&resource_id), + Some(&sa.org_id.to_string()), + None, + None, + )?; + } + } + Ok(()) +} + /// Enforce the per-scope `max_api_keys_per_*` limits before creating a key. pub(crate) async fn check_owner_create_limits( services: &crate::services::Services, @@ -800,20 +882,19 @@ pub async fn revoke( Extension(client_info): Extension, Path(key_id): Path, ) -> Result, AdminError> { - authz.require( - "api_key", - "delete", - Some(&key_id.to_string()), - None, - None, - None, - )?; - let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); - // Get API key info for audit log before revoking - let key_info = services.api_keys.get_by_id(key_id).await?; + // Fetch the key first so authz can scope the check by owner. Without + // this, the key id alone is insufficient — RBAC needs the org/team/ + // project to distinguish org-admins of different tenants. + let key_info = services + .api_keys + .get_by_id(key_id) + .await? + .ok_or_else(|| AdminError::NotFound(format!("API key '{}' not found", key_id)))?; + check_owner_modify_authz(services, &authz, "delete", key_id, &key_info.owner).await?; + let key_info = Some(key_info); services.api_keys.revoke(key_id).await?; @@ -956,18 +1037,17 @@ pub async fn rotate( Path(key_id): Path, Json(request): Json, ) -> Result<(StatusCode, Json), AdminError> { - authz.require( - "api_key", - "update", - Some(&key_id.to_string()), - None, - None, - None, - )?; - let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); + // Fetch first so authz can scope by owner; see `revoke` for rationale. + let old_key_for_authz = services + .api_keys + .get_by_id(key_id) + .await? 
+ .ok_or_else(|| AdminError::NotFound(format!("API key '{}' not found", key_id)))?; + check_owner_modify_authz(services, &authz, "update", key_id, &old_key_for_authz.owner).await?; + // Validate grace period let grace_period_seconds = request .grace_period_seconds @@ -989,8 +1069,7 @@ pub async fn rotate( // Get the key generation prefix from config let prefix = state.config.auth.api_key_config().generation_prefix(); - // Get old key info for audit log before rotating - let old_key = services.api_keys.get_by_id(key_id).await?; + let old_key = Some(old_key_for_authz); // Perform the rotation let created = services From 3136d84c0eb298e0be028234973d7ca812ee1307 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:26:00 +1000 Subject: [PATCH 053/172] Gate admin-UI bypass on explicit VITE_FORCE_ADMIN_ACCESS env flag --- ui/src/auth/types.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ui/src/auth/types.ts b/ui/src/auth/types.ts index e183948..353dd89 100644 --- a/ui/src/auth/types.ts +++ b/ui/src/auth/types.ts @@ -12,10 +12,15 @@ export interface User { /** Admin roles that grant access to the admin UI */ export const ADMIN_ROLES = ["super_admin", "org_admin", "team_admin"] as const; -/** Check if a user has any admin role */ +/** Check if a user has any admin role. + * + * The earlier shortcut "always allow in `import.meta.env.DEV`" leaked into + * Storybook builds and any local production-ish setup with `pnpm dev`, so + * the admin UI rendered for unprivileged users. Bypassing the role check now + * requires an explicit opt-in via `VITE_FORCE_ADMIN_ACCESS=1` so each + * developer turning it on is doing so deliberately. */ export function hasAdminAccess(user: User | null): boolean { - // In dev mode, always show admin pages for easier development - if (import.meta.env.DEV) return true; + if (import.meta.env.VITE_FORCE_ADMIN_ACCESS === "1") return true; if (!user?.roles) return false; return user.roles.some((role) => ADMIN_ROLES.includes(role as (typeof ADMIN_ROLES)[number])); From dfc9e4e2c1230558a0848d6b4459969f98f32eda Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:26:46 +1000 Subject: [PATCH 054/172] Cancel superseded CEL validation requests with AbortController --- .../RbacPolicy/CelExpressionInput.tsx | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/ui/src/components/RbacPolicy/CelExpressionInput.tsx b/ui/src/components/RbacPolicy/CelExpressionInput.tsx index 20d91f5..a926f3e 100644 --- a/ui/src/components/RbacPolicy/CelExpressionInput.tsx +++ b/ui/src/components/RbacPolicy/CelExpressionInput.tsx @@ -1,4 +1,4 @@ -import { useEffect, useState } from "react"; +import { useEffect, useRef, useState } from "react"; import { useMutation } from "@tanstack/react-query"; import { CheckCircle2, XCircle, Loader2, Info } from "lucide-react"; import { useDebouncedCallback } from "use-debounce"; @@ -50,6 +50,12 @@ export function CelExpressionInput({ }>({ valid: null, error: null, checking: false }); const [showHelp, setShowHelp] = useState(false); + // Newer keystrokes abort older in-flight validations so out-of-order + // responses can't paint stale state, and unmount cancels everything. 
+ const abortRef = useRef(null); + useEffect(() => { + return () => abortRef.current?.abort(); + }, []); const validateMutation = useMutation({ ...orgRbacPolicyValidateMutation(), @@ -60,7 +66,10 @@ export function CelExpressionInput({ checking: false, }); }, - onError: () => { + onError: (error) => { + // Suppress aborted-request errors: they only mean a newer keystroke + // superseded this validation, not that the expression is invalid. + if (error instanceof DOMException && error.name === "AbortError") return; setValidationState({ valid: null, error: "Failed to validate expression", @@ -75,7 +84,12 @@ export function CelExpressionInput({ return; } setValidationState((prev) => ({ ...prev, checking: true })); - validateMutation.mutate({ body: { condition } }); + abortRef.current?.abort(); + abortRef.current = new AbortController(); + validateMutation.mutate({ + body: { condition }, + signal: abortRef.current.signal, + }); }, 500); useEffect(() => { From 30316b9daf8d4b51e33566229719ba7bd96e5314 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:29:17 +1000 Subject: [PATCH 055/172] Skip HTTPS image preprocessing for providers that pass through --- src/config/features.rs | 2 ++ src/providers/anthropic/mod.rs | 5 +++++ src/providers/image.rs | 15 +++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/src/config/features.rs b/src/config/features.rs index 965dda5..3d0d799 100644 --- a/src/config/features.rs +++ b/src/config/features.rs @@ -2126,6 +2126,8 @@ impl ImageFetchingConfig { max_size_bytes: self.max_size_mb * 1024 * 1024, timeout: std::time::Duration::from_secs(self.timeout_secs), allowed_content_types: self.allowed_content_types.clone(), + // Per-provider; Anthropic's constructor sets this on its own copy. + pass_through_https: false, } } } diff --git a/src/providers/anthropic/mod.rs b/src/providers/anthropic/mod.rs index 39898a8..c8740d9 100644 --- a/src/providers/anthropic/mod.rs +++ b/src/providers/anthropic/mod.rs @@ -100,6 +100,11 @@ impl AnthropicProvider { ) -> Self { let circuit_breaker = registry.get_or_create(provider_name, &config.circuit_breaker); + // Anthropic supports HTTPS image URLs natively, so don't waste cycles + // re-encoding them as base64 data URLs in the preprocess step. + let mut image_fetch_config = image_fetch_config; + image_fetch_config.pass_through_https = true; + Self { api_key: config.api_key.clone(), base_url: config.base_url.trim_end_matches('/').to_string(), diff --git a/src/providers/image.rs b/src/providers/image.rs index 765d231..1f48137 100644 --- a/src/providers/image.rs +++ b/src/providers/image.rs @@ -31,6 +31,12 @@ pub struct ImageFetchConfig { pub timeout: Duration, /// Allowed content types (empty = allow all image types) pub allowed_content_types: Vec, + /// Skip preprocessing for `https://` URLs (default: false). Set this for + /// providers that natively support HTTPS image URLs (e.g. Anthropic), so + /// we don't waste bandwidth fetching and re-encoding images the upstream + /// can pull itself. `http://` URLs are still preprocessed because most + /// providers reject plain HTTP. + pub pass_through_https: bool, } impl Default for ImageFetchConfig { @@ -45,6 +51,7 @@ impl Default for ImageFetchConfig { "image/gif".to_string(), "image/webp".to_string(), ], + pass_through_https: false, } } } @@ -380,6 +387,14 @@ async fn preprocess_content_for_images( continue; } + // Providers like Anthropic accept HTTPS URLs directly; + // fetching and re-encoding them is wasted work. 
+ if image_url.url.starts_with("https://") + && config.is_some_and(|c| c.pass_through_https) + { + continue; + } + // Try to fetch HTTP URL if is_http_url(&image_url.url) { match resolve_image_url(client, &image_url.url, config).await { From 1c848d31756f470a5ef982104ed69213857cc94e Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:30:24 +1000 Subject: [PATCH 056/172] Wire DataTable filtered row model unconditionally --- ui/src/components/DataTable/DataTable.tsx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ui/src/components/DataTable/DataTable.tsx b/ui/src/components/DataTable/DataTable.tsx index e0b6e41..b577de6 100644 --- a/ui/src/components/DataTable/DataTable.tsx +++ b/ui/src/components/DataTable/DataTable.tsx @@ -62,10 +62,11 @@ export function DataTable({ getSortedRowModel: getSortedRowModel(), onSortingChange: setSorting, }), - ...(searchColumn && { - getFilteredRowModel: getFilteredRowModel(), - onColumnFiltersChange: setColumnFilters, - }), + // Always enable the filtered row model when filtering is possible — + // either column-scoped (searchColumn) or via globalFilter — so the + // search input doesn't silently no-op when `searchColumn` is unset. + getFilteredRowModel: getFilteredRowModel(), + onColumnFiltersChange: setColumnFilters, onColumnVisibilityChange: setColumnVisibility, onGlobalFilterChange: setGlobalFilter, state: { From 18bd86d6bc6033974197af9acca49f4e7d4f2eb2 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:32:44 +1000 Subject: [PATCH 057/172] Prefer configured public_url for SCIM base URL --- src/routes/scim/users.rs | 46 +++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/src/routes/scim/users.rs b/src/routes/scim/users.rs index 4c516f9..90e5f33 100644 --- a/src/routes/scim/users.rs +++ b/src/routes/scim/users.rs @@ -72,19 +72,49 @@ impl IntoResponse for ScimJsonWithStatus { // ============================================================================= /// Extract the SCIM base URL from the request. -fn get_base_url(request: &Request) -> String { +/// +/// Prefers the operator-configured `auth.oauth_pkce.public_url` so we don't +/// trust forwarded headers from arbitrary callers — RFC 7644 endpoints are +/// authenticated by a bearer token, but a misconfigured deployment could +/// still let a client poison the `Location` URLs we mint by spoofing +/// `X-Forwarded-Host`. The configured URL is authoritative when present; +/// otherwise build from the server's bound host/port. +fn get_base_url(state: &AppState, request: &Request) -> String { + if let Some(public_url) = state.config.auth.oauth_pkce.public_url.as_deref() + && !public_url.is_empty() + { + return format!("{}/scim/v2", public_url.trim_end_matches('/')); + } + + // Fall back to whatever the request claims, then finally to localhost so + // a SCIM list response is at least syntactically valid in dev/test. 
let scheme = request .headers() .get("x-forwarded-proto") .and_then(|v| v.to_str().ok()) - .unwrap_or("https"); + .unwrap_or_else(|| { + if state.config.server.tls.is_some() { + "https" + } else { + "http" + } + }); let host = request .headers() .get("x-forwarded-host") .or_else(|| request.headers().get(header::HOST)) .and_then(|v| v.to_str().ok()) - .unwrap_or("localhost"); + .map(str::to_string) + .unwrap_or_else(|| { + let server = &state.config.server; + if (scheme == "https" && server.port == 443) || (scheme == "http" && server.port == 80) + { + server.host.to_string() + } else { + format!("{}:{}", server.host, server.port) + } + }); format!("{}://{}/scim/v2", scheme, host) } @@ -123,7 +153,7 @@ pub async fn list_users( Query(params): Query, request: Request, ) -> Response { - let base_url = get_base_url(&request); + let base_url = get_base_url(&state, &request); let service = match get_provisioning_service(&state) { Ok(s) => s, Err(e) => return e.into_response(), @@ -150,7 +180,7 @@ pub async fn create_user( Extension(scim_auth): Extension, request: Request, ) -> Response { - let base_url = get_base_url(&request); + let base_url = get_base_url(&state, &request); let service = match get_provisioning_service(&state) { Ok(s) => s, Err(e) => return e.into_response(), @@ -199,7 +229,7 @@ pub async fn get_user( Path(id): Path, request: Request, ) -> Response { - let base_url = get_base_url(&request); + let base_url = get_base_url(&state, &request); let service = match get_provisioning_service(&state) { Ok(s) => s, Err(e) => return e.into_response(), @@ -228,7 +258,7 @@ pub async fn replace_user( Path(id): Path, request: Request, ) -> Response { - let base_url = get_base_url(&request); + let base_url = get_base_url(&state, &request); let service = match get_provisioning_service(&state) { Ok(s) => s, Err(e) => return e.into_response(), @@ -286,7 +316,7 @@ pub async fn patch_user( Path(id): Path, request: Request, ) -> Response { - let base_url = get_base_url(&request); + let base_url = get_base_url(&state, &request); let service = match get_provisioning_service(&state) { Ok(s) => s, Err(e) => return e.into_response(), From b302de88cba60b2ee4c688639388c584ca771260 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sat, 25 Apr 2026 23:33:33 +1000 Subject: [PATCH 058/172] Replace per-token Markdown pre-tagging with MutationObserver --- ui/src/components/Markdown/Markdown.tsx | 35 +++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/ui/src/components/Markdown/Markdown.tsx b/ui/src/components/Markdown/Markdown.tsx index 83eff81..a7c4ba2 100644 --- a/ui/src/components/Markdown/Markdown.tsx +++ b/ui/src/components/Markdown/Markdown.tsx @@ -26,15 +26,40 @@ export function Markdown({ content, className }: MarkdownProps) { // Streamdown renders
<pre> elements that we can't control directly.
   // Post-render fixup: set tabIndex="0" on all <pre> children so keyboard
   // users can scroll them (fixes axe-core scrollable-region-focusable).
+  //
+  // Use a MutationObserver instead of re-querying on every token: streaming
+  // content changes hundreds of times per response, and `querySelectorAll`
+  // walks the entire markdown subtree each call. The observer only fires
+  // when the DOM actually changes, and we only need to attribute newly
+  // mounted <pre> nodes.
   useEffect(() => {
     const container = containerRef.current;
     if (!container) return;
-    for (const pre of container.querySelectorAll("pre")) {
-      if (!pre.hasAttribute("tabindex")) {
-        pre.setAttribute("tabindex", "0");
+
+    const tagPre = (node: Element) => {
+      if (node.tagName === "PRE" && !node.hasAttribute("tabindex")) {
+        node.setAttribute("tabindex", "0");
+      }
+      for (const pre of node.querySelectorAll("pre")) {
+        if (!pre.hasAttribute("tabindex")) {
+          pre.setAttribute("tabindex", "0");
+        }
+      }
+    };
+    tagPre(container);
+
+    const observer = new MutationObserver((records) => {
+      for (const record of records) {
+        for (const node of record.addedNodes) {
+          if (node.nodeType === Node.ELEMENT_NODE) {
+            tagPre(node as Element);
+          }
+        }
       }
-    }
-  }, [content]);
+    });
+    observer.observe(container, { childList: true, subtree: true });
+    return () => observer.disconnect();
+  }, []);
 
   const mermaidOptions: MermaidOptions = {
     config: {

From 670f439542e16c6bdb0a6c1ae09e2a1b39ace0c3 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sat, 25 Apr 2026 23:34:50 +1000
Subject: [PATCH 059/172] Generate a session secret in wizard-rendered IdP
 configs

---
 src/wizard.rs | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/wizard.rs b/src/wizard.rs
index 03be401..93e314a 100644
--- a/src/wizard.rs
+++ b/src/wizard.rs
@@ -1068,7 +1068,10 @@ fn generate_config(mode: DeploymentMode, wizard_config: &WizardConfig) -> String
             ));
             config.push('\n');
             config.push_str("[auth.session]\n");
-            config.push_str("secret = \"${SESSION_SECRET}\"\n");
+            config.push_str("# Sessions are signed with this 256-bit secret. Override via the\n");
+            config.push_str("# SESSION_SECRET env var in multi-replica setups so every node\n");
+            config.push_str("# accepts the others' cookies.\n");
+            config.push_str(&format!("secret = \"{}\"\n", generate_session_secret()));
             config.push('\n');
         }
     }
@@ -1149,6 +1152,17 @@ fn escape_toml_string(s: &str) -> String {
     s.replace('\\', "\\\\").replace('"', "\\\"")
 }
 
+/// Generate a fresh 256-bit URL-safe base64 session-signing secret. Called
+/// from the wizard so a freshly-installed deployment has a stable secret
+/// without the operator having to remember to set `SESSION_SECRET`.
+fn generate_session_secret() -> String {
+    use base64::{Engine, engine::general_purpose::URL_SAFE_NO_PAD};
+    use rand::RngCore;
+    let mut bytes = [0u8; 32];
+    rand::thread_rng().fill_bytes(&mut bytes);
+    URL_SAFE_NO_PAD.encode(bytes)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
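
For reference, a standalone sketch of how the generated value round-trips,
assuming the same `base64` (0.21+ `Engine` API) and `rand` 0.8 crates the
patch uses:

    use base64::{Engine, engine::general_purpose::URL_SAFE_NO_PAD};
    use rand::RngCore;

    fn generate_session_secret() -> String {
        let mut bytes = [0u8; 32];
        rand::thread_rng().fill_bytes(&mut bytes);
        URL_SAFE_NO_PAD.encode(bytes)
    }

    fn main() {
        let secret = generate_session_secret();
        // 32 random bytes encode to 43 URL-safe characters without padding.
        assert_eq!(secret.len(), 43);
        assert_eq!(URL_SAFE_NO_PAD.decode(&secret).unwrap().len(), 32);
        println!("secret = \"{secret}\"");
    }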

From 38103ebf227cfafb32947bccad0c30389f34f460 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sat, 25 Apr 2026 23:36:04 +1000
Subject: [PATCH 060/172] Validate branding colors, fonts, and favicon URL
 before injecting

---
 ui/src/config/ConfigProvider.tsx | 84 ++++++++++++++++++++++++--------
 1 file changed, 63 insertions(+), 21 deletions(-)

diff --git a/ui/src/config/ConfigProvider.tsx b/ui/src/config/ConfigProvider.tsx
index c2515d5..a620d4e 100644
--- a/ui/src/config/ConfigProvider.tsx
+++ b/ui/src/config/ConfigProvider.tsx
@@ -14,43 +14,74 @@ const ConfigContext = createContext(null);
 const BRANDING_STYLE_ID = "hadrian-branding-colors";
 const BRANDING_FONTS_STYLE_ID = "hadrian-branding-fonts";
 
+/** Permissive color literal: hex, rgb()/hsl()/oklch()/var(), CSS keyword.
+ *  Rejects anything containing CSS control chars (`{`, `}`, `;`, `<`, etc.)
+ *  so a misconfigured branding payload can't break out of the rule and
+ *  inject arbitrary CSS into the page. */
+const COLOR_RE = /^[a-zA-Z0-9#%(),.\s\-/_]+$/;
+
+function isSafeColor(value: string | undefined): value is string {
+  return typeof value === "string" && value.length > 0 && value.length < 200 && COLOR_RE.test(value);
+}
+
+/** Validate a font-family name. Quotes/braces/semicolons in here would let
+ *  an attacker close the `font-family` declaration and inject other rules. */
+const FONT_NAME_RE = /^[a-zA-Z0-9 \-_]+$/;
+
+function isSafeFontName(value: string | undefined): value is string {
+  return (
+    typeof value === "string" && value.length > 0 && value.length < 100 && FONT_NAME_RE.test(value)
+  );
+}
+
+/** Only accept absolute https/data URLs for font sources. */
+function isSafeFontUrl(value: string | undefined): value is string {
+  if (typeof value !== "string" || value.length === 0 || value.length > 2048) return false;
+  try {
+    const url = new URL(value, window.location.origin);
+    return url.protocol === "https:" || url.protocol === "data:";
+  } catch {
+    return false;
+  }
+}
+
 /**
  * Generates CSS variable overrides from a color palette
  */
 function generateColorCss(colors: ColorPalette, selector: string): string {
   const rules: string[] = [];
 
-  if (colors.primary) {
+  if (isSafeColor(colors.primary)) {
     rules.push(`--color-primary: ${colors.primary};`);
     rules.push(`--color-ring: ${colors.primary};`);
     // Set accent-foreground to primary color for consistent branding on selected items
     rules.push(`--color-accent-foreground: ${colors.primary};`);
   }
-  if (colors.primary_foreground) {
+  if (isSafeColor(colors.primary_foreground)) {
     rules.push(`--color-primary-foreground: ${colors.primary_foreground};`);
-  } else if (colors.primary) {
+  } else if (isSafeColor(colors.primary)) {
     // Default to white if primary is set but primary_foreground is not
     rules.push(`--color-primary-foreground: #ffffff;`);
   }
-  if (colors.secondary) {
+  if (isSafeColor(colors.secondary)) {
     rules.push(`--color-secondary: ${colors.secondary};`);
   }
-  if (colors.secondary_foreground) {
+  if (isSafeColor(colors.secondary_foreground)) {
     rules.push(`--color-secondary-foreground: ${colors.secondary_foreground};`);
   }
-  if (colors.accent) {
+  if (isSafeColor(colors.accent)) {
     rules.push(`--color-accent: ${colors.accent};`);
   }
-  if (colors.background) {
+  if (isSafeColor(colors.background)) {
     rules.push(`--color-background: ${colors.background};`);
   }
-  if (colors.foreground) {
+  if (isSafeColor(colors.foreground)) {
     rules.push(`--color-foreground: ${colors.foreground};`);
   }
-  if (colors.muted) {
+  if (isSafeColor(colors.muted)) {
     rules.push(`--color-muted: ${colors.muted};`);
   }
-  if (colors.border) {
+  if (isSafeColor(colors.border)) {
     rules.push(`--color-border: ${colors.border};`);
     rules.push(`--color-input: ${colors.border};`);
   }
@@ -82,19 +113,30 @@ function injectBrandingColors(colors: ColorPalette, colorsDark: ColorPalette | n
 }
 
 /**
- * Generates @font-face rules for custom fonts
+ * Generates @font-face rules for custom fonts. Skips entries whose name or URL
+ * fails validation; an invalid entry is logged and dropped rather than
+ * inlined verbatim into the stylesheet (where it could break out of the rule).
  */
 function generateFontFaceRules(customFonts: CustomFont[]): string {
   return customFonts
-    .map(
-      (font) => `@font-face {
+    .filter((font) => {
+      const ok = isSafeFontName(font.name) && isSafeFontUrl(font.url);
+      if (!ok) {
+        console.warn("Ignoring branded custom font with unsafe name or URL", font);
+      }
+      return ok;
+    })
+    .map((font) => {
+      const weight = Number.isFinite(Number(font.weight)) ? Number(font.weight) : 400;
+      const style = font.style === "italic" || font.style === "oblique" ? font.style : "normal";
+      return `@font-face {
   font-family: "${font.name}";
   src: url("${font.url}");
-  font-weight: ${font.weight};
-  font-style: ${font.style};
+  font-weight: ${weight};
+  font-style: ${style};
   font-display: swap;
-}`
-    )
+}`;
+    })
     .join("\n\n");
 }
 
@@ -110,13 +152,13 @@ function generateFontCss(fonts: FontsConfig): string {
   const monoStack =
     'ui-monospace, SFMono-Regular, "SF Mono", Menlo, Monaco, Consolas, "Liberation Mono", monospace';
 
-  if (fonts.body) {
+  if (isSafeFontName(fonts.body)) {
     rules.push(`--font-sans: "${fonts.body}", ${sansStack};`);
   }
-  if (fonts.heading) {
+  if (isSafeFontName(fonts.heading)) {
     rules.push(`--font-heading: "${fonts.heading}", ${sansStack};`);
   }
-  if (fonts.mono) {
+  if (isSafeFontName(fonts.mono)) {
     rules.push(`--font-mono: "${fonts.mono}", ${monoStack};`);
   }
 
@@ -190,7 +232,7 @@ export function ConfigProvider({ children }: ConfigProviderProps) {
   // Update document title, favicon, colors, and fonts based on config
   useEffect(() => {
     document.title = config.branding.title;
-    if (config.branding.favicon_url) {
+    if (config.branding.favicon_url && isSafeFontUrl(config.branding.favicon_url)) {
       const favicon = document.querySelector('link[rel="icon"]');
       if (favicon) {
         favicon.href = config.branding.favicon_url;

From 57690a94780ef44bd12510e360d192c21488e9f4 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sat, 25 Apr 2026 23:37:09 +1000
Subject: [PATCH 061/172] Record cache_operation error metric on semantic-match
 lookup failure

---
 src/cache/semantic_cache.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/cache/semantic_cache.rs b/src/cache/semantic_cache.rs
index 7838583..1acdb90 100644
--- a/src/cache/semantic_cache.rs
+++ b/src/cache/semantic_cache.rs
@@ -337,6 +337,7 @@ impl SemanticCache {
                     );
                 }
                 Err(e) => {
+                    metrics::record_cache_operation("semantic", "get", "error");
                     tracing::warn!(
                         matched_key = %best_match.metadata.cache_key,
                         error = %e,
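
The call assumes the project's internal `metrics` helper; as a sketch, one
plausible shape for `record_cache_operation` on top of the `metrics` facade
crate (metric and label names here are illustrative, not confirmed):

    fn record_cache_operation(cache: &'static str, operation: &'static str, status: &'static str) {
        // Emits cache_operations_total{cache, operation, status} += 1.
        metrics::counter!(
            "cache_operations_total",
            "cache" => cache,
            "operation" => operation,
            "status" => status
        )
        .increment(1);
    }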

From 28dd9fe1277b4c2dc2e89e4d79b04bb734d7ff35 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sat, 25 Apr 2026 23:57:06 +1000
Subject: [PATCH 062/172] Drop inner stream when IdleTimeoutStream times out to
 release upstream resources

---
 src/streaming/mod.rs | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/streaming/mod.rs b/src/streaming/mod.rs
index 726ed3c..5178461 100644
--- a/src/streaming/mod.rs
+++ b/src/streaming/mod.rs
@@ -45,13 +45,13 @@ pub struct IdleTimeoutError(Duration);
 /// The timeout resets after each successful chunk, so long-running streams
 /// that are actively producing data will not timeout.
 pub struct IdleTimeoutStream<S> {
-    inner: S,
+    /// `None` once the stream has terminated, dropping the inner stream so any
+    /// upstream resources (sockets, channels) are released immediately.
+    inner: Option<S>,
     timeout: Duration,
     /// Sleep future for the current timeout period.
     /// Pinned because Sleep requires pinning.
     sleep: Pin<Box<tokio::time::Sleep>>,
-    /// Whether the stream has already timed out or ended
-    terminated: bool,
 }
 
 impl<S> IdleTimeoutStream<S>
@@ -63,10 +63,9 @@ where
     /// If `timeout` is zero, the wrapper is effectively a no-op pass-through.
     pub fn new(inner: S, timeout: Duration) -> Self {
         Self {
-            inner,
+            inner: Some(inner),
             timeout,
             sleep: Box::pin(tokio::time::sleep(timeout)),
-            terminated: false,
         }
     }
 
@@ -84,17 +83,18 @@ where
     type Item = Result<T, E>;
 
     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        if self.terminated {
+        if self.inner.is_none() {
             return Poll::Ready(None);
         }
 
         // If timeout is disabled (zero), just pass through
         if !self.timeout_enabled() {
-            return Pin::new(&mut self.inner).poll_next(cx);
+            return Pin::new(self.inner.as_mut().expect("checked above")).poll_next(cx);
         }
 
         // Poll the inner stream first
-        match Pin::new(&mut self.inner).poll_next(cx) {
+        let inner = self.inner.as_mut().expect("checked above");
+        match Pin::new(inner).poll_next(cx) {
             Poll::Ready(Some(Ok(item))) => {
                 // Got a chunk - reset the timeout
                 let new_deadline = tokio::time::Instant::now() + self.timeout;
@@ -102,20 +102,20 @@ where
                 Poll::Ready(Some(Ok(item)))
             }
             Poll::Ready(Some(Err(e))) => {
-                self.terminated = true;
+                self.inner = None;
                 Poll::Ready(Some(Err(e)))
             }
             Poll::Ready(None) => {
-                // Stream ended normally
-                self.terminated = true;
+                self.inner = None;
                 Poll::Ready(None)
             }
             Poll::Pending => {
                 // Stream is waiting for data - check if we've timed out
                 match self.sleep.as_mut().poll(cx) {
                     Poll::Ready(()) => {
-                        // Timeout elapsed!
-                        self.terminated = true;
+                        // Timeout elapsed - drop the inner stream so its
+                        // socket/connection is released instead of lingering.
+                        self.inner = None;
                         tracing::warn!(
                             timeout_secs = self.timeout.as_secs(),
                             "Streaming response idle timeout - terminating stalled stream"

From 9541f3509891d1dc20f40bb6cc780b2ea3bb34cc Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sat, 25 Apr 2026 23:59:41 +1000
Subject: [PATCH 063/172] Skip primary payload clone when no fallback chain is
 configured

---
 src/routes/execution.rs | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/routes/execution.rs b/src/routes/execution.rs
index 0a82db5..f3b41d3 100644
--- a/src/routes/execution.rs
+++ b/src/routes/execution.rs
@@ -549,8 +549,15 @@ pub async fn execute_with_fallback(
     let mut last_provider = primary_provider_name.clone();
     let mut last_model = primary_model_name.clone();
 
-    // Try primary provider first
-    let mut current_payload = payload.clone();
+    // Hold a template clone for the fallback chain only when needed; the
+    // primary call takes the original payload by value to avoid one clone in
+    // the common no-fallback path.
+    let payload_for_fallbacks = if fallback_chain.is_empty() {
+        None
+    } else {
+        Some(payload.clone())
+    };
+    let mut current_payload = payload;
     current_payload.set_model(primary_model_name.clone());
 
     // Store the last response for chain exhaustion case
@@ -606,7 +613,11 @@ pub async fn execute_with_fallback(
         }
     }
 
-    // Try each fallback in order
+    // Try each fallback in order. `payload_for_fallbacks` is `Some` whenever
+    // `fallback_chain` is non-empty, and the template is only unwrapped
+    // inside the loop body, so reaching this point with an empty chain can
+    // never panic.
+    let payload_template = payload_for_fallbacks;
     let mut last_error: Option = None;
 
     for (idx, fallback) in fallback_chain.iter().enumerate() {
@@ -654,7 +665,7 @@ pub async fn execute_with_fallback(
         }
 
         // Update payload with fallback model
-        let mut fallback_payload = payload.clone();
+        let mut fallback_payload = payload_template.as_ref()
+            .expect("payload_for_fallbacks is Some when fallback_chain is non-empty")
+            .clone();
         fallback_payload.set_model(fallback.model_name.clone());
 
         tracing::debug!(
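
In isolation, the move-then-template shape looks like this (hypothetical
`Payload` type; the real one lives in the routing layer):

    #[derive(Clone)]
    struct Payload {
        model: String,
    }

    fn run(payload: Payload, fallback_models: &[&str]) {
        // Clone a template only when a fallback chain exists; the primary
        // attempt consumes the original payload by value.
        let template = (!fallback_models.is_empty()).then(|| payload.clone());

        let mut primary = payload;
        primary.model = "primary".to_string();
        println!("attempt -> {}", primary.model);

        for model in fallback_models {
            let mut retry = template
                .as_ref()
                .expect("template exists when fallback_models is non-empty")
                .clone();
            retry.model = (*model).to_string();
            println!("attempt -> {}", retry.model);
        }
    }

    fn main() {
        run(Payload { model: String::new() }, &["fallback-a", "fallback-b"]);
    }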

From 1443a82ed2ae2f836ac53830fab31359730f5248 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:05:48 +1000
Subject: [PATCH 064/172] Strip provider error detail from client responses;
 keep raw text in logs

---
 src/providers/mod.rs    | 42 +++++++++++++++++++++++++++++++----------
 src/routes/execution.rs | 42 +++++++++++++++++++++++++++++------------
 2 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/src/providers/mod.rs b/src/providers/mod.rs
index e9db46b..c22d681 100644
--- a/src/providers/mod.rs
+++ b/src/providers/mod.rs
@@ -164,23 +164,45 @@ impl From<ProviderError> for StatusCode {
 
 impl IntoResponse for ProviderError {
     fn into_response(self) -> Response {
-        let (status, error_code) = match &self {
-            ProviderError::Request(_) => (StatusCode::BAD_GATEWAY, "request_failed"),
-            ProviderError::ResponseBuilder(_) => {
-                (StatusCode::INTERNAL_SERVER_ERROR, "response_builder")
-            }
-            ProviderError::Internal(_) => (StatusCode::INTERNAL_SERVER_ERROR, "internal"),
-            ProviderError::CircuitBreakerOpen(_) => {
-                (StatusCode::SERVICE_UNAVAILABLE, "circuit_breaker_open")
-            }
+        // CircuitBreakerOpen is a curated message we own (no upstream detail
+        // mixed in), so it's safe to expose. The other variants wrap reqwest
+        // / http / arbitrary internal strings that may include hostnames,
+        // file paths, or stack-trace fragments — keep those in logs only.
+        let (status, error_code, public_message) = match &self {
+            ProviderError::Request(_) => (
+                StatusCode::BAD_GATEWAY,
+                "request_failed",
+                "Upstream provider request failed".to_string(),
+            ),
+            ProviderError::ResponseBuilder(_) => (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "response_builder",
+                "Failed to build response".to_string(),
+            ),
+            ProviderError::Internal(_) => (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "internal",
+                "Internal provider error".to_string(),
+            ),
+            ProviderError::CircuitBreakerOpen(e) => (
+                StatusCode::SERVICE_UNAVAILABLE,
+                "circuit_breaker_open",
+                e.to_string(),
+            ),
         };
 
+        tracing::error!(
+            error_code = %error_code,
+            error = %self,
+            "Provider error returned to client"
+        );
+
         // Record provider error metric
         // Note: Provider name is tracked via llm_requests_total with status="error"
         // This counter provides unified error categorization across all error types
         metrics::record_gateway_error("provider_error", error_code, None);
 
-        (status, self.to_string()).into_response()
+        (status, public_message).into_response()
     }
 }
 
diff --git a/src/routes/execution.rs b/src/routes/execution.rs
index f3b41d3..10230a1 100644
--- a/src/routes/execution.rs
+++ b/src/routes/execution.rs
@@ -811,22 +811,40 @@ pub async fn execute_with_fallback(
 // Helper Functions
 // ============================================================================
 
-/// Convert a provider error to an API error.
+/// Convert a provider error to an API error. The full error string is logged
+/// for operator debugging (it can contain internal URLs/paths from upstream
+/// SDKs) while only a generic message is returned to the client.
+/// `CircuitBreakerOpen` is exposed verbatim because its display string is a
+/// curated message we control (provider name + retry-at hint).
 pub fn provider_error_to_api_error(e: ProviderError) -> ApiError {
     use http::StatusCode;
 
-    let message = e.to_string();
-    let (status, code) = match &e {
-        ProviderError::Request(_) => (StatusCode::BAD_GATEWAY, "provider_error"),
-        ProviderError::ResponseBuilder(_) => {
-            (StatusCode::INTERNAL_SERVER_ERROR, "response_builder_error")
-        }
-        ProviderError::Internal(_) => (StatusCode::INTERNAL_SERVER_ERROR, "internal_error"),
-        ProviderError::CircuitBreakerOpen(_) => {
-            (StatusCode::SERVICE_UNAVAILABLE, "circuit_breaker_open")
-        }
+    let (status, code, public_message) = match &e {
+        ProviderError::Request(_) => (
+            StatusCode::BAD_GATEWAY,
+            "provider_error",
+            "Upstream provider request failed".to_string(),
+        ),
+        ProviderError::ResponseBuilder(_) => (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "response_builder_error",
+            "Failed to build response".to_string(),
+        ),
+        ProviderError::Internal(_) => (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "internal_error",
+            "Internal provider error".to_string(),
+        ),
+        ProviderError::CircuitBreakerOpen(cb) => (
+            StatusCode::SERVICE_UNAVAILABLE,
+            "circuit_breaker_open",
+            cb.to_string(),
+        ),
     };
-    ApiError::new(status, code, message)
+
+    tracing::error!(error_code = %code, error = %e, "Provider error converted to API error");
+
+    ApiError::new(status, code, public_message)
 }
 
 #[cfg(test)]
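
The split (full detail into the operator log, curated string on the wire)
is easy to exercise in isolation; a sketch with a stand-in error type, not
the gateway's `ProviderError`:

    use std::fmt;

    #[derive(Debug)]
    enum UpstreamError {
        /// Wraps HTTP-client detail that may leak hostnames or file paths.
        Request(String),
        /// Curated message we compose ourselves; safe to expose.
        CircuitBreakerOpen(String),
    }

    impl fmt::Display for UpstreamError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            match self {
                UpstreamError::Request(detail) => write!(f, "request failed: {detail}"),
                UpstreamError::CircuitBreakerOpen(msg) => write!(f, "{msg}"),
            }
        }
    }

    /// Returns (status, error_code, body shown to the client).
    fn to_public(e: &UpstreamError) -> (u16, &'static str, String) {
        match e {
            UpstreamError::Request(_) => {
                (502, "request_failed", "Upstream provider request failed".to_string())
            }
            UpstreamError::CircuitBreakerOpen(msg) => {
                (503, "circuit_breaker_open", msg.clone())
            }
        }
    }

    fn main() {
        let e = UpstreamError::Request("connect to 10.0.3.7:8443 timed out".into());
        eprintln!("log: {e}"); // operator log keeps the raw detail
        let (status, code, body) = to_public(&e);
        println!("client: {status} {code}: {body}"); // no internal addresses
    }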

From 24ba57c6e1005d84ecfa777a591d3c730cb8bd0a Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:12:12 +1000
Subject: [PATCH 065/172] Move shutdown timeouts and JWT loader concurrency to
 ServerConfig

---
 src/cli/server.rs    | 26 +++++++++++++++++-----
 src/config/server.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/src/cli/server.rs b/src/cli/server.rs
index 33ed357..e021af7 100644
--- a/src/cli/server.rs
+++ b/src/cli/server.rs
@@ -148,6 +148,7 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
         let http_client = state.http_client.clone();
         let allow_loopback = config.server.allow_loopback_urls;
         let allow_private = config.server.allow_private_urls;
+        let jwt_loader_concurrency = config.server.jwt_loader_concurrency;
         state.task_tracker.spawn(async move {
             let configs = match db.org_sso_configs().list_enabled().await {
                 Ok(c) => c,
@@ -199,7 +200,7 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
                         }
                     }
                 })
-                .buffer_unordered(10)
+                .buffer_unordered(jwt_loader_concurrency)
                 .collect()
                 .await;
 
@@ -400,6 +401,8 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
     #[cfg(not(feature = "wizard"))]
     let _ = no_browser;
 
+    let shutdown_config = config.server.shutdown.clone();
+
     // Graceful shutdown: wait for SIGINT/SIGTERM, then wait for all background tasks.
     // `into_make_service_with_connect_info` is required so middleware can read the
     // connecting peer address via `ConnectInfo` for IP-based rate limits,
@@ -408,7 +411,11 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
         listener,
        app.into_make_service_with_connect_info::<std::net::SocketAddr>(),
     )
-    .with_graceful_shutdown(shutdown_signal(task_tracker, usage_buffer_handle))
+    .with_graceful_shutdown(shutdown_signal(
+        task_tracker,
+        usage_buffer_handle,
+        shutdown_config,
+    ))
     .await
     .unwrap();
 }
@@ -419,6 +426,7 @@ async fn shutdown_signal(
         Arc,
         tokio::task::JoinHandle<()>,
     )>,
+    shutdown_config: crate::config::ShutdownConfig,
 ) {
     let ctrl_c = async {
         tokio::signal::ctrl_c()
@@ -450,7 +458,12 @@ async fn shutdown_signal(
     // Shutdown usage buffer worker and wait for it to flush
     if let Some((buffer, handle)) = usage_buffer_handle {
         buffer.shutdown();
-        if let Err(e) = tokio::time::timeout(std::time::Duration::from_secs(5), handle).await {
+        if let Err(e) = tokio::time::timeout(
+            std::time::Duration::from_secs(shutdown_config.usage_buffer_flush_secs),
+            handle,
+        )
+        .await
+        {
             tracing::warn!(error = %e, "Timeout waiting for usage buffer to flush");
         } else {
             tracing::info!("Usage buffer flushed successfully");
@@ -458,8 +471,11 @@ async fn shutdown_signal(
     }
 
     // Wait for all in-flight tasks to complete (with timeout)
-    let wait_result =
-        tokio::time::timeout(std::time::Duration::from_secs(30), task_tracker.wait()).await;
+    let wait_result = tokio::time::timeout(
+        std::time::Duration::from_secs(shutdown_config.drain_secs),
+        task_tracker.wait(),
+    )
+    .await;
 
     match wait_result {
         Ok(()) => tracing::info!("All background tasks completed"),
diff --git a/src/config/server.rs b/src/config/server.rs
index def455f..135ca1b 100644
--- a/src/config/server.rs
+++ b/src/config/server.rs
@@ -67,6 +67,16 @@ pub struct ServerConfig {
     #[serde(default)]
     pub http_client: HttpClientConfig,
 
+    /// Graceful shutdown timing.
+    #[serde(default)]
+    pub shutdown: ShutdownConfig,
+
+    /// Maximum number of per-issuer JWKS endpoints fetched in parallel when
+    /// warming the gateway JWT validator registry on startup. Higher values
+    /// speed up startup but risk overwhelming individual IdPs.
+    #[serde(default = "default_jwt_loader_concurrency")]
+    pub jwt_loader_concurrency: usize,
+
     /// Allow loopback addresses (127.0.0.1, ::1, localhost) in user-supplied URLs.
     ///
     /// When false (default), URLs targeting loopback addresses are blocked to prevent SSRF.
@@ -100,6 +110,8 @@ impl Default for ServerConfig {
             cors: CorsConfig::default(),
             security_headers: SecurityHeadersConfig::default(),
             http_client: HttpClientConfig::default(),
+            shutdown: ShutdownConfig::default(),
+            jwt_loader_concurrency: default_jwt_loader_concurrency(),
             allow_loopback_urls: false,
             allow_private_urls: false,
         }
@@ -130,6 +142,47 @@ fn default_streaming_idle_timeout() -> u64 {
     120 // 2 minutes between chunks
 }
 
+/// Graceful shutdown timing.
+///
+/// These values were previously hardcoded constants. They control how long the
+/// server waits for in-flight work to drain before exiting. The defaults match
+/// the prior hardcoded values; deployments with longer-running tasks (or with
+/// shorter `terminationGracePeriodSeconds`) should override them.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+#[serde(deny_unknown_fields)]
+pub struct ShutdownConfig {
+    /// Seconds to wait for the usage-buffer worker to flush its final batch.
+    #[serde(default = "default_usage_buffer_flush_secs")]
+    pub usage_buffer_flush_secs: u64,
+
+    /// Seconds to wait for outstanding background tasks (request handlers,
+    /// usage logging, etc.) to complete after the close signal.
+    #[serde(default = "default_drain_secs")]
+    pub drain_secs: u64,
+}
+
+impl Default for ShutdownConfig {
+    fn default() -> Self {
+        Self {
+            usage_buffer_flush_secs: default_usage_buffer_flush_secs(),
+            drain_secs: default_drain_secs(),
+        }
+    }
+}
+
+fn default_usage_buffer_flush_secs() -> u64 {
+    5
+}
+
+fn default_drain_secs() -> u64 {
+    30
+}
+
+fn default_jwt_loader_concurrency() -> usize {
+    10
+}
+
 /// TLS configuration.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
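
The drain these knobs bound reduces to a timed wait on the task tracker; a
sketch assuming `tokio-util`'s `TaskTracker`, which the shutdown path
already appears to use:

    use std::time::Duration;
    use tokio_util::task::TaskTracker;

    #[tokio::main]
    async fn main() {
        let tracker = TaskTracker::new();
        tracker.spawn(async {
            tokio::time::sleep(Duration::from_millis(50)).await;
        });
        tracker.close(); // no new tasks; wait() resolves once spawned ones finish

        let drain_secs = 30; // ShutdownConfig::drain_secs default
        match tokio::time::timeout(Duration::from_secs(drain_secs), tracker.wait()).await {
            Ok(()) => println!("all background tasks completed"),
            Err(_) => eprintln!("timed out waiting for background tasks"),
        }
    }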

From 0b51c20b087dc81098d2208bc9bf67629d26e468 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:13:08 +1000
Subject: [PATCH 066/172] Bound AWS credential-refresh notify_waiters wait to
 avoid stuck waiters

---
 src/providers/aws.rs | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/providers/aws.rs b/src/providers/aws.rs
index 57d4967..5585f96 100644
--- a/src/providers/aws.rs
+++ b/src/providers/aws.rs
@@ -22,6 +22,13 @@ use crate::config::AwsCredentials;
 /// preventing request failures during the refresh window.
 const CREDENTIAL_REFRESH_BUFFER_SECS: u64 = 300;
 
+/// Maximum time a waiting task will block on `refresh_notify` before
+/// re-checking the cache. `Notify::notify_waiters` only signals tasks that are
+/// already in `notified()` at the moment of the call, so a task that loses
+/// the refresh race but reaches `notified()` after the refresher finishes
+/// would otherwise wait indefinitely. The timeout bounds that worst case.
+const REFRESH_NOTIFY_TIMEOUT_SECS: u64 = 10;
+
 /// Error type for AWS credential operations.
 #[derive(Debug, thiserror::Error)]
 pub enum AwsError {
@@ -114,7 +121,14 @@ impl AwsCredentialCache {
             }
 
             // Another task is refreshing. Wait for notification then retry.
-            self.refresh_notify.notified().await;
+            // Apply a timeout so a task that reaches this point after the
+            // refresher already called `notify_waiters` doesn't deadlock —
+            // it will simply re-check the cache on the next loop iteration.
+            let _ = tokio::time::timeout(
+                std::time::Duration::from_secs(REFRESH_NOTIFY_TIMEOUT_SECS),
+                self.refresh_notify.notified(),
+            )
+            .await;
         }
     }
 

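The race the timeout guards against is easy to reproduce, because
`notify_waiters` stores no permit: a task that parks after the signal has
fired sleeps until something else wakes it. A standalone sketch:

    use std::sync::Arc;
    use std::time::Duration;
    use tokio::sync::Notify;

    #[tokio::main]
    async fn main() {
        let notify = Arc::new(Notify::new());

        let refresher = notify.clone();
        tokio::spawn(async move {
            // Simulated refresh finishing: wakes only already-parked tasks.
            tokio::time::sleep(Duration::from_millis(10)).await;
            refresher.notify_waiters();
        });

        // This task loses the race and parks after notify_waiters() ran, so
        // only the timeout returns it to the cache re-check loop.
        tokio::time::sleep(Duration::from_millis(50)).await;
        let woke = tokio::time::timeout(Duration::from_millis(100), notify.notified()).await;
        assert!(woke.is_err(), "no permit stored; the timeout is the backstop");
    }
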
From 99f56b60f109eca3c0144d129449b8f83d5b2cab Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:16:14 +1000
Subject: [PATCH 067/172] Drain provider stream transformers in a loop instead
 of self-waking

---
 src/providers/anthropic/stream.rs | 134 ++++++++++++------------------
 src/providers/bedrock/stream.rs   | 132 ++++++++++-------------------
 src/providers/vertex/stream.rs    |  64 +++++---------
 3 files changed, 119 insertions(+), 211 deletions(-)

diff --git a/src/providers/anthropic/stream.rs b/src/providers/anthropic/stream.rs
index 497069a..22b5bcb 100644
--- a/src/providers/anthropic/stream.rs
+++ b/src/providers/anthropic/stream.rs
@@ -632,53 +632,40 @@ where
         }
 
         // First, return any buffered output
-        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self
-                .output_buffer
-                .pop_front()
-                .expect("non-empty checked above"))));
+        if let Some(out) = self.output_buffer.pop_front() {
+            return Poll::Ready(Some(Ok(out)));
         }
 
-        // Poll the inner stream
-        let inner = Pin::new(&mut self.inner);
-        match inner.poll_next(cx) {
-            Poll::Ready(Some(Ok(bytes))) => {
-                // Process the Anthropic SSE bytes
-                self.process_bytes(&bytes);
-
-                // Check for buffer overflow after processing
-                if self.state.buffer_overflow {
-                    return Poll::Ready(Some(Err(io::Error::new(
-                        io::ErrorKind::OutOfMemory,
-                        "SSE buffer overflow",
-                    ))));
-                }
+        // Drain the inner stream until we either produce output, hit a real
+        // Pending, or end. The previous implementation woke itself with
+        // `wake_by_ref` after consuming an empty chunk, which busy-loops the
+        // executor; an inline loop avoids that.
+        loop {
+            match Pin::new(&mut self.inner).poll_next(cx) {
+                Poll::Ready(Some(Ok(bytes))) => {
+                    self.process_bytes(&bytes);
+
+                    if self.state.buffer_overflow {
+                        return Poll::Ready(Some(Err(io::Error::new(
+                            io::ErrorKind::OutOfMemory,
+                            "SSE buffer overflow",
+                        ))));
+                    }
 
-                // Return first buffered output if any
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    // No output yet, need to poll again
-                    cx.waker().wake_by_ref();
-                    Poll::Pending
+                    if let Some(out) = self.output_buffer.pop_front() {
+                        return Poll::Ready(Some(Ok(out)));
+                    }
+                    // No output produced yet — keep draining.
                 }
-            }
-            Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
-            Poll::Ready(None) => {
-                // Stream ended - flush any remaining buffer
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    Poll::Ready(None)
+                Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
+                Poll::Ready(None) => {
+                    return match self.output_buffer.pop_front() {
+                        Some(out) => Poll::Ready(Some(Ok(out))),
+                        None => Poll::Ready(None),
+                    };
                 }
+                Poll::Pending => return Poll::Pending,
             }
-            Poll::Pending => Poll::Pending,
         }
     }
 }
@@ -1369,7 +1356,6 @@ where
     type Item = Result<Bytes, io::Error>;
 
     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        // Check for buffer overflow error
         if self.state.buffer_overflow {
             return Poll::Ready(Some(Err(io::Error::new(
                 io::ErrorKind::OutOfMemory,
@@ -1377,53 +1363,35 @@ where
             ))));
         }
 
-        // First, return any buffered output
-        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self
-                .output_buffer
-                .pop_front()
-                .expect("non-empty checked above"))));
+        if let Some(out) = self.output_buffer.pop_front() {
+            return Poll::Ready(Some(Ok(out)));
         }
 
-        // Poll the inner stream
-        let inner = Pin::new(&mut self.inner);
-        match inner.poll_next(cx) {
-            Poll::Ready(Some(Ok(bytes))) => {
-                // Process the Anthropic SSE bytes
-                self.process_bytes(&bytes);
-
-                // Check for buffer overflow after processing
-                if self.state.buffer_overflow {
-                    return Poll::Ready(Some(Err(io::Error::new(
-                        io::ErrorKind::OutOfMemory,
-                        "SSE buffer overflow",
-                    ))));
-                }
+        loop {
+            match Pin::new(&mut self.inner).poll_next(cx) {
+                Poll::Ready(Some(Ok(bytes))) => {
+                    self.process_bytes(&bytes);
+
+                    if self.state.buffer_overflow {
+                        return Poll::Ready(Some(Err(io::Error::new(
+                            io::ErrorKind::OutOfMemory,
+                            "SSE buffer overflow",
+                        ))));
+                    }
 
-                // Return buffered output or wake for more
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    cx.waker().wake_by_ref();
-                    Poll::Pending
+                    if let Some(out) = self.output_buffer.pop_front() {
+                        return Poll::Ready(Some(Ok(out)));
+                    }
                 }
-            }
-            Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
-            Poll::Ready(None) => {
-                // Stream ended - flush any remaining buffer
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    Poll::Ready(None)
+                Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
+                Poll::Ready(None) => {
+                    return match self.output_buffer.pop_front() {
+                        Some(out) => Poll::Ready(Some(Ok(out))),
+                        None => Poll::Ready(None),
+                    };
                 }
+                Poll::Pending => return Poll::Pending,
             }
-            Poll::Pending => Poll::Pending,
         }
     }
 }
diff --git a/src/providers/bedrock/stream.rs b/src/providers/bedrock/stream.rs
index c58c0bd..2c6857d 100644
--- a/src/providers/bedrock/stream.rs
+++ b/src/providers/bedrock/stream.rs
@@ -437,7 +437,6 @@ where
     type Item = Result<Bytes, io::Error>;
 
     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        // Check for buffer overflow error
         if self.state.buffer_overflow {
             return Poll::Ready(Some(Err(io::Error::new(
                 io::ErrorKind::OutOfMemory,
@@ -445,54 +444,35 @@ where
             ))));
         }
 
-        // First, return any buffered output
-        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self
-                .output_buffer
-                .pop_front()
-                .expect("non-empty checked above"))));
+        if let Some(out) = self.output_buffer.pop_front() {
+            return Poll::Ready(Some(Ok(out)));
         }
 
-        // Poll the inner stream
-        let inner = Pin::new(&mut self.inner);
-        match inner.poll_next(cx) {
-            Poll::Ready(Some(Ok(bytes))) => {
-                // Process the event stream bytes
-                self.process_bytes(&bytes);
-
-                // Check for buffer overflow after processing
-                if self.state.buffer_overflow {
-                    return Poll::Ready(Some(Err(io::Error::new(
-                        io::ErrorKind::OutOfMemory,
-                        "Event stream buffer overflow",
-                    ))));
-                }
+        loop {
+            match Pin::new(&mut self.inner).poll_next(cx) {
+                Poll::Ready(Some(Ok(bytes))) => {
+                    self.process_bytes(&bytes);
+
+                    if self.state.buffer_overflow {
+                        return Poll::Ready(Some(Err(io::Error::new(
+                            io::ErrorKind::OutOfMemory,
+                            "Event stream buffer overflow",
+                        ))));
+                    }
 
-                // Return first buffered output if any
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    // No output yet, need to poll again
-                    cx.waker().wake_by_ref();
-                    Poll::Pending
+                    if let Some(out) = self.output_buffer.pop_front() {
+                        return Poll::Ready(Some(Ok(out)));
+                    }
                 }
-            }
-            Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(io::Error::other(e)))),
-            Poll::Ready(None) => {
-                // Stream ended, return any remaining buffered output
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    Poll::Ready(None)
+                Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(io::Error::other(e)))),
+                Poll::Ready(None) => {
+                    return match self.output_buffer.pop_front() {
+                        Some(out) => Poll::Ready(Some(Ok(out))),
+                        None => Poll::Ready(None),
+                    };
                 }
+                Poll::Pending => return Poll::Pending,
             }
-            Poll::Pending => Poll::Pending,
         }
     }
 }
@@ -1201,7 +1181,6 @@ where
     type Item = Result<Bytes, io::Error>;
 
     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        // Check for buffer overflow error
         if self.state.buffer_overflow {
             return Poll::Ready(Some(Err(io::Error::new(
                 io::ErrorKind::OutOfMemory,
@@ -1209,54 +1188,35 @@ where
             ))));
         }
 
-        // First, return any buffered output
-        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self
-                .output_buffer
-                .pop_front()
-                .expect("non-empty checked above"))));
+        if let Some(out) = self.output_buffer.pop_front() {
+            return Poll::Ready(Some(Ok(out)));
         }
 
-        // Poll the inner stream
-        let inner = Pin::new(&mut self.inner);
-        match inner.poll_next(cx) {
-            Poll::Ready(Some(Ok(bytes))) => {
-                // Process the event stream bytes
-                self.process_bytes(&bytes);
-
-                // Check for buffer overflow after processing
-                if self.state.buffer_overflow {
-                    return Poll::Ready(Some(Err(io::Error::new(
-                        io::ErrorKind::OutOfMemory,
-                        "Event stream buffer overflow",
-                    ))));
-                }
+        loop {
+            match Pin::new(&mut self.inner).poll_next(cx) {
+                Poll::Ready(Some(Ok(bytes))) => {
+                    self.process_bytes(&bytes);
+
+                    if self.state.buffer_overflow {
+                        return Poll::Ready(Some(Err(io::Error::new(
+                            io::ErrorKind::OutOfMemory,
+                            "Event stream buffer overflow",
+                        ))));
+                    }
 
-                // Return first buffered output if any
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    // No output yet, need to poll again
-                    cx.waker().wake_by_ref();
-                    Poll::Pending
+                    if let Some(out) = self.output_buffer.pop_front() {
+                        return Poll::Ready(Some(Ok(out)));
+                    }
                 }
-            }
-            Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(io::Error::other(e)))),
-            Poll::Ready(None) => {
-                // Stream ended, return any remaining buffered output
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    Poll::Ready(None)
+                Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(io::Error::other(e)))),
+                Poll::Ready(None) => {
+                    return match self.output_buffer.pop_front() {
+                        Some(out) => Poll::Ready(Some(Ok(out))),
+                        None => Poll::Ready(None),
+                    };
                 }
+                Poll::Pending => return Poll::Pending,
             }
-            Poll::Pending => Poll::Pending,
         }
     }
 }
diff --git a/src/providers/vertex/stream.rs b/src/providers/vertex/stream.rs
index 1837263..c3796f4 100644
--- a/src/providers/vertex/stream.rs
+++ b/src/providers/vertex/stream.rs
@@ -422,7 +422,6 @@ where
     type Item = Result<Bytes, io::Error>;
 
     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        // Check for buffer overflow error
         if self.state.buffer_overflow {
             return Poll::Ready(Some(Err(io::Error::new(
                 io::ErrorKind::OutOfMemory,
@@ -430,54 +429,35 @@ where
             ))));
         }
 
-        // First, return any buffered output
-        if !self.output_buffer.is_empty() {
-            return Poll::Ready(Some(Ok(self
-                .output_buffer
-                .pop_front()
-                .expect("non-empty checked above"))));
+        if let Some(out) = self.output_buffer.pop_front() {
+            return Poll::Ready(Some(Ok(out)));
         }
 
-        // Poll the inner stream
-        let inner = Pin::new(&mut self.inner);
-        match inner.poll_next(cx) {
-            Poll::Ready(Some(Ok(bytes))) => {
-                // Process the Vertex SSE bytes
-                self.process_bytes(&bytes);
+        loop {
+            match Pin::new(&mut self.inner).poll_next(cx) {
+                Poll::Ready(Some(Ok(bytes))) => {
+                    self.process_bytes(&bytes);
 
-                // Check for buffer overflow after processing
-                if self.state.buffer_overflow {
-                    return Poll::Ready(Some(Err(io::Error::new(
-                        io::ErrorKind::OutOfMemory,
-                        "SSE buffer overflow",
-                    ))));
-                }
+                    if self.state.buffer_overflow {
+                        return Poll::Ready(Some(Err(io::Error::new(
+                            io::ErrorKind::OutOfMemory,
+                            "SSE buffer overflow",
+                        ))));
+                    }
 
-                // Return first buffered output if any
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    // No output yet, need to poll again
-                    cx.waker().wake_by_ref();
-                    Poll::Pending
+                    if let Some(out) = self.output_buffer.pop_front() {
+                        return Poll::Ready(Some(Ok(out)));
+                    }
                 }
-            }
-            Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
-            Poll::Ready(None) => {
-                // Stream ended - flush any remaining buffer
-                if !self.output_buffer.is_empty() {
-                    Poll::Ready(Some(Ok(self
-                        .output_buffer
-                        .pop_front()
-                        .expect("non-empty checked above"))))
-                } else {
-                    Poll::Ready(None)
+                Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
+                Poll::Ready(None) => {
+                    return match self.output_buffer.pop_front() {
+                        Some(out) => Poll::Ready(Some(Ok(out))),
+                        None => Poll::Ready(None),
+                    };
                 }
+                Poll::Pending => return Poll::Pending,
             }
-            Poll::Pending => Poll::Pending,
         }
     }
 }

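The refactor above settles on one shape for all three adapters: drain any buffered output first, then loop on the inner stream until it yields output, errors, ends, or is itself pending. Looping (instead of the old `cx.waker().wake_by_ref()` followed by `Poll::Pending`) avoids a busy self-wakeup when input arrives that produces no output yet, while still upholding the waker contract, because `Pending` is only ever propagated from the inner stream. A minimal standalone sketch of the pattern; the struct and the processing step are hypothetical stand-ins for the provider-specific decoders:

    use std::{
        collections::VecDeque,
        io,
        pin::Pin,
        task::{Context, Poll},
    };

    use bytes::Bytes;
    use futures::Stream;

    /// Hypothetical reframing adapter; stands in for the Bedrock/Vertex streams.
    struct FramedStream<S> {
        inner: S,
        output_buffer: VecDeque<Bytes>,
    }

    impl<S> FramedStream<S> {
        /// Stand-in for the real event-stream/SSE decoding: may emit zero
        /// frames for a partial input chunk, or several for a large one.
        fn process_bytes(&mut self, bytes: &Bytes) {
            self.output_buffer.push_back(bytes.clone());
        }
    }

    impl<S> Stream for FramedStream<S>
    where
        S: Stream<Item = Result<Bytes, io::Error>> + Unpin,
    {
        type Item = Result<Bytes, io::Error>;

        fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
            // 1. Drain anything already decoded.
            if let Some(out) = self.output_buffer.pop_front() {
                return Poll::Ready(Some(Ok(out)));
            }
            // 2. Keep feeding input until a frame appears. Pending is only
            //    returned when the inner stream is Pending, so the inner
            //    stream's registration of `cx` is what wakes us later; no
            //    wake_by_ref self-poll is needed for the "no output yet" case.
            loop {
                match Pin::new(&mut self.inner).poll_next(cx) {
                    Poll::Ready(Some(Ok(bytes))) => {
                        self.process_bytes(&bytes);
                        if let Some(out) = self.output_buffer.pop_front() {
                            return Poll::Ready(Some(Ok(out)));
                        }
                        // else: partial frame; poll the inner stream again.
                    }
                    Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
                    Poll::Ready(None) => return Poll::Ready(self.output_buffer.pop_front().map(Ok)),
                    Poll::Pending => return Poll::Pending,
                }
            }
        }
    }
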
From 51a5c2d32bcf39f96c9802a307137abc874c4036 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:17:29 +1000
Subject: [PATCH 068/172] Probe /auth/me for header auth so non-admin users
 authenticate cleanly

---
 ui/src/auth/AuthProvider.tsx                  | 36 +++----------------
 .../ConversationsProvider.tsx                 |  8 ++---
 .../components/WasmSetup/openrouter-oauth.ts  |  6 +---
 ui/src/config/ConfigProvider.tsx              |  4 ++-
 ui/src/pages/LoginPage.tsx                    |  5 +--
 5 files changed, 12 insertions(+), 47 deletions(-)

diff --git a/ui/src/auth/AuthProvider.tsx b/ui/src/auth/AuthProvider.tsx
index 4ebc340..05ceaf7 100644
--- a/ui/src/auth/AuthProvider.tsx
+++ b/ui/src/auth/AuthProvider.tsx
@@ -60,45 +60,19 @@ export function AuthProvider({ children }: { children: React.ReactNode }) {
     token: null,
   });
 
-  // Check for header-based auth (zero-trust proxy)
+  // Check for header-based auth (zero-trust proxy). Probe `/auth/me` rather
+  // than an admin endpoint so non-admin header-authenticated users (who cannot
+  // list organizations) still resolve to an authenticated session.
   const checkHeaderAuth = useCallback(async (): Promise<{
     user: User;
     token: string;
   } | null> => {
-    // In header auth mode, the proxy sets headers that the backend trusts
-    // We can make a request to a "whoami" endpoint or just trust the UI config
-    // For now, we'll check if header auth is available and make a test request
     if (!config?.auth.methods.includes("header")) {
       return null;
     }
 
-    try {
-      // Try to access an admin endpoint to see if we're authenticated via headers
-      const response = await fetch("/admin/v1/organizations?limit=1", {
-        credentials: "include",
-      });
-
-      if (response.ok) {
-        // Fetch user info from /auth/me
-        const user = await fetchMe();
-        if (user) {
-          return { user, token: "header-auth" };
-        }
-        // Fallback if /auth/me doesn't work
-        const userEmail = response.headers.get("X-Forwarded-User");
-        return {
-          user: {
-            id: userEmail || "header-user",
-            email: userEmail || undefined,
-          },
-          token: "header-auth",
-        };
-      }
-    } catch {
-      // Header auth not working
-    }
-
-    return null;
+    const user = await fetchMe();
+    return user ? { user, token: "header-auth" } : null;
   }, [config?.auth.methods]);
 
   // Initialize auth state
diff --git a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx
index d550eb1..fdc3ea1 100644
--- a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx
+++ b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx
@@ -732,9 +732,7 @@ export function ConversationsProvider({ children }: ConversationsProviderProps)
   const reorderPinned = useCallback(
     (orderedIds: string[]) => {
       // Snapshot current pin orders so we can roll back if any sync fails.
-      const previousOrders = new Map(
-        storedConversations.map((c) => [c.id, c.pinOrder] as const)
-      );
+      const previousOrders = new Map(storedConversations.map((c) => [c.id, c.pinOrder] as const));
 
       // Update local state with new pin orders
       setStoredConversations((prev) => {
@@ -759,9 +757,7 @@ export function ConversationsProvider({ children }: ConversationsProviderProps)
                 onError: () => {
                   setStoredConversations((prev) =>
                     prev.map((c) =>
-                      previousOrders.has(c.id)
-                        ? { ...c, pinOrder: previousOrders.get(c.id) }
-                        : c
+                      previousOrders.has(c.id) ? { ...c, pinOrder: previousOrders.get(c.id) } : c
                     )
                   );
                 },
diff --git a/ui/src/components/WasmSetup/openrouter-oauth.ts b/ui/src/components/WasmSetup/openrouter-oauth.ts
index 9566252..6bd6c2a 100644
--- a/ui/src/components/WasmSetup/openrouter-oauth.ts
+++ b/ui/src/components/WasmSetup/openrouter-oauth.ts
@@ -43,11 +43,7 @@ export function isInIframe(): boolean {
  */
 export async function startOpenRouterOAuth() {
   if (isInIframe()) {
-    window.open(
-      window.location.origin + window.location.pathname,
-      "_blank",
-      "noopener,noreferrer",
-    );
+    window.open(window.location.origin + window.location.pathname, "_blank", "noopener,noreferrer");
     return;
   }
 
diff --git a/ui/src/config/ConfigProvider.tsx b/ui/src/config/ConfigProvider.tsx
index a620d4e..d11a272 100644
--- a/ui/src/config/ConfigProvider.tsx
+++ b/ui/src/config/ConfigProvider.tsx
@@ -21,7 +21,9 @@ const BRANDING_FONTS_STYLE_ID = "hadrian-branding-fonts";
 const COLOR_RE = /^[a-zA-Z0-9#%(),.\s\-/_]+$/;
 
 function isSafeColor(value: string | undefined): value is string {
-  return typeof value === "string" && value.length > 0 && value.length < 200 && COLOR_RE.test(value);
+  return (
+    typeof value === "string" && value.length > 0 && value.length < 200 && COLOR_RE.test(value)
+  );
 }
 
 /** Validate a font-family name. Quotes/braces/semicolons in here would let
diff --git a/ui/src/pages/LoginPage.tsx b/ui/src/pages/LoginPage.tsx
index e930f01..975e397 100644
--- a/ui/src/pages/LoginPage.tsx
+++ b/ui/src/pages/LoginPage.tsx
@@ -68,10 +68,7 @@ export default function LoginPage() {
   // cross-origin URL in the browser. Reject anything whose second character
   // makes it protocol-relative or backslash-prefixed.
   const isSafeReturnTo = (value: string | null): value is string =>
-    !!value &&
-    value.startsWith("/") &&
-    !value.startsWith("//") &&
-    !value.startsWith("/\\");
+    !!value && value.startsWith("/") && !value.startsWith("//") && !value.startsWith("/\\");
   const returnToParam = new URLSearchParams(location.search).get("return_to");
   const from = isSafeReturnTo(returnToParam)
     ? returnToParam

From 691651fc008607af963701e841bed894462aae16 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:20:38 +1000
Subject: [PATCH 069/172] Confine FilesystemFileStorage I/O to the configured
 root via canonicalize

---
 src/services/file_storage.rs | 75 ++++++++++++++++++++++++------------
 1 file changed, 50 insertions(+), 25 deletions(-)

diff --git a/src/services/file_storage.rs b/src/services/file_storage.rs
index 55d2736..e5d2f68 100644
--- a/src/services/file_storage.rs
+++ b/src/services/file_storage.rs
@@ -188,6 +188,52 @@ impl FilesystemFileStorage {
     fn file_path(&self, file_id: &str) -> std::path::PathBuf {
         self.config.file_path(file_id)
     }
+
+    /// Resolve a `file_id_or_path` from an upstream caller (database row, etc.)
+    /// to an on-disk path that is guaranteed to live under `config.path`.
+    ///
+    /// Reject anything that escapes the configured root via `..`, absolute
+    /// paths outside the root, or symlinks. This is the single chokepoint for
+    /// all read/delete/exists operations so that a tampered DB row cannot be
+    /// used to read or delete arbitrary files on the host.
+    fn resolve_path(&self, file_id_or_path: &str) -> FileStorageResult<std::path::PathBuf> {
+        let candidate = if file_id_or_path.contains(std::path::MAIN_SEPARATOR)
+            || file_id_or_path.contains('/')
+        {
+            std::path::PathBuf::from(file_id_or_path)
+        } else {
+            self.file_path(file_id_or_path)
+        };
+
+        let root = std::path::Path::new(&self.config.path);
+        let root_canonical = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
+
+        // Resolve symlinks if the file exists; otherwise resolve the parent
+        // and re-attach the file name so callers can pre-check pending paths.
+        let resolved = match candidate.canonicalize() {
+            Ok(p) => p,
+            Err(_) => {
+                let parent = candidate
+                    .parent()
+                    .unwrap_or_else(|| std::path::Path::new(""));
+                let canonical_parent = parent
+                    .canonicalize()
+                    .unwrap_or_else(|_| parent.to_path_buf());
+                match candidate.file_name() {
+                    Some(name) => canonical_parent.join(name),
+                    None => canonical_parent,
+                }
+            }
+        };
+
+        if !resolved.starts_with(&root_canonical) {
+            return Err(FileStorageError::NotFound(format!(
+                "Path '{}' is outside the configured storage root",
+                file_id_or_path
+            )));
+        }
+        Ok(resolved)
+    }
 }
 
 #[cfg(feature = "server")]
@@ -224,16 +270,7 @@ impl FileStorage for FilesystemFileStorage {
 
     #[instrument(skip(self))]
     async fn retrieve(&self, file_id_or_path: &str) -> FileStorageResult<Vec<u8>> {
-        // If the input looks like a path (contains separator), use it directly
-        // Otherwise, treat it as a file ID and construct the path
-        let path = if file_id_or_path.contains(std::path::MAIN_SEPARATOR)
-            || file_id_or_path.contains('/')
-        {
-            std::path::PathBuf::from(file_id_or_path)
-        } else {
-            self.file_path(file_id_or_path)
-        };
-
+        let path = self.resolve_path(file_id_or_path)?;
         debug!(path = %path.display(), "Retrieving file from filesystem");
 
         match tokio::fs::read(&path).await {
@@ -247,14 +284,7 @@ impl FileStorage for FilesystemFileStorage {
 
     #[instrument(skip(self))]
     async fn delete(&self, file_id_or_path: &str) -> FileStorageResult<()> {
-        let path = if file_id_or_path.contains(std::path::MAIN_SEPARATOR)
-            || file_id_or_path.contains('/')
-        {
-            std::path::PathBuf::from(file_id_or_path)
-        } else {
-            self.file_path(file_id_or_path)
-        };
-
+        let path = self.resolve_path(file_id_or_path)?;
         debug!(path = %path.display(), "Deleting file from filesystem");
 
         match tokio::fs::remove_file(&path).await {
@@ -272,14 +302,9 @@ impl FileStorage for FilesystemFileStorage {
 
     #[instrument(skip(self))]
     async fn exists(&self, file_id_or_path: &str) -> FileStorageResult<bool> {
-        let path = if file_id_or_path.contains(std::path::MAIN_SEPARATOR)
-            || file_id_or_path.contains('/')
-        {
-            std::path::PathBuf::from(file_id_or_path)
-        } else {
-            self.file_path(file_id_or_path)
+        let Ok(path) = self.resolve_path(file_id_or_path) else {
+            return Ok(false);
         };
-
         Ok(tokio::fs::metadata(&path).await.is_ok())
     }
 

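A condensed sketch of the invariant `resolve_path` enforces, for the simple case where the candidate path already exists (the real helper additionally canonicalizes the parent directory for not-yet-created files; names here are illustrative):

    use std::path::{Path, PathBuf};

    /// Return Ok only if `candidate`, after symlink resolution, still lives
    /// under `root`. A plain string prefix test is not enough: `..` segments
    /// and symlinks both survive naive joins, so canonicalize first and only
    /// then compare path components via `starts_with`.
    fn confine(root: &Path, candidate: &Path) -> std::io::Result<PathBuf> {
        let root = root.canonicalize()?;
        let resolved = candidate.canonicalize()?;
        if resolved.starts_with(&root) {
            Ok(resolved)
        } else {
            Err(std::io::Error::new(
                std::io::ErrorKind::NotFound,
                "path escapes storage root",
            ))
        }
    }

With a root of `/srv/files`, `confine` accepts `/srv/files/abc` but rejects `/srv/files/../etc/passwd` and any symlink inside the root that points outside it, because both comparisons happen after `canonicalize`.
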
From 26418a92f8fb3c1978cdfc196ccb559c437e6239 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:23:24 +1000
Subject: [PATCH 070/172] Pin audit-log list org_id to caller's org membership

---
 src/routes/admin/audit_logs.rs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/routes/admin/audit_logs.rs b/src/routes/admin/audit_logs.rs
index 34e5de2..b2d57a0 100644
--- a/src/routes/admin/audit_logs.rs
+++ b/src/routes/admin/audit_logs.rs
@@ -69,6 +69,29 @@ pub async fn list(
         query.from = Some(chrono::Utc::now() - chrono::Duration::days(7));
     }
 
+    // Constrain `org_id` to one the caller belongs to. Without this, anyone
+    // with the `audit_log:list` permission could read any tenant's logs by
+    // sending an arbitrary `?org_id=` query parameter. Subjects with no
+    // membership (e.g. super-admins) are allowed through unconstrained.
+    if !authz.subject.org_ids.is_empty() {
+        match query.org_id {
+            Some(requested) => {
+                if !authz.subject.is_org_member(&requested.to_string()) {
+                    return Err(AdminError::Forbidden(
+                        "audit_log:list scoped outside your organization".to_string(),
+                    ));
+                }
+            }
+            None => {
+                if let Some(first) = authz.subject.org_ids.first()
+                    && let Ok(parsed) = first.parse()
+                {
+                    query.org_id = Some(parsed);
+                }
+            }
+        }
+    }
+
     let result = services.audit_logs.list(query).await?;
 
     let pagination = PaginationMeta::with_cursors(

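Distilled to a pure function, the scoping rule has three outcomes: pass-through for membership-less subjects, reject on mismatch, and pin to the first membership when unscoped. A simplified sketch (string ids instead of the real typed subject and query):

    /// Simplified sketch of the org-scoping rule for audit-log listing.
    /// `memberships` is the caller's org ids; `requested` is the ?org_id= param.
    fn scope_org<'a>(
        memberships: &'a [String],
        requested: Option<&'a str>,
    ) -> Result<Option<&'a str>, &'static str> {
        if memberships.is_empty() {
            // No memberships (e.g. super-admin): unconstrained.
            return Ok(requested);
        }
        match requested {
            // Explicit org: must be one of the caller's.
            Some(org) if memberships.iter().any(|m| m.as_str() == org) => Ok(Some(org)),
            Some(_) => Err("audit_log:list scoped outside your organization"),
            // No org given: pin to the caller's first membership.
            None => Ok(memberships.first().map(String::as_str)),
        }
    }
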
From 38cf600454a9c7e063a7ecca52896a9418999e8d Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:26:34 +1000
Subject: [PATCH 071/172] Pass requested owner scope into skills/templates
 create authz check

---
 src/routes/admin/skills.rs    | 21 ++++++++++++++++++++-
 src/routes/admin/templates.rs | 21 ++++++++++++++++++++-
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/src/routes/admin/skills.rs b/src/routes/admin/skills.rs
index a0f7192..a87193e 100644
--- a/src/routes/admin/skills.rs
+++ b/src/routes/admin/skills.rs
@@ -65,7 +65,26 @@ pub async fn create(
     let services = get_services(&state)?;
     let actor = AuditActor::from(&admin_auth);
 
-    authz.require("skill", "create", None, None, None, None)?;
+    // Pass the requested owner scope into authz so the policy can reject
+    // creating a skill for a team / project / user the caller does not own.
+    let (owner_org, owner_team, owner_project) = match &input.owner {
+        crate::models::SkillOwner::Organization { organization_id } => {
+            (Some(organization_id.to_string()), None, None)
+        }
+        crate::models::SkillOwner::Team { team_id } => (None, Some(team_id.to_string()), None),
+        crate::models::SkillOwner::Project { project_id } => {
+            (None, None, Some(project_id.to_string()))
+        }
+        crate::models::SkillOwner::User { .. } => (None, None, None),
+    };
+    authz.require(
+        "skill",
+        "create",
+        None,
+        owner_org.as_deref(),
+        owner_team.as_deref(),
+        owner_project.as_deref(),
+    )?;
 
     // Enforce per-owner skill count limit.
     let max = state.config.limits.resource_limits.max_skills_per_owner;
diff --git a/src/routes/admin/templates.rs b/src/routes/admin/templates.rs
index c91d644..9f7172a 100644
--- a/src/routes/admin/templates.rs
+++ b/src/routes/admin/templates.rs
@@ -55,7 +55,26 @@ pub async fn create(
     let services = get_services(&state)?;
     let actor = AuditActor::from(&admin_auth);
 
-    authz.require("template", "create", None, None, None, None)?;
+    // Pass the requested owner scope into authz so the policy can reject
+    // creating a template for a team / project / user the caller does not own.
+    let (owner_org, owner_team, owner_project) = match &input.owner {
+        crate::models::TemplateOwner::Organization { organization_id } => {
+            (Some(organization_id.to_string()), None, None)
+        }
+        crate::models::TemplateOwner::Team { team_id } => (None, Some(team_id.to_string()), None),
+        crate::models::TemplateOwner::Project { project_id } => {
+            (None, None, Some(project_id.to_string()))
+        }
+        crate::models::TemplateOwner::User { .. } => (None, None, None),
+    };
+    authz.require(
+        "template",
+        "create",
+        None,
+        owner_org.as_deref(),
+        owner_team.as_deref(),
+        owner_project.as_deref(),
+    )?;
 
     // Check template limit
     let max = state.config.limits.resource_limits.max_templates_per_owner;

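The two handlers duplicate the same owner-to-scope mapping; a shared helper along these lines would keep them in lockstep (hypothetical: `Owner` stands in for both `SkillOwner` and `TemplateOwner`, which have the same shape):

    /// Hypothetical shared helper for the duplicated owner-to-scope mapping
    /// in the skills and templates create handlers.
    enum Owner {
        Organization { organization_id: String },
        Team { team_id: String },
        Project { project_id: String },
        User { user_id: String },
    }

    /// Map an owner to the (org, team, project) scope triple passed to
    /// `authz.require`; user-owned resources carry no extra scope.
    fn owner_scope(owner: &Owner) -> (Option<&str>, Option<&str>, Option<&str>) {
        match owner {
            Owner::Organization { organization_id } => (Some(organization_id.as_str()), None, None),
            Owner::Team { team_id } => (None, Some(team_id.as_str()), None),
            Owner::Project { project_id } => (None, None, Some(project_id.as_str())),
            Owner::User { .. } => (None, None, None),
        }
    }
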
From aa0838ea8b64662ed8575fcbf73fb9976c00aaef Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:30:08 +1000
Subject: [PATCH 072/172] Delete orphaned files from external storage before
 removing DB rows

---
 src/cli/server.rs                |  4 ++-
 src/jobs/vector_store_cleanup.rs | 52 +++++++++++++++++++++++++++++---
 src/services/files.rs            |  7 +++++
 3 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/src/cli/server.rs b/src/cli/server.rs
index e021af7..a0500a0 100644
--- a/src/cli/server.rs
+++ b/src/cli/server.rs
@@ -237,9 +237,11 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
             .file_search_service
             .as_ref()
             .map(|fs| fs.vector_store());
+        let file_storage = state.services.as_ref().map(|s| s.files.storage());
 
         tokio::spawn(async move {
-            jobs::start_vector_store_cleanup_worker(db, vector_store, cleanup_config).await;
+            jobs::start_vector_store_cleanup_worker(db, vector_store, file_storage, cleanup_config)
+                .await;
         });
     }
 
diff --git a/src/jobs/vector_store_cleanup.rs b/src/jobs/vector_store_cleanup.rs
index 2548107..8ee3b6c 100644
--- a/src/jobs/vector_store_cleanup.rs
+++ b/src/jobs/vector_store_cleanup.rs
@@ -17,8 +17,11 @@ use std::{sync::Arc, time::Instant};
 use chrono::{Duration, Utc};
 
 use crate::{
-    cache::vector_store::VectorBackend, config::VectorStoreCleanupConfig, db::DbPool,
+    cache::vector_store::VectorBackend,
+    config::VectorStoreCleanupConfig,
+    db::DbPool,
     observability::metrics,
+    services::{FileStorage, FileStorageError},
 };
 
 /// Results from a single cleanup run.
@@ -55,6 +58,7 @@ impl CleanupRunResult {
 pub async fn start_vector_store_cleanup_worker(
     db: Arc<DbPool>,
     vector_store: Option<Arc<dyn VectorBackend>>,
+    file_storage: Option<Arc<dyn FileStorage>>,
     config: VectorStoreCleanupConfig,
 ) {
     if !config.enabled {
@@ -90,7 +94,7 @@ pub async fn start_vector_store_cleanup_worker(
     let interval = config.interval();
 
     loop {
-        match run_cleanup(&db, &vector_store, &config).await {
+        match run_cleanup(&db, &vector_store, file_storage.as_ref(), &config).await {
             Ok(result) => {
                 if result.has_deletions() {
                     tracing::info!(
@@ -122,6 +126,7 @@ pub async fn start_vector_store_cleanup_worker(
 async fn run_cleanup(
     db: &Arc<DbPool>,
     vector_store: &Arc<dyn VectorBackend>,
+    file_storage: Option<&Arc<dyn FileStorage>>,
     config: &VectorStoreCleanupConfig,
 ) -> Result<CleanupRunResult, Box<dyn std::error::Error + Send + Sync>> {
     let start = Instant::now();
@@ -329,10 +334,47 @@ async fn run_cleanup(
             // Check if file is referenced by other vector stores
             match db.files().count_file_references(file_id).await {
                 Ok(ref_count) if ref_count <= 1 => {
-                    // File is only referenced by this (deleted) vector store, delete it
-                    // First get the file to know its size
-                    if let Ok(Some(file)) = db.files().get_file(file_id).await {
+                    // File is only referenced by this (deleted) vector store, delete it.
+                    // Fetch metadata first so we can free both the on-disk/object
+                    // payload and the DB row in the right order: external first
+                    // (so a partial failure leaves the DB pointing at a valid
+                    // object that the next sweep will retry), then DB.
+                    let file_meta = match db.files().get_file(file_id).await {
+                        Ok(meta) => meta,
+                        Err(e) => {
+                            tracing::error!(
+                                file_id = %file_id,
+                                error = %e,
+                                "Failed to fetch orphaned file metadata"
+                            );
+                            None
+                        }
+                    };
+
+                    if let Some(file) = &file_meta {
                         result.storage_bytes_freed += file.size_bytes as u64;
+                        if let (Some(storage), Some(path)) = (file_storage, &file.storage_path)
+                            && file.storage_backend != crate::models::StorageBackend::Database
+                        {
+                            match storage.delete(path).await {
+                                Ok(()) => tracing::debug!(
+                                    file_id = %file_id,
+                                    path = %path,
+                                    "Deleted orphaned file from external storage"
+                                ),
+                                Err(FileStorageError::NotFound(_)) => {}
+                                Err(e) => {
+                                    tracing::error!(
+                                        file_id = %file_id,
+                                        path = %path,
+                                        error = %e,
+                                        "Failed to delete orphaned file from external storage; \
+                                         skipping DB row to retry next sweep"
+                                    );
+                                    continue;
+                                }
+                            }
+                        }
                     }
 
                     if let Err(e) = db.files().delete_file(file_id).await {
diff --git a/src/services/files.rs b/src/services/files.rs
index 57c4e2a..7e71a32 100644
--- a/src/services/files.rs
+++ b/src/services/files.rs
@@ -53,6 +53,13 @@ impl FilesService {
         self.storage.backend_name()
     }
 
+    /// Get a clone of the underlying storage backend handle. Used by background
+    /// jobs (e.g. vector store cleanup) that need to delete the on-disk/object
+    /// payload alongside the DB row.
+    pub fn storage(&self) -> Arc<dyn FileStorage> {
+        self.storage.clone()
+    }
+
     /// Count files by owner.
     pub async fn count_by_owner(
         &self,

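The ordering argument in the comment generalizes to any record that points at an external payload: delete the external side first, and on failure keep the row so the next sweep retries against a still-valid pointer. A minimal sketch of the invariant, with hypothetical trait names:

    use std::future::Future;

    trait Storage {
        fn delete(&self, path: &str) -> impl Future<Output = Result<(), String>>;
    }
    trait FileRows {
        fn delete_row(&self, id: u64) -> impl Future<Output = Result<(), String>>;
    }

    async fn delete_file_everywhere<S: Storage, D: FileRows>(
        storage: &S,
        db: &D,
        path: &str,
        id: u64,
    ) -> Result<(), String> {
        // External payload first: if this fails, the DB row still points at
        // a valid object and the next sweep retries. Deleting the row first
        // would orphan the payload forever on a partial failure.
        storage.delete(path).await?;
        db.delete_row(id).await
    }
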
From 37a1229912d497633f29556cc3827d841d8e42cb Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:32:21 +1000
Subject: [PATCH 073/172] Walk caller's memberships in user_has_access instead
 of paging through resource members

---
 src/services/files.rs | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/services/files.rs b/src/services/files.rs
index 7e71a32..bca83af 100644
--- a/src/services/files.rs
+++ b/src/services/files.rs
@@ -257,6 +257,12 @@ impl FilesService {
     /// - The file is owned by the user directly
     /// - The file is owned by an organization the user belongs to
     /// - The file is owned by a project the user belongs to
+    ///
+    /// Each membership check is bounded by the user's own membership count
+    /// (typically a handful of orgs/teams/projects) instead of paging through
+    /// every member of the resource — the previous default-`ListParams` calls
+    /// silently denied access whenever an org/team/project had more members
+    /// than the page cap.
     pub async fn user_has_access(&self, user_id: Uuid, file_id: Uuid) -> DbResult<bool> {
         let file = match self.db.files().get_file(file_id).await? {
             Some(f) => f,
@@ -264,36 +270,30 @@ impl FilesService {
         };
 
         match file.owner_type {
-            VectorStoreOwnerType::User => {
-                // Direct ownership
-                Ok(file.owner_id == user_id)
-            }
+            VectorStoreOwnerType::User => Ok(file.owner_id == user_id),
             VectorStoreOwnerType::Organization => {
-                // Check if user is a member of the organization
-                let members = self
+                let memberships = self
                     .db
                     .users()
-                    .list_org_members(file.owner_id, ListParams::default())
+                    .get_org_memberships_for_user(user_id)
                     .await?;
-                Ok(members.items.iter().any(|u| u.id == user_id))
+                Ok(memberships.iter().any(|m| m.org_id == file.owner_id))
             }
             VectorStoreOwnerType::Team => {
-                // Check if user is a member of the team
-                let members = self
+                let memberships = self
                     .db
-                    .teams()
-                    .list_members(file.owner_id, ListParams::default())
+                    .users()
+                    .get_team_memberships_for_user(user_id)
                     .await?;
-                Ok(members.items.iter().any(|m| m.user_id == user_id))
+                Ok(memberships.iter().any(|m| m.team_id == file.owner_id))
             }
             VectorStoreOwnerType::Project => {
-                // Check if user is a member of the project
-                let members = self
+                let memberships = self
                     .db
                     .users()
-                    .list_project_members(file.owner_id, ListParams::default())
+                    .get_project_memberships_for_user(user_id)
                     .await?;
-                Ok(members.items.iter().any(|u| u.id == user_id))
+                Ok(memberships.iter().any(|m| m.project_id == file.owner_id))
             }
         }
     }

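The essential change is the direction of the membership check, which fixes cost and correctness at once. Schematically (method names as in the diff, surrounding types elided):

    // Before: page through the *resource's* members and look for the user;
    // bounded by the page cap, so large orgs silently failed the check.
    //   let members = db.users().list_org_members(org_id, ListParams::default()).await?;
    //   members.items.iter().any(|u| u.id == user_id)
    //
    // After: fetch the *user's* memberships (typically a handful) and look
    // for the resource; no pagination involved, so no silent cap.
    //   let memberships = db.users().get_org_memberships_for_user(user_id).await?;
    //   memberships.iter().any(|m| m.org_id == org_id)
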
From f85e43c10f2eda3ce5b5cb84d7cd1fc4b74c2287 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:35:00 +1000
Subject: [PATCH 074/172] Bound Kreuzberg document extraction with configurable
 timeout

---
 src/config/features.rs             | 14 ++++++++++++++
 src/services/document_processor.rs | 23 ++++++++++++++++++++---
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/config/features.rs b/src/config/features.rs
index 3d0d799..135271f 100644
--- a/src/config/features.rs
+++ b/src/config/features.rs
@@ -826,6 +826,15 @@ pub struct DocumentExtractionConfig {
     /// Default: 300
     #[serde(default = "default_pdf_image_dpi")]
     pub pdf_image_dpi: u32,
+
+    /// Maximum time (in seconds) a single document extraction is allowed to
+    /// run. Set to 0 to disable the timeout.
+    ///
+    /// A malicious or pathological document (e.g. an OCR job on a 5,000-page
+    /// PDF) can otherwise tie up an extraction worker indefinitely.
+    /// Default: 120 seconds (2 minutes)
+    #[serde(default = "default_extraction_timeout_secs")]
+    pub extraction_timeout_secs: u64,
 }
 
 impl Default for DocumentExtractionConfig {
@@ -836,10 +845,15 @@ impl Default for DocumentExtractionConfig {
             ocr_language: default_ocr_language(),
             pdf_extract_images: false,
             pdf_image_dpi: default_pdf_image_dpi(),
+            extraction_timeout_secs: default_extraction_timeout_secs(),
         }
     }
 }
 
+fn default_extraction_timeout_secs() -> u64 {
+    120
+}
+
 fn default_ocr_language() -> String {
     "eng".to_string()
 }
diff --git a/src/services/document_processor.rs b/src/services/document_processor.rs
index 69c9eec..b882a9f 100644
--- a/src/services/document_processor.rs
+++ b/src/services/document_processor.rs
@@ -2162,9 +2162,26 @@ async fn extract_text(
 
         // Build Kreuzberg extraction config from our config
         let config = build_kreuzberg_config(extraction_config);
-        let result = kreuzberg::extract_bytes(&data, mime_type, &config)
-            .await
-            .map_err(|e| DocumentProcessorError::DocumentExtraction(e.to_string()))?;
+        let extraction = kreuzberg::extract_bytes(&data, mime_type, &config);
+
+        // Bound how long any single document may tie up an extraction worker.
+        // Kreuzberg has no internal hard limit, so a 5,000-page OCR job (or a
+        // pathological/malicious input) would otherwise run unbounded.
+        let result = if extraction_config.extraction_timeout_secs > 0 {
+            let timeout = std::time::Duration::from_secs(extraction_config.extraction_timeout_secs);
+            match tokio::time::timeout(timeout, extraction).await {
+                Ok(r) => r,
+                Err(_) => {
+                    return Err(DocumentProcessorError::DocumentExtraction(format!(
+                        "Document extraction exceeded {}s timeout",
+                        extraction_config.extraction_timeout_secs
+                    )));
+                }
+            }
+        } else {
+            extraction.await
+        }
+        .map_err(|e| DocumentProcessorError::DocumentExtraction(e.to_string()))?;
 
         Ok(result.content)
     }

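The zero-means-disabled timeout branch is a pattern worth a helper if it spreads beyond extraction; a sketch of the same logic in generic form (not part of the patch):

    use std::time::Duration;

    /// Await `fut`, bounding it to `secs` seconds unless `secs == 0`
    /// (0 = timeout disabled). Returns Err(()) on expiry so the caller can
    /// map it to a domain error naming the configured limit.
    async fn with_optional_timeout<T>(
        secs: u64,
        fut: impl std::future::Future<Output = T>,
    ) -> Result<T, ()> {
        if secs == 0 {
            return Ok(fut.await);
        }
        tokio::time::timeout(Duration::from_secs(secs), fut)
            .await
            .map_err(|_| ())
    }
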
From c93e84fe53e5f3788afdc5444e11288a391b8fcb Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:36:52 +1000
Subject: [PATCH 075/172] Use partial unique index so soft-deleted
 vector_store_files don't block re-add

---
 migrations_sqlx/postgres/20250101000000_initial.sql | 11 ++++++++---
 migrations_sqlx/sqlite/20250101000000_initial.sql   | 11 ++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/migrations_sqlx/postgres/20250101000000_initial.sql b/migrations_sqlx/postgres/20250101000000_initial.sql
index 0e3c5f3..5396412 100644
--- a/migrations_sqlx/postgres/20250101000000_initial.sql
+++ b/migrations_sqlx/postgres/20250101000000_initial.sql
@@ -1103,11 +1103,16 @@ CREATE TABLE IF NOT EXISTS vector_store_files (
     created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
     updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
     -- Soft delete timestamp (NULL = not deleted)
-    deleted_at TIMESTAMPTZ,
-    -- A file can only be in a vector store once (among non-deleted entries)
-    UNIQUE(vector_store_id, file_id)
+    deleted_at TIMESTAMPTZ
 );
 
+-- A file can only be in a vector store once among *live* entries. Using a
+-- partial unique index instead of a plain UNIQUE constraint lets a soft-deleted
+-- row coexist with a fresh re-add of the same file.
+CREATE UNIQUE INDEX IF NOT EXISTS idx_vector_store_files_unique_live
+    ON vector_store_files(vector_store_id, file_id)
+    WHERE deleted_at IS NULL;
+
 CREATE INDEX IF NOT EXISTS idx_vector_store_files_vector_store ON vector_store_files(vector_store_id);
 CREATE INDEX IF NOT EXISTS idx_vector_store_files_file ON vector_store_files(file_id);
 CREATE INDEX IF NOT EXISTS idx_vector_store_files_status ON vector_store_files(status);
diff --git a/migrations_sqlx/sqlite/20250101000000_initial.sql b/migrations_sqlx/sqlite/20250101000000_initial.sql
index a5ea403..e15eb2c 100644
--- a/migrations_sqlx/sqlite/20250101000000_initial.sql
+++ b/migrations_sqlx/sqlite/20250101000000_initial.sql
@@ -901,11 +901,16 @@ CREATE TABLE IF NOT EXISTS vector_store_files (
     created_at TEXT NOT NULL DEFAULT (datetime('now')),
     updated_at TEXT NOT NULL DEFAULT (datetime('now')),
     -- Soft delete timestamp (NULL = not deleted)
-    deleted_at TEXT,
-    -- A file can only be in a vector store once (among non-deleted entries)
-    UNIQUE(vector_store_id, file_id)
+    deleted_at TEXT
 );
 
+-- A file can only be in a vector store once among *live* entries. Using a
+-- partial unique index instead of a plain UNIQUE constraint lets a soft-deleted
+-- row coexist with a fresh re-add of the same file.
+CREATE UNIQUE INDEX IF NOT EXISTS idx_vector_store_files_unique_live
+    ON vector_store_files(vector_store_id, file_id)
+    WHERE deleted_at IS NULL;
+
 CREATE INDEX IF NOT EXISTS idx_vector_store_files_vector_store ON vector_store_files(vector_store_id);
 CREATE INDEX IF NOT EXISTS idx_vector_store_files_file ON vector_store_files(file_id);
 CREATE INDEX IF NOT EXISTS idx_vector_store_files_status ON vector_store_files(status);

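What the partial index buys, as a behavioral sketch: a hypothetical sqlx test against in-memory SQLite with an illustrative schema, not the real migration:

    #[tokio::test]
    async fn soft_deleted_row_does_not_block_readd() -> Result<(), sqlx::Error> {
        let pool = sqlx::SqlitePool::connect("sqlite::memory:").await?;
        sqlx::query("CREATE TABLE t (vs TEXT, f TEXT, deleted_at TEXT)")
            .execute(&pool)
            .await?;
        sqlx::query("CREATE UNIQUE INDEX u ON t(vs, f) WHERE deleted_at IS NULL")
            .execute(&pool)
            .await?;

        sqlx::query("INSERT INTO t VALUES ('vs1', 'f1', NULL)")
            .execute(&pool)
            .await?;
        // A second *live* row for the same (vs, f) pair violates the index...
        assert!(
            sqlx::query("INSERT INTO t VALUES ('vs1', 'f1', NULL)")
                .execute(&pool)
                .await
                .is_err()
        );
        // ...but after a soft delete, re-adding the same file succeeds.
        sqlx::query("UPDATE t SET deleted_at = 'now' WHERE vs = 'vs1'")
            .execute(&pool)
            .await?;
        sqlx::query("INSERT INTO t VALUES ('vs1', 'f1', NULL)")
            .execute(&pool)
            .await?;
        Ok(())
    }
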
From e429cc7c9d2fdbf052b2543fa50a36be3d9e6cc0 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:38:34 +1000
Subject: [PATCH 076/172] Surface UUID parse errors in service_accounts revoke
 instead of dropping rows

---
 src/db/sqlite/service_accounts.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/db/sqlite/service_accounts.rs b/src/db/sqlite/service_accounts.rs
index fa0234c..a902ff7 100644
--- a/src/db/sqlite/service_accounts.rs
+++ b/src/db/sqlite/service_accounts.rs
@@ -352,8 +352,8 @@ impl ServiceAccountRepo for SqliteServiceAccountRepo {
 
         let revoked_uuids = revoked_ids
             .into_iter()
-            .filter_map(|s| parse_uuid(&s).ok())
-            .collect();
+            .map(|s| parse_uuid(&s))
+            .collect::<DbResult<Vec<Uuid>>>()?;
         Ok(revoked_uuids)
     }
 }

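The `filter_map(.ok())` to `collect` change is the standard fail-fast idiom: collecting an iterator of `Result`s into a `Result` of a collection short-circuits on the first error instead of silently dropping rows. A tiny self-contained demonstration:

    fn parse_all(ids: &[&str]) -> Result<Vec<u32>, std::num::ParseIntError> {
        // Collecting Iterator<Item = Result<T, E>> into Result<Vec<T>, E>
        // stops at the first Err and returns it, which is exactly the
        // filter_map(.ok()) silent-drop failure mode being fixed.
        ids.iter().map(|s| s.parse::<u32>()).collect()
    }

    // parse_all(&["1", "2"]) == Ok(vec![1, 2])
    // parse_all(&["1", "x"]) is an Err, surfaced rather than dropped.
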
From 617417e023717efb73d4c96797cc9d5f70f04b50 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:40:23 +1000
Subject: [PATCH 077/172] Combine DLQ count and delete into a single statement
 to fix TOCTOU

---
 src/dlq/database.rs | 58 +++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/src/dlq/database.rs b/src/dlq/database.rs
index f0e9899..55e49ee 100644
--- a/src/dlq/database.rs
+++ b/src/dlq/database.rs
@@ -565,32 +565,38 @@ impl DatabaseDlq {
     }
 
     async fn enforce_max_entries(&self) -> DlqResult<()> {
-        let count = self.len().await?;
-
-        if count > self.max_entries {
-            let to_delete = count - self.max_entries;
-
-            match self.pool.pool() {
-                #[cfg(feature = "database-sqlite")]
-                DbPoolRef::Sqlite(pool) => {
-                    sqlx::query(&format!(
-                        "DELETE FROM {} WHERE id IN (SELECT id FROM {} ORDER BY created_at ASC LIMIT ?)",
-                        self.table_name, self.table_name
-                    ))
-                    .bind(to_delete as i64)
-                    .execute(pool)
-                    .await?;
-                }
-                #[cfg(feature = "database-postgres")]
-                DbPoolRef::Postgres(pools) => {
-                    sqlx::query(&format!(
-                        "DELETE FROM {} WHERE id IN (SELECT id FROM {} ORDER BY created_at ASC LIMIT $1)",
-                        self.table_name, self.table_name
-                    ))
-                    .bind(to_delete as i64)
-                    .execute(pools.write_pool())
-                    .await?;
-                }
+        // Combine the count and delete in a single statement so a concurrent
+        // insert between SELECT COUNT(*) and DELETE can't make us drop the
+        // wrong number of rows. The subquery returns "every row except the
+        // most-recent `max_entries`" ordered oldest-first, which is exactly
+        // the set we need to evict.
+        let max_entries = self.max_entries as i64;
+        match self.pool.pool() {
+            #[cfg(feature = "database-sqlite")]
+            DbPoolRef::Sqlite(pool) => {
+                // SQLite quirk: LIMIT -1 means "no limit", which lets us pair
+                // it with OFFSET to skip the newest `max_entries` rows.
+                sqlx::query(&format!(
+                    "DELETE FROM {table} WHERE id IN (\
+                         SELECT id FROM {table} ORDER BY created_at DESC LIMIT -1 OFFSET ?\
+                     )",
+                    table = self.table_name
+                ))
+                .bind(max_entries)
+                .execute(pool)
+                .await?;
+            }
+            #[cfg(feature = "database-postgres")]
+            DbPoolRef::Postgres(pools) => {
+                sqlx::query(&format!(
+                    "DELETE FROM {table} WHERE id IN (\
+                         SELECT id FROM {table} ORDER BY created_at DESC OFFSET $1\
+                     )",
+                    table = self.table_name
+                ))
+                .bind(max_entries)
+                .execute(pools.write_pool())
+                .await?;
             }
         }
 

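A worked example of the eviction subquery with `max_entries = 3` and five rows inserted in order t1..t5 (hypothetical ids):

    rows by created_at, oldest first:   t1  t2  t3  t4  t5
    ORDER BY created_at DESC        ->  t5  t4  t3  t2  t1
    OFFSET 3  (skip the newest 3)   ->  t2  t1
    DELETE ... WHERE id IN (...)    ->  evicts t1 and t2; exactly {t3, t4, t5} remain

Because counting and deleting happen in one statement, a concurrent insert can no longer skew the number of rows evicted.
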
From ef8e891ea5cb957f51ba4a6ca1d0be4d514a06db Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:44:10 +1000
Subject: [PATCH 078/172] Route MCPUIRenderer default link clicks through
 trusted-domain modal

---
 .../MCPUIRenderer/MCPUIRenderer.tsx           | 43 +++++++++++++++++--
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/ui/src/components/MCPUIRenderer/MCPUIRenderer.tsx b/ui/src/components/MCPUIRenderer/MCPUIRenderer.tsx
index 4231e06..e68f647 100644
--- a/ui/src/components/MCPUIRenderer/MCPUIRenderer.tsx
+++ b/ui/src/components/MCPUIRenderer/MCPUIRenderer.tsx
@@ -11,9 +11,10 @@
  * - Remote DOM (`application/vnd.mcp-ui.remote-dom`) - Server-generated components
  */
 
-import { useCallback } from "react";
+import { useCallback, useState } from "react";
 import { UIResourceRenderer, type UIActionResult } from "@mcp-ui/client";
 import { cn } from "@/utils/cn";
+import { linkSafety } from "@/components/Markdown/linkSafety";
 
 /** MCP-UI Resource type (matches @mcp-ui/client expectations) */
 export interface MCPUIResource {
@@ -67,6 +68,34 @@ export function MCPUIRenderer({
   style,
   autoResize = true,
 }: MCPUIRendererProps) {
+  const [pendingUrl, setPendingUrl] = useState<string | null>(null);
+
+  const openLink = useCallback((url: string) => {
+    window.open(url, "_blank", "noopener,noreferrer");
+  }, []);
+
+  const requestLinkOpen = useCallback(
+    (url: string) => {
+      // Defer to the trusted-domain modal unless the user has already
+      // approved this domain. MCP-UI servers are user-configured but
+      // their content is still server-supplied, so untrusted links
+      // shouldn't open without explicit consent.
+      if (linkSafety.onLinkCheck(url)) {
+        openLink(url);
+      } else {
+        setPendingUrl(url);
+      }
+    },
+    [openLink]
+  );
+
+  const handleConfirmPendingUrl = useCallback(() => {
+    if (pendingUrl) {
+      openLink(pendingUrl);
+      setPendingUrl(null);
+    }
+  }, [pendingUrl, openLink]);
+
   // Handle UI actions from the rendered content
   const handleUIAction = useCallback(
     async (result: UIActionResult): Promise => {
@@ -90,8 +119,7 @@ export function MCPUIRenderer({
           if (actionHandlers?.onLink) {
             actionHandlers.onLink(result.payload.url);
           } else {
-            // Default: open link in new tab
-            window.open(result.payload.url, "_blank", "noopener,noreferrer");
+            requestLinkOpen(result.payload.url);
           }
           return { status: "handled" };
 
@@ -117,7 +145,7 @@ export function MCPUIRenderer({
           return { status: "unhandled", reason: "Unknown action type" };
       }
     },
-    [actionHandlers]
+    [actionHandlers, requestLinkOpen]
   );
 
   return (
@@ -135,6 +163,13 @@ export function MCPUIRenderer({
           },
         }}
       />
+      {pendingUrl !== null &&
+        linkSafety.renderModal({
+          isOpen: true,
+          onClose: () => setPendingUrl(null),
+          onConfirm: handleConfirmPendingUrl,
+          url: pendingUrl,
+        })}
     </div>
   );
 }

From ff9427790c355fe4dd1ef15e85cd000b3fc75e7d Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:44:51 +1000
Subject: [PATCH 079/172] Accept optional zod schema in useLocalStorage to
 validate cross-tab writes

---
 ui/src/hooks/useLocalStorage.ts | 46 +++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/ui/src/hooks/useLocalStorage.ts b/ui/src/hooks/useLocalStorage.ts
index 16abe7a..a909113 100644
--- a/ui/src/hooks/useLocalStorage.ts
+++ b/ui/src/hooks/useLocalStorage.ts
@@ -1,4 +1,5 @@
 import { useState, useEffect, useCallback } from "react";
+import type { ZodType } from "zod";
 
 // `storage` events only fire in *other* tabs. To keep multiple hook instances
 // of the same key inside the same tab in sync, mirror writes onto a custom
@@ -10,20 +11,41 @@ interface SameTabPayload {
   newValue: string | null;
 }
 
+/**
+ * Persist state to `localStorage` with same-tab and cross-tab sync.
+ *
+ * Pass an optional zod `schema` to validate values arriving from
+ * `localStorage` (initial read, `storage` events, same-tab broadcasts).
+ * Anything that fails validation is discarded — without a schema, a
+ * malicious or stale tab could write any JSON-shaped value into the key
+ * and surface it as a typed `T`. Callers handling user-controlled keys
+ * (auth tokens, preferences, settings) should always supply a schema.
+ */
 export function useLocalStorage<T>(
   key: string,
-  initialValue: T
+  initialValue: T,
+  schema?: ZodType<T>
 ): [T, (value: T | ((prev: T) => T)) => void] {
+  const parse = useCallback(
+    (raw: string | null): T | undefined => {
+      if (raw === null) return undefined;
+      try {
+        const parsed: unknown = JSON.parse(raw);
+        if (!schema) return parsed as T;
+        const result = schema.safeParse(parsed);
+        return result.success ? result.data : undefined;
+      } catch {
+        return undefined;
+      }
+    },
+    [schema]
+  );
+
   const [storedValue, setStoredValue] = useState<T>(() => {
     if (typeof window === "undefined") {
       return initialValue;
     }
-    try {
-      const item = window.localStorage.getItem(key);
-      return item ? (JSON.parse(item) as T) : initialValue;
-    } catch {
-      return initialValue;
-    }
+    return parse(window.localStorage.getItem(key)) ?? initialValue;
   });
 
   const setValue = useCallback(
@@ -47,12 +69,8 @@ export function useLocalStorage<T>(
 
   useEffect(() => {
     const apply = (newValue: string | null) => {
-      if (newValue === null) return;
-      try {
-        setStoredValue(JSON.parse(newValue) as T);
-      } catch {
-        // Ignore parse errors
-      }
+      const next = parse(newValue);
+      if (next !== undefined) setStoredValue(next);
     };
 
     const handleStorageChange = (e: StorageEvent) => {
@@ -69,7 +87,7 @@ export function useLocalStorage<T>(
       window.removeEventListener("storage", handleStorageChange);
       window.removeEventListener(SAME_TAB_EVENT, handleSameTabChange);
     };
-  }, [key]);
+  }, [key, parse]);
 
   return [storedValue, setValue];
 }

From f31b16cb72497edf94aac666bc3048feb0d630ea Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:45:27 +1000
Subject: [PATCH 080/172] Replace per-token streaming aria-live with hidden
 status region announcement

---
 ui/src/components/ChatMessage/ChatMessage.tsx | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ui/src/components/ChatMessage/ChatMessage.tsx b/ui/src/components/ChatMessage/ChatMessage.tsx
index 8603569..74f2336 100644
--- a/ui/src/components/ChatMessage/ChatMessage.tsx
+++ b/ui/src/components/ChatMessage/ChatMessage.tsx
@@ -289,9 +289,14 @@ function ChatMessageComponent({
         )}
 
+      {/* Streaming status announcement. Marking the whole content div as
+          `aria-live="polite"` floods screen readers with every token —
+          this hidden status region instead announces start/finish only. */}
+      <div role="status" className="sr-only">
+        {isStreaming ? "Assistant is responding" : ""}
+      </div>
 
       {isUser ? (

From 5676cba08a91f9bef6260315d7d398a3121cdf75 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:46:13 +1000
Subject: [PATCH 081/172] Add hidden streaming status region to
 MultiModelResponse for screen readers

---
 .../components/MultiModelResponse/MultiModelResponse.tsx | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/ui/src/components/MultiModelResponse/MultiModelResponse.tsx b/ui/src/components/MultiModelResponse/MultiModelResponse.tsx
index 2ec3907..6369f80 100644
--- a/ui/src/components/MultiModelResponse/MultiModelResponse.tsx
+++ b/ui/src/components/MultiModelResponse/MultiModelResponse.tsx
@@ -964,6 +964,13 @@ const ModelResponseCard = memo(function ModelResponseCard({
       </div>
 
+      {/* Streaming status announcement for screen readers. Per-token
+          updates would flood; a hidden status region announces
+          start/finish only. */}
+      <div role="status" className="sr-only">
+        {response.isStreaming ? `${response.model ?? "Model"} is responding` : ""}
+      </div>
+
       {/* Content */}
       {/* eslint-disable-next-line jsx-a11y/no-static-element-interactions, jsx-a11y/no-noninteractive-tabindex -- onMouseUp for text selection quoting; tabIndex for scrollable region keyboard access (axe: scrollable-region-focusable) */}
       <div

From 7e17dabb248b2ebc21f9647123d8c6102680803e Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:54:22 +1000
Subject: [PATCH 082/172] Spawn static models cache warm after listener bind
 so it doesn't block startup

---
 src/app.rs        | 11 ++++-------
 src/cli/server.rs | 17 +++++++++++++++++
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/app.rs b/src/app.rs
index 0052817..6a4a370 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -1107,13 +1107,10 @@ impl AppState {
             )),
         });
 
-        // Warm the static models cache so /v1/models is fast from the first request
-        if let Ok(ref state) = result
-            && state.config.features.static_models_cache.enabled()
-        {
-            state.warm_static_models_cache().await;
-        }
-
+        // Note: the static models cache is no longer warmed inside
+        // `AppState::new`. The CLI server entrypoint spawns the warm on a
+        // background task after the listener is bound so a slow/dead
+        // provider can't delay startup or the readiness probe.
         result
     }
 
diff --git a/src/cli/server.rs b/src/cli/server.rs
index a0500a0..7977e96 100644
--- a/src/cli/server.rs
+++ b/src/cli/server.rs
@@ -366,6 +366,12 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
     }
 
     let task_tracker = state.task_tracker.clone();
+    let static_cache_enabled = state.config.features.static_models_cache.enabled();
+    let warm_state = if static_cache_enabled {
+        Some(state.clone())
+    } else {
+        None
+    };
     let app = build_app(&config, state);
 
     let bind_addr = format!("{}:{}", config.server.host, config.server.port);
@@ -375,6 +381,17 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b
 
     tracing::info!("Server listening on http://{}", bind_addr);
 
+    // Warm the static models cache on a background task. With many providers
+    // (including slow/dead ones holding open connections until they time out)
+    // the warm can take tens of seconds; doing it inline would delay the
+    // listener bind, the readiness probe, and any rolling deploy gated on
+    // `/health/ready`.
+    if let Some(warm_state) = warm_state {
+        task_tracker.spawn(async move {
+            warm_state.warm_static_models_cache().await;
+        });
+    }
+
     if config.server.allow_loopback_urls || config.server.allow_private_urls {
         tracing::info!(
             allow_loopback = config.server.allow_loopback_urls,

From 4c6974a2ef90e6bc593d042c6586c41a3c0f73e1 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:56:56 +1000
Subject: [PATCH 083/172] Reject empty error.message and error.type in
 assert_error helper

---
 src/providers/test_utils.rs | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/providers/test_utils.rs b/src/providers/test_utils.rs
index 116bf48..e62ba13 100644
--- a/src/providers/test_utils.rs
+++ b/src/providers/test_utils.rs
@@ -643,11 +643,20 @@ pub mod validators {
     pub fn assert_error(body: &Value) {
         let error = &body["error"];
         assert!(error.is_object(), "Response should have 'error' object");
+        let message = error["message"]
+            .as_str()
+            .expect("error should have 'message' string field");
         assert!(
-            error["message"].is_string(),
-            "error should have 'message' field"
+            !message.is_empty(),
+            "error.message must be non-empty so clients can surface a reason"
+        );
+        let ty = error["type"]
+            .as_str()
+            .expect("error should have 'type' string field");
+        assert!(
+            !ty.is_empty(),
+            "error.type must be non-empty so clients can branch on the error class"
         );
-        assert!(error["type"].is_string(), "error should have 'type' field");
     }
 
     /// Parse SSE streaming response and return validated chunks.

From ff0bb8a12d9fe26dfffee2fd78e8564a4a0150b0 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 00:59:11 +1000
Subject: [PATCH 084/172] Panic with raw body when provider e2e response isn't
 valid JSON

---
 src/tests/provider_e2e.rs | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/tests/provider_e2e.rs b/src/tests/provider_e2e.rs
index 6fdd058..bbe5cb5 100644
--- a/src/tests/provider_e2e.rs
+++ b/src/tests/provider_e2e.rs
@@ -589,7 +589,12 @@ impl E2ETestHarness {
             save_debug_response(self.spec.name, name, status, &body_str);
         }
 
-        let json: Value = serde_json::from_slice(&body_bytes).unwrap_or(Value::Null);
+        let json: Value = serde_json::from_slice(&body_bytes).unwrap_or_else(|e| {
+            panic!(
+                "Failed to parse response as JSON: {e}\nstatus: {status}\nbody: {}",
+                String::from_utf8_lossy(&body_bytes)
+            )
+        });
         (status, json)
     }
 
@@ -639,7 +644,12 @@ impl E2ETestHarness {
         let body_bytes = axum::body::to_bytes(response.into_body(), usize::MAX)
             .await
             .unwrap();
-        let json: Value = serde_json::from_slice(&body_bytes).unwrap_or(Value::Null);
+        let json: Value = serde_json::from_slice(&body_bytes).unwrap_or_else(|e| {
+            panic!(
+                "Failed to parse response as JSON: {e}\nstatus: {status}\nbody: {}",
+                String::from_utf8_lossy(&body_bytes)
+            )
+        });
         (status, json)
     }
 
@@ -663,7 +673,12 @@ impl E2ETestHarness {
         let body_bytes = axum::body::to_bytes(response.into_body(), usize::MAX)
             .await
             .unwrap();
-        let json: Value = serde_json::from_slice(&body_bytes).unwrap_or(Value::Null);
+        let json: Value = serde_json::from_slice(&body_bytes).unwrap_or_else(|e| {
+            panic!(
+                "Failed to parse response as JSON: {e}\nstatus: {status}\nbody: {}",
+                String::from_utf8_lossy(&body_bytes)
+            )
+        });
         (status, headers, json)
     }
 
@@ -2431,7 +2446,12 @@ impl ResilienceTestHarness {
         let body_bytes = axum::body::to_bytes(response.into_body(), usize::MAX)
             .await
             .unwrap();
-        let json: Value = serde_json::from_slice(&body_bytes).unwrap_or(Value::Null);
+        let json: Value = serde_json::from_slice(&body_bytes).unwrap_or_else(|e| {
+            panic!(
+                "Failed to parse response as JSON: {e}\nstatus: {status}\nbody: {}",
+                String::from_utf8_lossy(&body_bytes)
+            )
+        });
         (status, json)
     }
 }

From fe5b84f1a46ce4ba6593b7f0f995fce9e93521bd Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 08:44:53 +1000
Subject: [PATCH 085/172] Add SSRF validation and issuer pinning to OIDC
 discovery

---
 src/app.rs           |   7 +++
 src/auth/oidc.rs     | 133 ++++++++++++++++++++++++++++++++++++++++++-
 src/auth/registry.rs |  46 ++++++++++++---
 src/routes/auth.rs   |   8 +++
 4 files changed, 182 insertions(+), 12 deletions(-)

diff --git a/src/app.rs b/src/app.rs
index 6a4a370..a66a214 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -692,12 +692,18 @@ impl AppState {
         // No default redirect URI - per-org SSO configs must specify their own
         let default_redirect_uri: Option<String> = None;
 
+        let url_validation_opts = crate::validation::UrlValidationOptions {
+            allow_loopback: config.server.allow_loopback_urls,
+            allow_private: config.server.allow_private_urls,
+        };
+
         match auth::OidcAuthenticatorRegistry::initialize_from_db(
             &svc.org_sso_configs,
             secrets.as_ref(),
             session_store.clone(),
             default_session_config.clone(),
             default_redirect_uri.clone(),
+            url_validation_opts,
         )
         .await
         {
@@ -723,6 +729,7 @@ impl AppState {
                     session_store,
                     default_session_config,
                     default_redirect_uri,
+                    url_validation_opts,
                 );
                 Some(Arc::new(empty_registry))
             }
diff --git a/src/auth/oidc.rs b/src/auth/oidc.rs
index 079b9e4..fcfc819 100644
--- a/src/auth/oidc.rs
+++ b/src/auth/oidc.rs
@@ -28,7 +28,10 @@ use super::{
         enforce_session_limit, validate_and_refresh_session,
     },
 };
-use crate::config::OidcAuthConfig;
+use crate::{
+    config::OidcAuthConfig,
+    validation::{UrlValidationOptions, validate_base_url_opts},
+};
 
 /// OIDC discovery document.
 #[derive(Debug, Clone, Deserialize)]
@@ -118,6 +121,7 @@ pub struct OidcAuthenticator {
     discovery_cache: RwLock<Option<OidcDiscovery>>,
     jwt_validator: RwLock<Option<Arc<JwtValidator>>>,
     session_store: SharedSessionStore,
+    url_validation_opts: UrlValidationOptions,
 }
 
 impl OidcAuthenticator {
@@ -125,13 +129,18 @@ impl OidcAuthenticator {
     ///
     /// For multi-node deployments, pass a `CacheSessionStore` backed by Redis.
     /// For single-node deployments, a `MemorySessionStore` can be used.
-    pub fn new(config: OidcAuthConfig, session_store: SharedSessionStore) -> Self {
+    pub fn new(
+        config: OidcAuthConfig,
+        session_store: SharedSessionStore,
+        url_validation_opts: UrlValidationOptions,
+    ) -> Self {
         Self {
             config,
             http_client: reqwest::Client::new(),
             discovery_cache: RwLock::new(None),
             jwt_validator: RwLock::new(None),
             session_store,
+            url_validation_opts,
         }
     }
 
@@ -144,7 +153,11 @@ impl OidcAuthenticator {
             "Creating OidcAuthenticator with in-memory session store. \
              Sessions will not be shared across nodes."
         );
-        Self::new(config, Arc::new(MemorySessionStore::new()))
+        Self::new(
+            config,
+            Arc::new(MemorySessionStore::new()),
+            UrlValidationOptions::default(),
+        )
     }
 
     /// Create a new OIDC authenticator with a custom HTTP client.
@@ -152,6 +165,7 @@ impl OidcAuthenticator {
         config: OidcAuthConfig,
         http_client: reqwest::Client,
         session_store: SharedSessionStore,
+        url_validation_opts: UrlValidationOptions,
     ) -> Self {
         Self {
             config,
@@ -159,6 +173,7 @@ impl OidcAuthenticator {
             discovery_cache: RwLock::new(None),
             jwt_validator: RwLock::new(None),
             session_store,
+            url_validation_opts,
         }
     }
 
@@ -187,6 +202,12 @@ impl OidcAuthenticator {
             self.config.discovery_base_url().trim_end_matches('/')
         );
 
+        // SSRF-validate the discovery URL before fetching
+        validate_base_url_opts(&discovery_url, self.url_validation_opts).map_err(|e| {
+            tracing::error!(error = %e, "OIDC discovery URL failed SSRF validation");
+            AuthError::Internal(format!("OIDC discovery URL failed SSRF validation: {e}"))
+        })?;
+
         tracing::debug!(url = %discovery_url, "Fetching OIDC discovery document");
 
         let response = self
@@ -213,6 +234,41 @@ impl OidcAuthenticator {
             AuthError::Internal(format!("Failed to parse OIDC discovery: {}", e))
         })?;
 
+        // Pin the discovery's issuer to the configured issuer to prevent IdP substitution.
+        // OIDC spec (section 4.3) requires the discovery doc's issuer to match exactly.
+        if discovery.issuer != self.config.issuer {
+            tracing::error!(
+                expected = %self.config.issuer,
+                actual = %discovery.issuer,
+                "OIDC discovery issuer mismatch"
+            );
+            return Err(AuthError::Internal(
+                "OIDC discovery issuer mismatch".to_string(),
+            ));
+        }
+
+        // SSRF-validate the endpoints we will subsequently call.
+        for (label, url) in [
+            ("authorization_endpoint", &discovery.authorization_endpoint),
+            ("token_endpoint", &discovery.token_endpoint),
+            ("jwks_uri", &discovery.jwks_uri),
+        ] {
+            validate_base_url_opts(url, self.url_validation_opts).map_err(|e| {
+                tracing::error!(error = %e, endpoint = label, "OIDC endpoint failed SSRF validation");
+                AuthError::Internal(format!(
+                    "OIDC {label} failed SSRF validation: {e}"
+                ))
+            })?;
+        }
+        if let Some(ref userinfo) = discovery.userinfo_endpoint {
+            validate_base_url_opts(userinfo, self.url_validation_opts).map_err(|e| {
+                tracing::error!(error = %e, "OIDC userinfo_endpoint failed SSRF validation");
+                AuthError::Internal(format!(
+                    "OIDC userinfo_endpoint failed SSRF validation: {e}"
+                ))
+            })?;
+        }
+
         // Update cache
         {
             let mut cache = self.discovery_cache.write().await;
@@ -606,7 +662,78 @@ pub async fn fetch_jwks_uri(
 
 #[cfg(test)]
 mod tests {
+    use serde_json::json;
+    use wiremock::{
+        Mock, MockServer, ResponseTemplate,
+        matchers::{method, path},
+    };
+
     use super::*;
+    use crate::config::SessionConfig;
+
+    fn test_oidc_config(issuer: String) -> OidcAuthConfig {
+        OidcAuthConfig {
+            issuer,
+            discovery_url: None,
+            client_id: "test-client".to_string(),
+            client_secret: "test-secret".to_string(),
+            redirect_uri: "http://callback.example".to_string(),
+            scopes: vec!["openid".to_string()],
+            identity_claim: "sub".to_string(),
+            org_claim: None,
+            groups_claim: None,
+            session: SessionConfig::default(),
+            provisioning: Default::default(),
+        }
+    }
+
+    #[tokio::test]
+    async fn test_get_discovery_rejects_issuer_mismatch() {
+        let mock_server = MockServer::start().await;
+        Mock::given(method("GET"))
+            .and(path("/.well-known/openid-configuration"))
+            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
+                "issuer": "https://attacker.example",
+                "authorization_endpoint": format!("{}/authorize", mock_server.uri()),
+                "token_endpoint": format!("{}/token", mock_server.uri()),
+                "jwks_uri": format!("{}/jwks", mock_server.uri()),
+            })))
+            .mount(&mock_server)
+            .await;
+
+        let config = test_oidc_config(mock_server.uri());
+        let auth = OidcAuthenticator::new(
+            config,
+            Arc::new(super::super::session_store::MemorySessionStore::new()),
+            UrlValidationOptions {
+                allow_loopback: true,
+                allow_private: true,
+            },
+        );
+
+        let err = auth.get_discovery().await.unwrap_err();
+        let msg = err.to_string();
+        assert!(
+            msg.contains("issuer"),
+            "expected issuer-mismatch error, got: {msg}"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_get_discovery_rejects_blocked_loopback() {
+        // Default UrlValidationOptions disallow loopback; using a 127.x discovery URL
+        // (without an actual server) should fail validation before any network call.
+        let config = test_oidc_config("http://127.0.0.1:1".to_string());
+        let auth = OidcAuthenticator::new(
+            config,
+            Arc::new(super::super::session_store::MemorySessionStore::new()),
+            UrlValidationOptions::default(),
+        );
+
+        let err = auth.get_discovery().await.unwrap_err();
+        let msg = err.to_string();
+        assert!(msg.contains("SSRF"), "expected SSRF rejection, got: {msg}");
+    }
 
     #[test]
     fn test_pkce_challenge() {
diff --git a/src/auth/registry.rs b/src/auth/registry.rs
index 3f6759c..9f13774 100644
--- a/src/auth/registry.rs
+++ b/src/auth/registry.rs
@@ -36,6 +36,7 @@ use crate::{
     config::{OidcAuthConfig, ProvisioningConfig, SessionConfig},
     secrets::SecretManager,
     services::{OrgSsoConfigError, OrgSsoConfigService, OrgSsoConfigWithClientSecret},
+    validation::UrlValidationOptions,
 };
 
 /// Error type for registry operations.
@@ -71,6 +72,8 @@ pub struct OidcAuthenticatorRegistry {
     default_session_config: SessionConfig,
     /// Default redirect URI used when org config doesn't specify one
     default_redirect_uri: Option<String>,
+    /// SSRF validation options applied to OIDC discovery / endpoint URLs.
+    url_validation_opts: UrlValidationOptions,
 }
 
 impl OidcAuthenticatorRegistry {
@@ -79,12 +82,14 @@ impl OidcAuthenticatorRegistry {
         session_store: SharedSessionStore,
         default_session_config: SessionConfig,
         default_redirect_uri: Option<String>,
+        url_validation_opts: UrlValidationOptions,
     ) -> Self {
         Self {
            authenticators: Arc::new(RwLock::new(HashMap::new())),
             session_store,
             default_session_config,
             default_redirect_uri,
+            url_validation_opts,
         }
     }
 
@@ -97,8 +102,14 @@ impl OidcAuthenticatorRegistry {
         session_store: SharedSessionStore,
         default_session_config: SessionConfig,
         default_redirect_uri: Option<String>,
+        url_validation_opts: UrlValidationOptions,
     ) -> Result<Self, RegistryError> {
-        let registry = Self::new(session_store, default_session_config, default_redirect_uri);
+        let registry = Self::new(
+            session_store,
+            default_session_config,
+            default_redirect_uri,
+            url_validation_opts,
+        );
 
         // Load only OIDC SSO configs (not SAML — those use SamlAuthenticatorRegistry)
         let configs = service
@@ -138,6 +149,7 @@ impl OidcAuthenticatorRegistry {
         Ok(OidcAuthenticator::new(
             oidc_config,
             self.session_store.clone(),
+            self.url_validation_opts,
         ))
     }
 
@@ -374,8 +386,12 @@ mod tests {
     #[tokio::test]
     async fn test_registry_register_and_get() {
         let session_store = create_test_session_store();
-        let registry =
-            OidcAuthenticatorRegistry::new(session_store.clone(), SessionConfig::default(), None);
+        let registry = OidcAuthenticatorRegistry::new(
+            session_store.clone(),
+            SessionConfig::default(),
+            None,
+            UrlValidationOptions::default(),
+        );
 
         let org_id = Uuid::new_v4();
         let config = create_test_config(org_id);
@@ -395,8 +411,12 @@ mod tests {
     #[tokio::test]
     async fn test_registry_remove() {
         let session_store = create_test_session_store();
-        let registry =
-            OidcAuthenticatorRegistry::new(session_store.clone(),
SessionConfig::default(), None); + let registry = OidcAuthenticatorRegistry::new( + session_store.clone(), + SessionConfig::default(), + None, + UrlValidationOptions::default(), + ); let org_id = Uuid::new_v4(); let config = create_test_config(org_id); @@ -415,8 +435,12 @@ mod tests { #[tokio::test] async fn test_registry_list_orgs() { let session_store = create_test_session_store(); - let registry = - OidcAuthenticatorRegistry::new(session_store.clone(), SessionConfig::default(), None); + let registry = OidcAuthenticatorRegistry::new( + session_store.clone(), + SessionConfig::default(), + None, + UrlValidationOptions::default(), + ); let org1 = Uuid::new_v4(); let org2 = Uuid::new_v4(); @@ -439,8 +463,12 @@ mod tests { #[tokio::test] async fn test_registry_len_and_is_empty() { let session_store = create_test_session_store(); - let registry = - OidcAuthenticatorRegistry::new(session_store.clone(), SessionConfig::default(), None); + let registry = OidcAuthenticatorRegistry::new( + session_store.clone(), + SessionConfig::default(), + None, + UrlValidationOptions::default(), + ); assert!(registry.is_empty().await); assert_eq!(registry.len().await, 0); diff --git a/src/routes/auth.rs b/src/routes/auth.rs index cc4810a..a55546e 100644 --- a/src/routes/auth.rs +++ b/src/routes/auth.rs @@ -1415,6 +1415,10 @@ run_migrations = true wal_mode = false busy_timeout_ms = 5000 +[server] +allow_loopback_urls = true +allow_private_urls = true + [auth.mode] type = "idp" @@ -2132,6 +2136,10 @@ run_migrations = true wal_mode = false busy_timeout_ms = 5000 +[server] +allow_loopback_urls = true +allow_private_urls = true + [auth.mode] type = "idp" From 463efa68b5c620ee8bce795924f24be85dcdcffb Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 08:52:37 +1000 Subject: [PATCH 086/172] Strip reserved-prefix roles from OIDC and SAML session claims --- src/auth/oidc.rs | 12 ++++++++++-- src/auth/saml.rs | 8 +++++++- src/middleware/layers/admin.rs | 2 +- src/middleware/mod.rs | 2 ++ 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/auth/oidc.rs b/src/auth/oidc.rs index fcfc819..8fa2e59 100644 --- a/src/auth/oidc.rs +++ b/src/auth/oidc.rs @@ -481,14 +481,22 @@ impl OidcAuthenticator { let external_id = validator.extract_identity(&claims); let org = validator.extract_org(&claims); + // IdPs must never be able to claim reserved-prefix roles (e.g. + // `_emergency_admin`, `_system_bootstrap`) — those grant special trust + // and are reserved for bootstrap/break-glass auth paths. 
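For context, the stripping behavior that comment relies on, demonstrated against a copy of the `strip_reserved_roles` helper whose definition appears in the admin-layer diff of the next patch; the standalone `main` harness is illustrative only:

// Copy of the helper from src/middleware/layers/admin.rs (see the diff in
// PATCH 086 below); anything with the reserved `_` prefix is dropped.
fn strip_reserved_roles(roles: Vec<String>) -> Vec<String> {
    roles.into_iter().filter(|r| !r.starts_with('_')).collect()
}

fn main() {
    let claimed = vec!["admin".to_string(), "_emergency_admin".to_string()];
    assert_eq!(strip_reserved_roles(claimed), vec!["admin".to_string()]);
    // Roles without the reserved prefix pass through untouched.
    assert_eq!(
        strip_reserved_roles(vec!["viewer".to_string()]),
        vec!["viewer".to_string()]
    );
}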
+ let roles = + crate::middleware::strip_reserved_roles(claims.roles.clone().unwrap_or_default()); + let groups = + crate::middleware::strip_reserved_roles(claims.groups.clone().unwrap_or_default()); + let session = OidcSession { id: Uuid::new_v4(), external_id, email: claims.email.clone(), name: claims.name.clone(), org, - groups: claims.groups.clone().unwrap_or_default(), - roles: claims.roles.clone().unwrap_or_default(), + groups, + roles, access_token: Some(tokens.access_token), refresh_token: tokens.refresh_token, created_at: now, diff --git a/src/auth/saml.rs b/src/auth/saml.rs index 6627562..3a35b81 100644 --- a/src/auth/saml.rs +++ b/src/auth/saml.rs @@ -346,13 +346,19 @@ impl SamlAuthenticator { let now = Utc::now(); let session_duration = chrono::Duration::seconds(self.config.session.duration_secs as i64); + // IdPs must never be able to claim reserved-prefix roles via SAML + // group attributes — `session.groups` falls through to `roles` in the + // middleware when `roles` is empty, which would otherwise smuggle in + // bootstrap/emergency privileges. + let groups = crate::middleware::strip_reserved_roles(assertion.groups); + let session = OidcSession { id: Uuid::new_v4(), external_id: assertion.name_id, email: assertion.email, name: assertion.name, org: None, // SAML doesn't have org claim like OIDC - groups: assertion.groups, + groups, roles: vec![], // Roles would need to be mapped from groups access_token: None, // SAML doesn't use access tokens refresh_token: None, diff --git a/src/middleware/layers/admin.rs b/src/middleware/layers/admin.rs index 76129f7..e78cd31 100644 --- a/src/middleware/layers/admin.rs +++ b/src/middleware/layers/admin.rs @@ -184,7 +184,7 @@ pub const EMERGENCY_ADMIN_ROLE: &str = "_emergency_admin"; /// Drop any role with the reserved `_` prefix from a list. IdPs and proxy /// headers must never be able to claim these roles, since the gateway grants /// extra trust to them (bootstrap / emergency break-glass). 
-pub(crate) fn strip_reserved_roles(roles: Vec<String>) -> Vec<String> {
+pub fn strip_reserved_roles(roles: Vec<String>) -> Vec<String> {
     roles.into_iter().filter(|r| !r.starts_with('_')).collect()
 }
 
diff --git a/src/middleware/mod.rs b/src/middleware/mod.rs
index fbeb3ac..b012238 100644
--- a/src/middleware/mod.rs
+++ b/src/middleware/mod.rs
@@ -36,6 +36,8 @@ pub(crate) mod util;
 
 // ── Middleware layer exports — server only ───────────────────────────────────
 #[cfg(feature = "sso")]
+pub use layers::admin::strip_reserved_roles;
+#[cfg(feature = "sso")]
 pub use layers::rate_limit::extract_client_ip_from_parts;
 #[cfg(feature = "server")]
 pub use layers::{

From 5e68979c2124d088b57bcffe4c9b567ef5955c24 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sun, 26 Apr 2026 09:01:06 +1000
Subject: [PATCH 087/172] Honor real OAuth token expiry in Vertex token caching

---
 src/providers/vertex/mod.rs | 182 +++++++++++++++---------------------
 1 file changed, 77 insertions(+), 105 deletions(-)

diff --git a/src/providers/vertex/mod.rs b/src/providers/vertex/mod.rs
index 9def374..078b3e7 100644
--- a/src/providers/vertex/mod.rs
+++ b/src/providers/vertex/mod.rs
@@ -21,7 +21,6 @@ use convert::{
     convert_responses_tool_choice_to_vertex, convert_responses_tools_to_vertex, convert_stop,
     convert_tool_choice, convert_tools, convert_vertex_to_responses_response,
 };
-use google_cloud_token::TokenSourceProvider;
 #[cfg(test)]
 use stream::StreamState;
 pub use stream::{VertexToOpenAIStream, VertexToResponsesStream};
@@ -53,14 +52,6 @@ use crate::{
 
 const VERTEX_AI_SCOPE: &str = "https://www.googleapis.com/auth/cloud-platform";
 
-/// Buffer time before token expiry to trigger refresh (5 minutes).
-/// Ensures tokens are refreshed before they actually expire.
-const TOKEN_REFRESH_BUFFER_SECS: u64 = 300;
-
-/// Default token cache duration (1 hour).
-/// Most Google OAuth tokens have a 1-hour lifetime.
-const TOKEN_CACHE_DURATION_SECS: u64 = 3600;
-
 /// Authentication mode for the Vertex provider.
 #[derive(Clone)]
 enum AuthMode {
@@ -80,7 +71,10 @@ pub struct VertexProvider {
    auth_mode: AuthMode,
    publisher: String,
    base_url_override: Option<String>,
-    token_cache: Arc<RwLock<Option<CachedToken>>>,
+    /// Cached token-source provider. The underlying `DefaultTokenSourceProvider`
+    /// wraps a `ReuseTokenSource`, which honors the token's actual `expiry`
+    /// rather than a hardcoded duration — so we let it own all caching.
+    token_source: Arc<RwLock<Option<Arc<dyn google_cloud_token::TokenSourceProvider>>>>,
    timeout: Duration,
    retry: RetryConfig,
    circuit_breaker_config: CircuitBreakerConfig,
@@ -89,11 +83,6 @@ pub struct VertexProvider {
     image_fetch_config: ImageFetchConfig,
 }
 
-struct CachedToken {
-    token: String,
-    expires_at: std::time::Instant,
-}
-
 impl VertexProvider {
     /// Create a provider from configuration with a shared circuit breaker.
     pub fn from_config_with_registry(
@@ -132,7 +121,7 @@ impl VertexProvider {
             auth_mode,
             publisher: config.publisher.clone(),
             base_url_override: config.base_url.clone(),
-            token_cache: Arc::new(RwLock::new(None)),
+            token_source: Arc::new(RwLock::new(None)),
             timeout: Duration::from_secs(config.timeout_secs),
             retry: config.retry.clone(),
             circuit_breaker_config: config.circuit_breaker.clone(),
@@ -189,107 +178,90 @@ impl VertexProvider {
             AuthMode::OAuth { credentials, ..
} => credentials, }; - // Check cache first - { - let cache = self.token_cache.read().await; - if let Some(cached) = cache.as_ref() { - // Return cached token if not expired (with refresh buffer) - if cached.expires_at - > std::time::Instant::now() - + std::time::Duration::from_secs(TOKEN_REFRESH_BUFFER_SECS) - { - return Ok(Some(cached.token.clone())); + // Reuse the cached `TokenSourceProvider` if we already created one. The + // provider's underlying `ReuseTokenSource` honors the token's actual + // `expiry`, so we don't need (and should not maintain) a parallel cache. + let provider = { + let guard = self.token_source.read().await; + guard.clone() + }; + let provider = match provider { + Some(p) => p, + None => { + let mut guard = self.token_source.write().await; + if let Some(p) = guard.as_ref() { + p.clone() + } else { + let p: Arc = + Arc::from(self.build_token_source(credentials).await?); + *guard = Some(p.clone()); + p } } - } - - // Get token based on credential type - let token = match credentials { - GcpCredentials::Default => { - // Use Application Default Credentials - let config = - google_cloud_auth::project::Config::default().with_scopes(&[VERTEX_AI_SCOPE]); - - let ts = google_cloud_auth::token::DefaultTokenSourceProvider::new(config) - .await - .map_err(|e| { - ProviderError::Internal(format!("Failed to create token source: {}", e)) - })?; - - ts.token_source() - .token() - .await - .map_err(|e| ProviderError::Internal(format!("Failed to get token: {}", e)))? - } - GcpCredentials::ServiceAccount { key_path } => { - // Load service account key from file - self.get_token_from_service_account_file(Path::new(key_path)) - .await? - } - GcpCredentials::ServiceAccountJson { json } => { - // Parse service account key from JSON string - self.get_token_from_service_account_json(json).await? - } }; - // Cache token (assume standard expiry for Google tokens) - { - let mut cache = self.token_cache.write().await; - *cache = Some(CachedToken { - token: token.clone(), - expires_at: std::time::Instant::now() - + std::time::Duration::from_secs(TOKEN_CACHE_DURATION_SECS), - }); - } + let token = provider + .token_source() + .token() + .await + .map_err(|e| ProviderError::Internal(format!("Failed to get token: {}", e)))?; Ok(Some(token)) } - /// Get token from a service account key file. - async fn get_token_from_service_account_file( - &self, - key_path: &Path, - ) -> Result { - let key_json = tokio::fs::read_to_string(key_path).await.map_err(|e| { - ProviderError::Internal(format!( - "Failed to read service account key file '{}': {}", - key_path.display(), - e - )) - })?; - - self.get_token_from_service_account_json(&key_json).await - } - - /// Get token from a service account key JSON string. - async fn get_token_from_service_account_json( + /// Build a `DefaultTokenSourceProvider` for the configured credentials. 
+ async fn build_token_source( &self, - json: &str, - ) -> Result { - use google_cloud_auth::credentials::CredentialsFile; - - let creds: CredentialsFile = serde_json::from_str(json).map_err(|e| { - ProviderError::Internal(format!("Failed to parse service account JSON: {}", e)) - })?; + credentials: &GcpCredentials, + ) -> Result, ProviderError> { + use google_cloud_auth::{credentials::CredentialsFile, token::DefaultTokenSourceProvider}; let config = google_cloud_auth::project::Config::default().with_scopes(&[VERTEX_AI_SCOPE]); - let ts = google_cloud_auth::token::DefaultTokenSourceProvider::new_with_credentials( - config, - Box::new(creds), - ) - .await - .map_err(|e| { - ProviderError::Internal(format!( - "Failed to create token source from service account: {}", - e - )) - })?; - - ts.token_source() - .token() - .await - .map_err(|e| ProviderError::Internal(format!("Failed to get token: {}", e))) + match credentials { + GcpCredentials::Default => { + let ts = DefaultTokenSourceProvider::new(config).await.map_err(|e| { + ProviderError::Internal(format!("Failed to create token source: {}", e)) + })?; + Ok(Box::new(ts)) + } + GcpCredentials::ServiceAccount { key_path } => { + let json = tokio::fs::read_to_string(Path::new(key_path)) + .await + .map_err(|e| { + ProviderError::Internal(format!( + "Failed to read service account key file '{}': {}", + key_path, e + )) + })?; + let creds: CredentialsFile = serde_json::from_str(&json).map_err(|e| { + ProviderError::Internal(format!("Failed to parse service account JSON: {}", e)) + })?; + let ts = DefaultTokenSourceProvider::new_with_credentials(config, Box::new(creds)) + .await + .map_err(|e| { + ProviderError::Internal(format!( + "Failed to create token source from service account: {}", + e + )) + })?; + Ok(Box::new(ts)) + } + GcpCredentials::ServiceAccountJson { json } => { + let creds: CredentialsFile = serde_json::from_str(json).map_err(|e| { + ProviderError::Internal(format!("Failed to parse service account JSON: {}", e)) + })?; + let ts = DefaultTokenSourceProvider::new_with_credentials(config, Box::new(creds)) + .await + .map_err(|e| { + ProviderError::Internal(format!( + "Failed to create token source from service account: {}", + e + )) + })?; + Ok(Box::new(ts)) + } + } } /// Build a request with appropriate authentication. From ab5a8b012dea48db1433afd083afc3a434009d38 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:03:01 +1000 Subject: [PATCH 088/172] Drop bespoke IPv4/IPv6 form validators; rely on backend IpNet parser --- .../Admin/ApiKeyFormModal/ApiKeyFormModal.tsx | 63 +++---------------- 1 file changed, 8 insertions(+), 55 deletions(-) diff --git a/ui/src/components/Admin/ApiKeyFormModal/ApiKeyFormModal.tsx b/ui/src/components/Admin/ApiKeyFormModal/ApiKeyFormModal.tsx index 2307736..6fcb2ff 100644 --- a/ui/src/components/Admin/ApiKeyFormModal/ApiKeyFormModal.tsx +++ b/ui/src/components/Admin/ApiKeyFormModal/ApiKeyFormModal.tsx @@ -51,67 +51,20 @@ function validateModelPatterns(value: string | undefined): boolean { return patterns.every((p) => MODEL_PATTERN_REGEX.test(p)); } -// Validation for IP/CIDR notation -const IPV4_REGEX = /^(\d{1,3}\.){3}\d{1,3}(\/\d{1,2})?$/; - -function isValidIPv4(ip: string): boolean { - const cidrMatch = ip.match(/^(.+)\/(\d+)$/); - const address = cidrMatch ? cidrMatch[1] : ip; - const prefix = cidrMatch ? 
parseInt(cidrMatch[2], 10) : null; - - // Check prefix range for IPv4 (0-32) - if (prefix !== null && (prefix < 0 || prefix > 32)) return false; - - // Validate IPv4 format and octet ranges - if (!IPV4_REGEX.test(ip)) return false; - const octets = address.split(".").map((o) => parseInt(o, 10)); - return octets.every((o) => o >= 0 && o <= 255); -} - -function isValidIPv6(ip: string): boolean { - const cidrMatch = ip.match(/^(.+)\/(\d+)$/); - const address = cidrMatch ? cidrMatch[1] : ip; - const prefix = cidrMatch ? parseInt(cidrMatch[2], 10) : null; - - // Check prefix range for IPv6 (0-128) - if (prefix !== null && (prefix < 0 || prefix > 128)) return false; - - // Basic structure checks - if (!/^[0-9a-fA-F:]+$/.test(address)) return false; - - // No triple colons allowed - if (address.includes(":::")) return false; - - // Only one :: allowed - const doubleColonCount = (address.match(/::/g) || []).length; - if (doubleColonCount > 1) return false; - - // Split and validate groups - const groups = address.split(":"); - - // Handle :: compression - if (address.includes("::")) { - // With ::, total groups after expansion must be <= 8 - const nonEmptyGroupCount = groups.filter((g) => g !== "").length; - // :: can represent 1 to (8 - nonEmptyGroupCount) groups - if (nonEmptyGroupCount > 7) return false; - } else { - // Without ::, must have exactly 8 groups - if (groups.length !== 8) return false; - } - - // Validate each group is valid hex (1-4 chars) - const nonEmptyGroups = groups.filter((g) => g !== ""); - return nonEmptyGroups.every((g) => g.length >= 1 && g.length <= 4 && /^[0-9a-fA-F]+$/.test(g)); +// Lightweight shape check: catch obvious typos client-side, but rely on the +// backend (which uses Rust's `IpNet`/`IpAddr` parsers) for authoritative +// IP/CIDR validation. Duplicating that logic in the browser only invites drift. +function looksLikeCidrEntry(entry: string): boolean { + return /^[0-9a-fA-F:.]+(\/\d{1,3})?$/.test(entry); } function validateCidrNotation(value: string | undefined): boolean { if (!value || value.trim() === "") return true; - const entries = value + return value .split("\n") .map((e) => e.trim()) - .filter(Boolean); - return entries.every((entry) => isValidIPv4(entry) || isValidIPv6(entry)); + .filter(Boolean) + .every(looksLikeCidrEntry); } const createApiKeySchema = z From 7d3b258a6fb718f174dab10985362cb9f7fd6612 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:10:43 +1000 Subject: [PATCH 089/172] Make Anthropic interleaved-thinking model allowlist configurable --- src/config/providers.rs | 14 ++++++ src/providers/anthropic/mod.rs | 84 ++++++++++++++++++++++++++++++---- src/routing/resolver.rs | 1 + 3 files changed, 91 insertions(+), 8 deletions(-) diff --git a/src/config/providers.rs b/src/config/providers.rs index 136a39d..c8e315c 100644 --- a/src/config/providers.rs +++ b/src/config/providers.rs @@ -770,6 +770,19 @@ pub struct AnthropicProviderConfig { /// Sovereignty and compliance metadata for this provider. #[serde(default, skip_serializing_if = "Option::is_none")] pub sovereignty: Option, + + /// Models for which the `interleaved-thinking-2025-05-14` beta header + /// should be sent when thinking is enabled. Each entry is matched against + /// the model name as a substring (e.g. `"opus-4-6"` matches + /// `"claude-opus-4-6-20250101"`). Some Anthropic models reject this + /// header, so override the default list when adding or removing support. + /// Set to an empty list to disable the beta header entirely. 
+    #[serde(default = "default_interleaved_thinking_models")]
+    pub interleaved_thinking_models: Vec<String>,
+}
+
+pub fn default_interleaved_thinking_models() -> Vec<String> {
+    vec!["opus-4-6".to_string(), "opus-4.6".to_string()]
 }
 
 impl AnthropicProviderConfig {
@@ -2868,6 +2881,7 @@ mod tests {
             health_check: ProviderHealthCheckConfig::default(),
             catalog_provider: None,
             sovereignty: None,
+            interleaved_thinking_models: default_interleaved_thinking_models(),
         };
 
         let debug_output = format!("{:?}", config);
diff --git a/src/providers/anthropic/mod.rs b/src/providers/anthropic/mod.rs
index c8740d9..284e345 100644
--- a/src/providers/anthropic/mod.rs
+++ b/src/providers/anthropic/mod.rs
@@ -15,7 +15,7 @@ use convert::{
     convert_anthropic_to_responses_response, convert_chat_completion_reasoning_config,
     convert_messages, convert_reasoning_config, convert_response,
     convert_responses_input_to_messages, convert_responses_tool_choice, convert_responses_tools,
-    convert_stop, convert_tool_choice, convert_tools, supports_adaptive_thinking,
+    convert_stop, convert_tool_choice, convert_tools,
 };
 use serde::Deserialize;
 use stream::{AnthropicToOpenAIStream, AnthropicToResponsesStream};
@@ -45,18 +45,25 @@ const DEFAULT_MAX_TOKENS: u32 = 4096;
 
 /// Compute the `anthropic-beta` header value based on model and thinking config.
 ///
-/// When thinking is enabled on models that support interleaved thinking (Opus 4.6+),
-/// include the `interleaved-thinking-2025-05-14` beta flag.
+/// When thinking is enabled on models that match an entry in
+/// `interleaved_thinking_models` (substring match), include the
+/// `interleaved-thinking-2025-05-14` beta flag. Some Anthropic models reject
+/// this header, so the allowlist is configurable.
 fn compute_beta_header(
     model: &str,
     thinking: &Option<types::AnthropicThinkingConfig>,
+    interleaved_thinking_models: &[String],
 ) -> Option<String> {
     let thinking_enabled = matches!(
         thinking,
         Some(types::AnthropicThinkingConfig::Enabled { .. }) | Some(types::AnthropicThinkingConfig::Adaptive)
     );
 
-    if thinking_enabled && supports_adaptive_thinking(model) {
+    if thinking_enabled
+        && interleaved_thinking_models
+            .iter()
+            .any(|pat| !pat.is_empty() && model.contains(pat.as_str()))
+    {
         Some("interleaved-thinking-2025-05-14".to_string())
     } else {
         None
@@ -74,6 +81,7 @@ pub struct AnthropicProvider {
    circuit_breaker: Option>,
    streaming_buffer: StreamingBufferConfig,
    image_fetch_config: ImageFetchConfig,
+    interleaved_thinking_models: Vec<String>,
 }
 
 impl AnthropicProvider {
@@ -116,6 +124,7 @@ impl AnthropicProvider {
             circuit_breaker,
             streaming_buffer: config.streaming_buffer.clone(),
             image_fetch_config,
+            interleaved_thinking_models: config.interleaved_thinking_models.clone(),
         }
     }
 }
@@ -208,8 +217,11 @@ impl Provider for AnthropicProvider {
         };
 
         // Pre-serialize request body before retry loop to avoid repeated serialization
-        let beta_header =
-            compute_beta_header(&anthropic_request.model, &anthropic_request.thinking);
+        let beta_header = compute_beta_header(
+            &anthropic_request.model,
+            &anthropic_request.thinking,
+            &self.interleaved_thinking_models,
+        );
         let body = serde_json::to_vec(&anthropic_request).unwrap_or_default();
 
         let url = format!("{}/v1/messages", self.base_url);
@@ -346,8 +358,11 @@ impl Provider for AnthropicProvider {
         };
 
         // Pre-serialize request body before retry loop to avoid repeated serialization
-        let beta_header =
-            compute_beta_header(&anthropic_request.model, &anthropic_request.thinking);
+        let beta_header = compute_beta_header(
+            &anthropic_request.model,
+            &anthropic_request.thinking,
+            &self.interleaved_thinking_models,
+        );
         let body = serde_json::to_vec(&anthropic_request).unwrap_or_default();
 
         let url = format!("{}/v1/messages", self.base_url);
@@ -500,3 +515,56 @@ impl Provider for AnthropicProvider {
         Ok(ModelsResponse { data: all_models })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn enabled() -> Option<types::AnthropicThinkingConfig> {
+        Some(types::AnthropicThinkingConfig::Adaptive)
+    }
+
+    #[test]
+    fn beta_header_set_for_allowed_model() {
+        let allow = vec!["opus-4-6".to_string()];
+        assert_eq!(
+            compute_beta_header("claude-opus-4-6-20260101", &enabled(), &allow),
+            Some("interleaved-thinking-2025-05-14".to_string())
+        );
+    }
+
+    #[test]
+    fn beta_header_skipped_for_unlisted_model() {
+        let allow = vec!["opus-4-6".to_string()];
+        assert_eq!(
+            compute_beta_header("claude-sonnet-4-5-20250929", &enabled(), &allow),
+            None
+        );
+    }
+
+    #[test]
+    fn beta_header_skipped_when_thinking_disabled() {
+        let allow = vec!["opus-4-6".to_string()];
+        assert_eq!(
+            compute_beta_header("claude-opus-4-6-20260101", &None, &allow),
+            None
+        );
+    }
+
+    #[test]
+    fn beta_header_disabled_with_empty_allowlist() {
+        assert_eq!(
+            compute_beta_header("claude-opus-4-6-20260101", &enabled(), &[]),
+            None
+        );
+    }
+
+    #[test]
+    fn beta_header_ignores_empty_pattern() {
+        let allow = vec![String::new()];
+        assert_eq!(
+            compute_beta_header("claude-opus-4-6", &enabled(), &allow),
+            None
+        );
+    }
+}
diff --git a/src/routing/resolver.rs b/src/routing/resolver.rs
index 46454d5..53bec75 100644
--- a/src/routing/resolver.rs
+++ b/src/routing/resolver.rs
@@ -634,6 +634,7 @@ pub async fn dynamic_provider_to_config(
             health_check: Default::default(),
             catalog_provider: None,
             sovereignty: provider.sovereignty.clone(),
+            interleaved_thinking_models: crate::config::default_interleaved_thinking_models(),
         },
     )),
     #[cfg(feature = "provider-azure")]

From b1571bf6919958928bab47a0ca1b3cc0dd039816 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sun, 26 Apr 2026 09:17:17
+1000 Subject: [PATCH 090/172] Re-scope DLQ retry authz to the queued entry's tenant fields --- src/routes/admin/dlq.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/routes/admin/dlq.rs b/src/routes/admin/dlq.rs index 2a84537..9f240de 100644 --- a/src/routes/admin/dlq.rs +++ b/src/routes/admin/dlq.rs @@ -287,7 +287,6 @@ pub async fn retry( Extension(authz): Extension, Path(id): Path, ) -> Result, AdminError> { - authz.require("dlq", "update", None, None, None, None)?; let dlq = get_dlq(&state)?; let db = state .db @@ -302,7 +301,13 @@ pub async fn retry( let entry = match entry { Some(e) => e, - None => return Err(AdminError::NotFound("DLQ entry".to_string())), + None => { + // Don't disclose existence to callers without DLQ access. Returning + // 404 here is fine because the per-entry scope check below would + // also yield a 4xx; we want a consistent response either way. + authz.require("dlq", "update", None, None, None, None)?; + return Err(AdminError::NotFound("DLQ entry".to_string())); + } }; // Process based on entry type @@ -312,6 +317,22 @@ pub async fn retry( let usage_entry: UsageLogEntry = serde_json::from_str(&entry.payload) .map_err(|e| AdminError::BadRequest(format!("Invalid usage_log payload: {}", e)))?; + // Authorize against the entry's actual tenant scope so a tenant + // admin can't retry another tenant's queued work; platform admins + // (no scope) are also satisfied by this call. + let org_id = usage_entry.org_id.map(|id| id.to_string()); + let team_id = usage_entry.team_id.map(|id| id.to_string()); + let project_id = usage_entry.project_id.map(|id| id.to_string()); + let user_id = usage_entry.user_id.map(|id| id.to_string()); + authz.require( + "dlq", + "update", + org_id.as_deref(), + team_id.as_deref(), + project_id.as_deref(), + user_id.as_deref(), + )?; + // Try to write to database match db.usage().log(usage_entry).await { Ok(_) => { @@ -339,6 +360,9 @@ pub async fn retry( } } _ => { + // Unknown entry type: gate behind platform-level dlq:update so we + // don't expose payload type to callers without any DLQ access. + authz.require("dlq", "update", None, None, None, None)?; return Err(AdminError::BadRequest(format!( "Unsupported entry type for manual retry: {}", entry.entry_type From 9a9bdf0a73061d2a5b9aa2d986f4ccc823ba0da4 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:27:15 +1000 Subject: [PATCH 091/172] Cap accumulated reasoning and text content in stream state --- src/config/providers.rs | 13 ++++++ src/providers/anthropic/stream.rs | 66 ++++++++++++++++++++++++++++++- src/providers/bedrock/stream.rs | 35 +++++++++++++++- 3 files changed, 110 insertions(+), 4 deletions(-) diff --git a/src/config/providers.rs b/src/config/providers.rs index c8e315c..52c490a 100644 --- a/src/config/providers.rs +++ b/src/config/providers.rs @@ -1516,6 +1516,14 @@ pub struct StreamingBufferConfig { /// Default: 1000 chunks #[serde(default = "default_max_output_buffer_chunks")] pub max_output_buffer_chunks: usize, + + /// Maximum total bytes of accumulated response state (text and reasoning + /// content) per stream. Bounds memory usage if a provider produces a + /// runaway response. Bytes beyond this cap are silently dropped from the + /// state buffer, but pass-through deltas are still emitted to the client. 
+ /// Default: 32 MB + #[serde(default = "default_max_response_state_bytes")] + pub max_response_state_bytes: usize, } impl Default for StreamingBufferConfig { @@ -1523,6 +1531,7 @@ impl Default for StreamingBufferConfig { Self { max_input_buffer_bytes: default_max_input_buffer_bytes(), max_output_buffer_chunks: default_max_output_buffer_chunks(), + max_response_state_bytes: default_max_response_state_bytes(), } } } @@ -1531,6 +1540,10 @@ fn default_max_input_buffer_bytes() -> usize { 16 * 1024 * 1024 // 16 MB } +fn default_max_response_state_bytes() -> usize { + 32 * 1024 * 1024 // 32 MB +} + fn default_max_output_buffer_chunks() -> usize { 1000 } diff --git a/src/providers/anthropic/stream.rs b/src/providers/anthropic/stream.rs index 22b5bcb..7b860a8 100644 --- a/src/providers/anthropic/stream.rs +++ b/src/providers/anthropic/stream.rs @@ -26,6 +26,26 @@ pub(crate) fn strip_anthropic_prefix(id: &str, prefix: &str) -> String { .collect() } +/// Append `delta` to `buf` up to `max_bytes` total. Slices on a UTF-8 +/// character boundary so the buffer remains valid UTF-8. Once the cap is hit +/// further deltas are dropped from the in-memory state — pass-through SSE +/// chunks to the client are unaffected. +fn bounded_push(buf: &mut String, delta: &str, max_bytes: usize) { + if buf.len() >= max_bytes { + return; + } + let remaining = max_bytes - buf.len(); + if delta.len() <= remaining { + buf.push_str(delta); + return; + } + let mut end = remaining; + while end > 0 && !delta.is_char_boundary(end) { + end -= 1; + } + buf.push_str(&delta[..end]); +} + // ============================================================================ // Anthropic Streaming Event Types // ============================================================================ @@ -726,6 +746,8 @@ pub struct AnthropicToResponsesStream { max_input_buffer_bytes: usize, /// Maximum output buffer chunks max_output_buffer_chunks: usize, + /// Maximum total bytes of accumulated text+reasoning state + max_response_state_bytes: usize, } impl AnthropicToResponsesStream { @@ -743,6 +765,7 @@ impl AnthropicToResponsesStream { output_buffer: std::collections::VecDeque::new(), max_input_buffer_bytes: streaming_buffer.max_input_buffer_bytes, max_output_buffer_chunks: streaming_buffer.max_output_buffer_chunks, + max_response_state_bytes: streaming_buffer.max_response_state_bytes, } } @@ -946,7 +969,11 @@ impl AnthropicToResponsesStream { AnthropicStreamEvent::ContentBlockDelta { index, delta } => match delta { ContentDelta::TextDelta { text } => { - self.state.text_content.push_str(&text); + bounded_push( + &mut self.state.text_content, + &text, + self.max_response_state_bytes, + ); // Emit text delta let msg_output_index = self.message_output_index(); @@ -997,7 +1024,11 @@ impl AnthropicToResponsesStream { ContentDelta::ThinkingDelta { thinking } => { // Emit thinking delta as reasoning content if self.state.thinking_block_indices.contains(&index) { - self.state.reasoning_content.push_str(&thinking); + bounded_push( + &mut self.state.reasoning_content, + &thinking, + self.max_response_state_bytes, + ); // Emit reasoning summary delta let reasoning_id = format!( @@ -1400,6 +1431,37 @@ where mod tests { use super::*; + #[test] + fn bounded_push_under_cap_appends_full_delta() { + let mut buf = "hello".to_string(); + bounded_push(&mut buf, " world", 100); + assert_eq!(buf, "hello world"); + } + + #[test] + fn bounded_push_clamps_at_cap() { + let mut buf = "abc".to_string(); + bounded_push(&mut buf, "defghi", 5); + assert_eq!(buf, "abcde"); + } + 
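For context, a standalone harness (not part of the patch) that runs the same `bounded_push` logic over every cap size for a multibyte string; the point is that the boundary walk keeps `&delta[..end]` on a char boundary, so no cap value can panic or split a code point:

// Copy of the cap logic from the diff above.
fn bounded_push(buf: &mut String, delta: &str, max_bytes: usize) {
    if buf.len() >= max_bytes {
        return;
    }
    let remaining = max_bytes - buf.len();
    if delta.len() <= remaining {
        buf.push_str(delta);
        return;
    }
    let mut end = remaining;
    while end > 0 && !delta.is_char_boundary(end) {
        end -= 1;
    }
    buf.push_str(&delta[..end]);
}

fn main() {
    // "héllo 🌍" mixes 1-, 2-, and 4-byte code points (11 bytes total). If the
    // boundary walk were skipped, `&delta[..end]` would panic for any cap that
    // lands inside `é` or `🌍`; with it, every cap size succeeds.
    for cap in 0..=11 {
        let mut buf = String::new();
        bounded_push(&mut buf, "héllo 🌍", cap);
        assert!(buf.len() <= cap, "cap {cap} exceeded: {}", buf.len());
    }
    println!("all caps ok");
}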
+ #[test] + fn bounded_push_drops_when_full() { + let mut buf = "abcde".to_string(); + bounded_push(&mut buf, "fg", 5); + assert_eq!(buf, "abcde"); + } + + #[test] + fn bounded_push_respects_utf8_boundary() { + let mut buf = String::new(); + // "aé" is 3 bytes (a=1, é=2). Cap=2: push "a", drop é to avoid + // splitting the multibyte char. + bounded_push(&mut buf, "aé", 2); + assert!(buf.is_char_boundary(buf.len())); + assert_eq!(buf, "a"); + } + #[test] fn test_parse_message_start() { let json = r#"{"type":"message_start","message":{"id":"msg_123","model":"claude-sonnet-4-5-20250929","usage":{"input_tokens":25,"output_tokens":1}}}"#; diff --git a/src/providers/bedrock/stream.rs b/src/providers/bedrock/stream.rs index 2c6857d..5ca2174 100644 --- a/src/providers/bedrock/stream.rs +++ b/src/providers/bedrock/stream.rs @@ -18,6 +18,26 @@ use futures_util::stream::Stream; use super::types::*; use crate::config::StreamingBufferConfig; +/// Append `delta` to `buf` up to `max_bytes` total. Slices on a UTF-8 +/// character boundary so the buffer remains valid UTF-8. Once the cap is hit +/// further deltas are dropped from in-memory state — pass-through SSE chunks +/// to the client are unaffected. +fn bounded_push(buf: &mut String, delta: &str, max_bytes: usize) { + if buf.len() >= max_bytes { + return; + } + let remaining = max_bytes - buf.len(); + if delta.len() <= remaining { + buf.push_str(delta); + return; + } + let mut end = remaining; + while end > 0 && !delta.is_char_boundary(end) { + end -= 1; + } + buf.push_str(&delta[..end]); +} + /// Stream state for tracking the transformation #[derive(Debug, Default)] pub(super) struct StreamState { @@ -535,6 +555,8 @@ pub struct BedrockToResponsesStream { pub max_input_buffer_bytes: usize, /// Maximum output buffer chunks pub max_output_buffer_chunks: usize, + /// Maximum total bytes of accumulated text+reasoning state + pub max_response_state_bytes: usize, } impl BedrockToResponsesStream { @@ -567,6 +589,7 @@ impl BedrockToResponsesStream { output_buffer: std::collections::VecDeque::new(), max_input_buffer_bytes: streaming_buffer.max_input_buffer_bytes, max_output_buffer_chunks: streaming_buffer.max_output_buffer_chunks, + max_response_state_bytes: streaming_buffer.max_response_state_bytes, } } @@ -798,7 +821,11 @@ impl BedrockToResponsesStream { .reasoning_block_indices .contains(&delta.content_block_index) { - self.state.reasoning_content.push_str(&reasoning.text); + bounded_push( + &mut self.state.reasoning_content, + &reasoning.text, + self.max_response_state_bytes, + ); // Accumulate signature if present if let Some(sig) = &reasoning.signature { @@ -820,7 +847,11 @@ impl BedrockToResponsesStream { else if let Some(text) = delta.delta.text && !text.is_empty() { - self.state.text_content.push_str(&text); + bounded_push( + &mut self.state.text_content, + &text, + self.max_response_state_bytes, + ); // Emit text delta let msg_output_index = self.message_output_index(); From 0b5a7a5c48eceff8d19c476b498d18a23a169f79 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:29:16 +1000 Subject: [PATCH 092/172] Pin Dockerfile builder to stable Rust 1.90 and set MSRV --- Cargo.toml | 2 ++ Dockerfile | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ac3ba58..5e0ad98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,8 @@ name = "hadrian" version = "0.0.0-alpha.12" edition = "2024" +# 1.88 stabilized `if let` chains; 1.85 stabilized edition 2024. 
+rust-version = "1.88" license = "Apache-2.0 OR MIT" description = "An open-source AI Gateway providing a unified OpenAI-compatible API for routing requests to multiple LLM providers" repository = "https://github.com/ScriptSmith/hadrian" diff --git a/Dockerfile b/Dockerfile index 1bd24b5..523bd0f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,7 +44,9 @@ WORKDIR /app/docs RUN pnpm build # Stage 2: Build Rust application -FROM rustlang/rust:nightly-slim AS builder +# Pinned to a stable Rust toolchain. Requires 1.88+ for `if let` chains and +# 1.85+ for edition 2024. +FROM rust:1.90-slim AS builder # Install build dependencies # Includes SAML libraries (libxml2, libxslt, xmlsec1) for samael crate From b68cf6267ef24ff75edb9901b7541bf722bf2e0c Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:33:14 +1000 Subject: [PATCH 093/172] Replace startup unwrap/expect with structured exit and bump MSRV to 1.91 --- Cargo.toml | 5 +++-- Dockerfile | 5 ++--- src/cli/server.rs | 55 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5e0ad98..9fe6345 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,8 +2,9 @@ name = "hadrian" version = "0.0.0-alpha.12" edition = "2024" -# 1.88 stabilized `if let` chains; 1.85 stabilized edition 2024. -rust-version = "1.88" +# 1.91 stabilized `str::{floor,ceil}_char_boundary`; 1.88 stabilized `if let` +# chains; 1.85 stabilized edition 2024. +rust-version = "1.91" license = "Apache-2.0 OR MIT" description = "An open-source AI Gateway providing a unified OpenAI-compatible API for routing requests to multiple LLM providers" repository = "https://github.com/ScriptSmith/hadrian" diff --git a/Dockerfile b/Dockerfile index 523bd0f..04b62ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,9 +44,8 @@ WORKDIR /app/docs RUN pnpm build # Stage 2: Build Rust application -# Pinned to a stable Rust toolchain. Requires 1.88+ for `if let` chains and -# 1.85+ for edition 2024. -FROM rust:1.90-slim AS builder +# Pinned to a stable Rust toolchain. MSRV is 1.91 (see Cargo.toml). 
+FROM rust:1.91-slim AS builder # Install build dependencies # Includes SAML libraries (libxml2, libxslt, xmlsec1) for samael crate diff --git a/src/cli/server.rs b/src/cli/server.rs index 7977e96..e746837 100644 --- a/src/cli/server.rs +++ b/src/cli/server.rs @@ -52,8 +52,13 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b // Initialize observability (tracing, metrics) // Keep the guard alive to ensure proper OpenTelemetry shutdown - let _tracing_guard = - observability::init_tracing(&config.observability).expect("Failed to initialize tracing"); + let _tracing_guard = match observability::init_tracing(&config.observability) { + Ok(g) => g, + Err(e) => { + eprintln!("Failed to initialize tracing: {e}"); + std::process::exit(1); + } + }; if let Err(e) = observability::metrics::init_metrics(&config.observability.metrics) { tracing::warn!(error = %e, "Failed to initialize metrics: {e}"); @@ -100,9 +105,13 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b ); } - let state = AppState::new(config.clone()) - .await - .expect("Failed to initialize application state"); + let state = match AppState::new(config.clone()).await { + Ok(state) => state, + Err(e) => { + tracing::error!(error = %e, "Failed to initialize application state"); + std::process::exit(1); + } + }; // Check for RBAC configuration mismatches with database state if !config.auth.rbac.enabled @@ -375,9 +384,13 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b let app = build_app(&config, state); let bind_addr = format!("{}:{}", config.server.host, config.server.port); - let listener = tokio::net::TcpListener::bind(&bind_addr) - .await - .expect("Failed to bind to address"); + let listener = match tokio::net::TcpListener::bind(&bind_addr).await { + Ok(listener) => listener, + Err(e) => { + tracing::error!(error = %e, bind_addr = %bind_addr, "Failed to bind to address"); + std::process::exit(1); + } + }; tracing::info!("Server listening on http://{}", bind_addr); @@ -426,7 +439,7 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b // `into_make_service_with_connect_info` is required so middleware can read the // connecting peer address via `ConnectInfo` for IP-based rate limits, // API-key IP allowlists, and audit logging. 
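For context, a minimal sketch of the consumer side of this change, assuming only `axum` and `tokio`; the route and handler are illustrative, not part of this series. Once the service is built with `into_make_service_with_connect_info::<SocketAddr>()`, any handler or middleware can extract the peer address:

use std::net::SocketAddr;

use axum::{Router, extract::ConnectInfo, routing::get};

// Illustrative handler: `ConnectInfo` yields the connecting peer's address,
// which the gateway's middleware uses for rate limits and allowlists.
async fn whoami(ConnectInfo(addr): ConnectInfo<SocketAddr>) -> String {
    format!("connected from {addr}")
}

#[tokio::main]
async fn main() {
    let app = Router::new().route("/whoami", get(whoami));
    let listener = tokio::net::TcpListener::bind("127.0.0.1:3000").await.unwrap();
    axum::serve(
        listener,
        app.into_make_service_with_connect_info::<SocketAddr>(),
    )
    .await
    .unwrap();
}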
- axum::serve( + if let Err(e) = axum::serve( listener, app.into_make_service_with_connect_info::(), ) @@ -436,7 +449,10 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b shutdown_config, )) .await - .unwrap(); + { + tracing::error!(error = %e, "Server error"); + std::process::exit(1); + } } async fn shutdown_signal( @@ -448,17 +464,22 @@ async fn shutdown_signal( shutdown_config: crate::config::ShutdownConfig, ) { let ctrl_c = async { - tokio::signal::ctrl_c() - .await - .expect("failed to install Ctrl+C handler"); + if let Err(e) = tokio::signal::ctrl_c().await { + tracing::error!(error = %e, "Failed to install Ctrl+C handler"); + } }; #[cfg(unix)] let terminate = async { - tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) - .expect("failed to install signal handler") - .recv() - .await; + match tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) { + Ok(mut sig) => { + sig.recv().await; + } + Err(e) => { + tracing::error!(error = %e, "Failed to install SIGTERM handler"); + std::future::pending::<()>().await; + } + } }; #[cfg(not(unix))] From 927b58faae7a4530cfbec975ea80aea9fd69a8b6 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:39:39 +1000 Subject: [PATCH 094/172] Fail closed when IAP is configured without trusted_proxies --- src/config/mod.rs | 47 ++++++++++++------------ src/middleware/layers/admin.rs | 66 +++++++++++++++++----------------- 2 files changed, 56 insertions(+), 57 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 31f371c..2166192 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -166,26 +166,22 @@ impl GatewayConfig { )); } - // IAP without trusted_proxies is dangerous — anyone can spoof identity headers. + // IAP without trusted_proxies is fail-open: anyone who reaches the + // gateway can spoof identity headers. There is no safe fallback — + // refuse to start until the operator configures `server.trusted_proxies`. if matches!(self.auth.mode, AuthMode::Iap(_)) && !self.server.trusted_proxies.is_configured() { - if !self.server.host.is_loopback() { - return Err(ConfigError::Validation( - "IAP mode (auth.mode.type = \"iap\") is enabled and the server \ - binds to a non-localhost address, but server.trusted_proxies is not \ - configured. This allows any client to spoof identity headers. Either \ - configure server.trusted_proxies.cidrs with your proxy's IP ranges, \ - or bind to localhost (server.host = \"127.0.0.1\")." - .into(), - )); - } - tracing::warn!( - "IAP mode is enabled without server.trusted_proxies configured. \ - Identity headers will be accepted from ANY source. This is safe only if \ - the gateway is exclusively accessible through a trusted reverse proxy. \ - Configure server.trusted_proxies.cidrs for production deployments." - ); + return Err(ConfigError::Validation( + "IAP mode (auth.mode.type = \"iap\") is enabled but \ + server.trusted_proxies is not configured. Without trusted \ + proxies, identity headers can be spoofed by anyone able to \ + reach the gateway. Configure server.trusted_proxies.cidrs \ + with your proxy's IP ranges (or set \ + server.trusted_proxies.dangerously_trust_all = true \ + explicitly for isolated environments)." 
+ .into(), + )); } // Validate individual sections @@ -809,9 +805,12 @@ key3 = "literal""# #[test] #[cfg(feature = "database-sqlite")] - fn test_iap_without_trusted_proxies_localhost_warns_but_ok() { - // IAP on localhost without trusted_proxies should succeed (just warn) - let result = GatewayConfig::parse( + fn test_iap_without_trusted_proxies_on_localhost_also_errors() { + // IAP on localhost without trusted_proxies must also fail; the + // localhost loopback compat path was removed (the proxy auth + // middleware no longer trusts headers when trusted_proxies is unset, + // so accepting this config would silently disable IAP). + let err = GatewayConfig::parse( r#" [server] host = "127.0.0.1" @@ -828,12 +827,12 @@ key3 = "literal""# type = "open_ai" api_key = "sk-test" "#, - ); + ) + .unwrap_err(); assert!( - result.is_ok(), - "IAP on localhost without trusted_proxies should be allowed: {:?}", - result.err() + err.to_string().contains("trusted_proxies"), + "should mention trusted_proxies: {err}" ); } diff --git a/src/middleware/layers/admin.rs b/src/middleware/layers/admin.rs index e78cd31..b2788c5 100644 --- a/src/middleware/layers/admin.rs +++ b/src/middleware/layers/admin.rs @@ -1084,34 +1084,33 @@ async fn try_proxy_auth_auth( None => return Ok(None), }; - // SECURITY: Validate that the request comes from a trusted proxy before trusting headers. - // If trusted_proxies is configured, we MUST verify the connecting IP is trusted. - // If trusted_proxies is NOT configured, we trust all sources (for backwards compatibility - // and development environments where the gateway is behind a trusted network boundary). + // SECURITY: Identity headers may only be trusted when the request comes + // from a trusted proxy. Config validation refuses startup if IAP is + // enabled without `server.trusted_proxies` set, so by this point the + // section must be configured — anything here that isn't from a trusted + // source is dropped. let trusted_proxies = &state.config.server.trusted_proxies; - if trusted_proxies.is_configured() { - let parsed_cidrs = trusted_proxies.parsed_cidrs(); + let parsed_cidrs = trusted_proxies.parsed_cidrs(); - let is_trusted = match connecting_ip { - Some(ip) => trusted_proxies.is_trusted_ip(ip, &parsed_cidrs), - // No connecting IP available - only trust if dangerously_trust_all is explicitly set - None => trusted_proxies.dangerously_trust_all, - }; + let is_trusted = match connecting_ip { + Some(ip) => trusted_proxies.is_trusted_ip(ip, &parsed_cidrs), + // No connecting IP available — only trust if `dangerously_trust_all` + // is explicitly set (e.g. unit tests or fully air-gapped envs). 
+ None => trusted_proxies.dangerously_trust_all, + }; - if !is_trusted { - // Request is not from a trusted proxy - do not trust identity headers - if let Some(ip) = connecting_ip - && headers.contains_key(&config.identity_header) - { - tracing::warn!( - connecting_ip = %ip, - identity_header = %config.identity_header, - "Ignoring Proxy auth identity header from untrusted IP - \ - configure server.trusted_proxies to trust this source" - ); - } - return Ok(None); + if !is_trusted { + if let Some(ip) = connecting_ip + && headers.contains_key(&config.identity_header) + { + tracing::warn!( + connecting_ip = %ip, + identity_header = %config.identity_header, + "Ignoring Proxy auth identity header from untrusted IP - \ + configure server.trusted_proxies to trust this source" + ); } + return Ok(None); } // Check for identity header @@ -2403,12 +2402,12 @@ mod tests { map } - // ========== No trusted_proxies configured (backwards compatibility) ========== + // ========== No trusted_proxies configured (now fails closed) ========== #[tokio::test] - async fn test_proxy_auth_no_proxy_config_trusts_headers() { - // When trusted_proxies is NOT configured, headers should be trusted - // (backwards compatibility for development/internal deployments) + async fn test_proxy_auth_no_proxy_config_drops_headers() { + // Config validation refuses startup in this case, but we still want + // the middleware itself to fail closed defensively if it ever runs. let state = create_test_state( "X-Forwarded-User", TrustedProxiesConfig::default(), // No proxy config @@ -2419,20 +2418,21 @@ mod tests { .await .unwrap(); - assert!(result.is_some()); - assert_eq!(result.unwrap().external_id, "alice@example.com"); + assert!( + result.is_none(), + "headers must be dropped when trusted_proxies is unset" + ); } #[tokio::test] async fn test_proxy_auth_no_proxy_config_no_connecting_ip() { - // When no trusted_proxies and no connecting IP, still trust headers + // No trusted_proxies and no connecting IP — still fail closed. let state = create_test_state("X-Forwarded-User", TrustedProxiesConfig::default()); let headers = make_headers(vec![("X-Forwarded-User", "bob@example.com")]); let result = try_proxy_auth_auth(&headers, None, &state).await.unwrap(); - assert!(result.is_some()); - assert_eq!(result.unwrap().external_id, "bob@example.com"); + assert!(result.is_none()); } // ========== dangerously_trust_all mode ========== From bad30cc7e0fd44328a34a636c3f22e369be5b98f Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:40:59 +1000 Subject: [PATCH 095/172] Log a startup error when ignored TLS config is present --- src/cli/server.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/cli/server.rs b/src/cli/server.rs index e746837..b159829 100644 --- a/src/cli/server.rs +++ b/src/cli/server.rs @@ -105,6 +105,15 @@ pub(crate) async fn run_server(explicit_config_path: Option<&str>, no_browser: b ); } + if config.server.tls.is_some() { + tracing::error!( + "[server.tls] is set but the gateway does not yet terminate TLS \ + itself; the gateway will continue to listen on plain HTTP. \ + Terminate TLS upstream (reverse proxy / load balancer) and \ + remove the [server.tls] section, or wait for native TLS support." 
+ ); + } + let state = match AppState::new(config.clone()).await { Ok(state) => state, Err(e) => { From 24cd6aeaae9646f59ac92e3336e51de8b7580ae3 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:42:22 +1000 Subject: [PATCH 096/172] Regenerate Hadrian OpenAPI spec in CI before conformance check --- .github/workflows/ci.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e2fc866..a9b65f7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -427,6 +427,27 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v4 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo registry & target + uses: Swatinem/rust-cache@v2 + + - name: Install build deps for samael + run: | + sudo apt-get update + sudo apt-get install -y libxml2-dev libxslt1-dev libxmlsec1-dev pkg-config libssl-dev + + - name: Regenerate Hadrian OpenAPI spec + run: cargo run --release -- openapi --output openapi/hadrian.openapi.json + + - name: Verify checked-in spec matches generated + run: | + if ! git diff --exit-code -- openapi/hadrian.openapi.json; then + echo "::error::openapi/hadrian.openapi.json is out of date. Run ./scripts/generate-openapi.sh and commit the result." >&2 + exit 1 + fi + - name: Fetch reference specs run: ./scripts/fetch-openapi-specs.sh openai From c02234ac39af8982af2b9a1e1b786eddcc66ec0d Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:44:08 +1000 Subject: [PATCH 097/172] Set terminationGracePeriodSeconds=60 in Helm to fit 35s drain --- helm/hadrian/templates/deployment.yaml | 4 ++++ helm/hadrian/values.yaml | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/helm/hadrian/templates/deployment.yaml b/helm/hadrian/templates/deployment.yaml index abb8521..a49e1d2 100644 --- a/helm/hadrian/templates/deployment.yaml +++ b/helm/hadrian/templates/deployment.yaml @@ -32,6 +32,10 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} serviceAccountName: {{ include "hadrian.serviceAccountName" . }} + # Match the gateway's drain budget. Default drain is 35s (see + # `[server.shutdown]`), so the pod must be allowed at least that long + # plus a margin for OTLP/usage-buffer flushes after SIGTERM. + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds | default 60 }} securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} {{- if or .Values.initContainers.waitForDb.enabled .Values.initContainers.migrate.enabled .Values.extraInitContainers }} diff --git a/helm/hadrian/values.yaml b/helm/hadrian/values.yaml index 40e614e..a1fcab7 100644 --- a/helm/hadrian/values.yaml +++ b/helm/hadrian/values.yaml @@ -37,6 +37,11 @@ podAnnotations: {} # -- Pod labels podLabels: {} +# -- Pod termination grace period in seconds. Must exceed the gateway's +# `[server.shutdown]` drain budget (default 35s) so in-flight requests +# finish and the OTLP/usage buffers flush before SIGKILL. 
+terminationGracePeriodSeconds: 60 + # -- Pod security context podSecurityContext: fsGroup: 1000 From 76536c1283046b8674fb1ed5ce089bb76718ef47 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 09:45:40 +1000 Subject: [PATCH 098/172] Validate chat completion content, usage, and model echo in shared test --- .../src/tests/shared/chat-completions.ts | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/deploy/tests/src/tests/shared/chat-completions.ts b/deploy/tests/src/tests/shared/chat-completions.ts index c3fd271..2649e6c 100644 --- a/deploy/tests/src/tests/shared/chat-completions.ts +++ b/deploy/tests/src/tests/shared/chat-completions.ts @@ -72,10 +72,50 @@ export function runChatCompletionsTests( }); expect(response.response.status).toBe(200); - expect(response.data).toBeDefined(); - // The response should have choices array - // Note: The actual response structure depends on the server - // For the test provider, it returns a mock response + // The generated client types this as `{}`; structurally validate the + // OpenAI-shaped response so the test catches breakage in content/usage + // shape, not just status code. + const data = response.data as + | { + model?: string; + choices?: Array<{ + message?: { role?: string; content?: string }; + finish_reason?: string; + }>; + usage?: { + prompt_tokens?: number; + completion_tokens?: number; + total_tokens?: number; + }; + } + | undefined; + expect(data).toBeDefined(); + + // Echoes back the requested model (or a downstream alias of it). + expect(typeof data!.model).toBe("string"); + expect(data!.model!.length).toBeGreaterThan(0); + + // Choices: at least one, with a non-empty assistant message and a + // finish_reason. We don't pin specific text because providers vary. + const choices = data!.choices; + expect(Array.isArray(choices)).toBe(true); + expect(choices!.length).toBeGreaterThan(0); + const choice = choices![0]; + expect(choice.message).toBeDefined(); + expect(choice.message!.role).toBe("assistant"); + expect(typeof choice.message!.content).toBe("string"); + expect(choice.message!.content!.length).toBeGreaterThan(0); + expect(typeof choice.finish_reason).toBe("string"); + + // Usage block must report at least the prompt tokens; total tokens + // should equal prompt + completion when both are present. + expect(data!.usage).toBeDefined(); + const usage = data!.usage!; + expect(usage.prompt_tokens!).toBeGreaterThanOrEqual(1); + expect(usage.completion_tokens!).toBeGreaterThanOrEqual(0); + expect(usage.total_tokens).toBe( + usage.prompt_tokens! + usage.completion_tokens!, + ); }); }); } From e4eec2562222e9c2f2b2bf2886e1d8879864149d Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 12:19:00 +1000 Subject: [PATCH 099/172] Throttle bootstrap admin auth per IP to deter key guessing --- src/cache/keys.rs | 16 ++++++++ src/middleware/layers/admin.rs | 70 ++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/src/cache/keys.rs b/src/cache/keys.rs index 064acc3..339cbd6 100644 --- a/src/cache/keys.rs +++ b/src/cache/keys.rs @@ -127,6 +127,22 @@ impl CacheKeys { format!("gw:emergency:lockout:{}", ip) } + /// Bootstrap auth rate limiting: gw:bootstrap:ratelimit:{ip} + /// + /// Tracks failed bootstrap-key attempts from an IP address. Counter + /// increments on each failed attempt and resets after the window expires. 
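+    ///
+    /// For example, failed attempts from 203.0.113.7 are counted under
+    /// `gw:bootstrap:ratelimit:203.0.113.7`.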
+ pub fn bootstrap_rate_limit(ip: &str) -> String { + format!("gw:bootstrap:ratelimit:{}", ip) + } + + /// Bootstrap auth lockout: gw:bootstrap:lockout:{ip} + /// + /// Set when an IP exceeds the bootstrap-auth failure threshold. + /// Presence blocks further bootstrap-auth attempts from the IP. + pub fn bootstrap_lockout(ip: &str) -> String { + format!("gw:bootstrap:lockout:{}", ip) + } + /// Response cache key for chat completions. /// /// Generates a deterministic cache key based on configurable components: diff --git a/src/middleware/layers/admin.rs b/src/middleware/layers/admin.rs index b2788c5..95445b3 100644 --- a/src/middleware/layers/admin.rs +++ b/src/middleware/layers/admin.rs @@ -206,6 +206,8 @@ async fn try_bootstrap_auth( connecting_ip: Option, state: &AppState, ) -> Result, AuthError> { + use crate::cache::CacheKeys; + // Check if bootstrap API key is configured let bootstrap_key = match &state.config.auth.bootstrap { Some(bootstrap) => match &bootstrap.api_key { @@ -222,6 +224,22 @@ async fn try_bootstrap_auth( None => return Ok(None), }; + // Per-IP throttle: refuse further attempts when this source IP is locked out. + let ip_str = connecting_ip + .map(|ip| ip.to_string()) + .unwrap_or_else(|| "unknown".to_string()); + if let Some(cache) = &state.cache { + let lockout_key = CacheKeys::bootstrap_lockout(&ip_str); + if let Ok(Some(_)) = cache.get_bytes(&lockout_key).await { + tracing::warn!( + ip = %ip_str, + event = "bootstrap_auth.locked_out", + "Bootstrap auth attempt blocked: IP is locked out" + ); + return Err(AuthError::Forbidden("Bootstrap auth denied".to_string())); + } + } + // Constant-time comparison to prevent timing attacks use subtle::ConstantTimeEq; let keys_match: bool = provided_key @@ -252,6 +270,7 @@ async fn try_bootstrap_auth( }) .await; } + increment_bootstrap_rate_limit(&ip_str, state).await; return Ok(None); } @@ -281,6 +300,8 @@ async fn try_bootstrap_auth( user_count = user_count, "Bootstrap auth rejected: database has users" ); + // Treat post-bootstrap probing as a failed attempt to deter scanners. + increment_bootstrap_rate_limit(&ip_str, state).await; return Ok(None); } @@ -300,6 +321,55 @@ async fn try_bootstrap_auth( })) } +/// Per-IP throttle parameters for bootstrap auth failures. +/// +/// Bootstrap is unauthenticated until the first user is created and is exposed +/// on every admin route, so an attacker can make unlimited guesses. We cap +/// failures and lock the source IP out for an hour after exceeding the +/// threshold. Values are intentionally hardcoded — bootstrap auth is a narrow +/// installer flow, so additional configuration would just be footgun surface. +const BOOTSTRAP_MAX_ATTEMPTS: i64 = 10; +const BOOTSTRAP_WINDOW_SECS: u64 = 900; +const BOOTSTRAP_LOCKOUT_SECS: u64 = 3600; + +/// Increment the bootstrap auth rate-limit counter for an IP and lock the IP +/// out once attempts exceed [`BOOTSTRAP_MAX_ATTEMPTS`]. 
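+///
+/// With the constants below, the 10th failure inside a 15-minute window sets
+/// the lockout key and the source IP is refused for the next hour.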
+async fn increment_bootstrap_rate_limit(ip_str: &str, state: &AppState) { + use std::time::Duration; + + use crate::cache::CacheKeys; + + let Some(cache) = &state.cache else { + return; + }; + + let rate_limit_key = CacheKeys::bootstrap_rate_limit(ip_str); + let count = cache + .incr(&rate_limit_key, Duration::from_secs(BOOTSTRAP_WINDOW_SECS)) + .await + .unwrap_or(1); + + if count >= BOOTSTRAP_MAX_ATTEMPTS { + let lockout_key = CacheKeys::bootstrap_lockout(ip_str); + let _ = cache + .set_bytes( + &lockout_key, + b"1", + Duration::from_secs(BOOTSTRAP_LOCKOUT_SECS), + ) + .await; + + tracing::warn!( + ip = %ip_str, + attempts = count, + lockout_secs = BOOTSTRAP_LOCKOUT_SECS, + event = "bootstrap_auth.lockout_triggered", + "Bootstrap auth lockout triggered after {} failed attempts", + count + ); + } +} + /// Try to authenticate via emergency access key. /// /// Emergency authentication provides break-glass access when SSO is unavailable. From 8060b86490dc01df8b47b082f946a9c3e6f37fd4 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 12:21:54 +1000 Subject: [PATCH 100/172] Throttle /auth/discover separately to deter SSO domain enumeration --- src/app.rs | 6 +++-- src/middleware/layers/rate_limit.rs | 40 +++++++++++++++++++++++++++++ src/middleware/mod.rs | 2 +- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/app.rs b/src/app.rs index a66a214..d2e2bb7 100644 --- a/src/app.rs +++ b/src/app.rs @@ -2010,12 +2010,14 @@ pub fn build_app(config: &config::GatewayConfig, state: AppState) -> Router { } // Add SSO discovery endpoint if database is configured (for per-org SSO) - // This is needed for both OIDC and SAML per-org configurations + // This is needed for both OIDC and SAML per-org configurations. + // Use the dedicated discover throttle (tighter than the global IP + // rate limit) to deter SSO-domain enumeration. if !config.database.is_none() { let discover_route = get(routes::auth_routes::discover).route_layer( axum::middleware::from_fn_with_state( state.clone(), - middleware::rate_limit_middleware, + middleware::discover_rate_limit_middleware, ), ); app = app.route("/auth/discover", discover_route); diff --git a/src/middleware/layers/rate_limit.rs b/src/middleware/layers/rate_limit.rs index d3542d6..6685832 100644 --- a/src/middleware/layers/rate_limit.rs +++ b/src/middleware/layers/rate_limit.rs @@ -336,6 +336,46 @@ pub fn extract_client_ip_from_parts( connecting_ip } +/// Tighter per-IP throttle for `/auth/discover`. +/// +/// Discover takes an email and tells the caller whether the domain has SSO +/// configured (and which IdP), which makes it a fast oracle for enumerating +/// customer email domains. The default IP rate limit (60/min) is generous +/// for normal API traffic but lets a single host probe ~86k domains per +/// day. Bound discovery to roughly one domain per second per source IP, +/// using a separate `discover-minute` window so it doesn't share counters +/// with other IP-rate-limited endpoints. 
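+///
+/// At 10 requests/minute a single host is capped at ~14.4k probes per day,
+/// a ~6x reduction that still leaves ample headroom for real login flows.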
+const DISCOVER_REQUESTS_PER_MINUTE: u32 = 10; + +#[allow(clippy::question_mark)] +pub async fn discover_rate_limit_middleware( + State(state): State, + req: Request, + next: Next, +) -> Result { + let cache = match &state.cache { + Some(c) => c, + None => return Ok(next.run(req).await), + }; + + let client_ip = extract_client_ip(&req, &state.config.server.trusted_proxies); + let client_ip_str = client_ip + .map(|ip| ip.to_string()) + .unwrap_or_else(|| "unknown".to_string()); + + let result = check_ip_rate_limit( + cache, + &client_ip_str, + "discover-minute", + DISCOVER_REQUESTS_PER_MINUTE, + Duration::from_secs(60), + ) + .await?; + + let response = next.run(req).await; + Ok(add_rate_limit_headers(response, &result)) +} + async fn check_ip_rate_limit( cache: &std::sync::Arc, client_ip: &str, diff --git a/src/middleware/mod.rs b/src/middleware/mod.rs index b012238..7e5ef34 100644 --- a/src/middleware/mod.rs +++ b/src/middleware/mod.rs @@ -44,7 +44,7 @@ pub use layers::{ admin::admin_auth_middleware, api::api_middleware, authz::{AuthzResponse, api_authz_middleware, authz_middleware, permissive_authz_middleware}, - rate_limit::rate_limit_middleware, + rate_limit::{discover_rate_limit_middleware, rate_limit_middleware}, request_id::request_id_middleware, security_headers::security_headers_middleware, }; From 77d1c1b801a051755dff81836d41f97d8c7298eb Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 12:27:18 +1000 Subject: [PATCH 101/172] Make DLQ pop atomic via DELETE...RETURNING with row locking --- src/dlq/database.rs | 54 +++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/src/dlq/database.rs b/src/dlq/database.rs index 55e49ee..cb4854c 100644 --- a/src/dlq/database.rs +++ b/src/dlq/database.rs @@ -91,51 +91,47 @@ impl DeadLetterQueue for DatabaseDlq { } async fn pop(&self) -> DlqResult> { - // Get the oldest entry + // Atomic claim-and-delete so concurrent consumers cannot pop the same row. + // Postgres uses FOR UPDATE SKIP LOCKED to let other workers progress past + // the locked row instead of blocking. SQLite doesn't support row locking, + // but write transactions are serialized at the database level, so the + // single DELETE ... WHERE id = (SELECT ... LIMIT 1) RETURNING ... is + // atomic with respect to other writers. let entry = match self.pool.pool() { #[cfg(feature = "database-sqlite")] DbPoolRef::Sqlite(pool) => { let row = sqlx::query_as::<_, DlqRow>(&format!( - r#"SELECT id, entry_type, payload, error, retry_count, created_at, last_retry_at, metadata - FROM {} ORDER BY created_at ASC LIMIT 1"#, - self.table_name + r#"DELETE FROM {table} + WHERE id = ( + SELECT id FROM {table} + ORDER BY created_at ASC + LIMIT 1 + ) + RETURNING id, entry_type, payload, error, retry_count, created_at, last_retry_at, metadata"#, + table = self.table_name )) .fetch_optional(pool) .await?; - if let Some(row) = row { - // Delete it - sqlx::query(&format!("DELETE FROM {} WHERE id = ?", self.table_name)) - .bind(&row.id) - .execute(pool) - .await?; - - Some(row.into_entry()?) - } else { - None - } + row.map(|r| r.into_entry()).transpose()? 
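+                // (transpose: Option<Result<_, _>> -> Result<Option<_>, _>, so a
+                // row that fails to decode still propagates its error via `?`)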
} #[cfg(feature = "database-postgres")] DbPoolRef::Postgres(pools) => { let row = sqlx::query_as::<_, DlqRowPg>(&format!( - r#"SELECT id, entry_type, payload, error, retry_count, created_at, last_retry_at, metadata - FROM {} ORDER BY created_at ASC LIMIT 1"#, - self.table_name + r#"DELETE FROM {table} + WHERE id = ( + SELECT id FROM {table} + ORDER BY created_at ASC + FOR UPDATE SKIP LOCKED + LIMIT 1 + ) + RETURNING id, entry_type, payload, error, retry_count, created_at, last_retry_at, metadata"#, + table = self.table_name )) .fetch_optional(pools.write_pool()) .await?; - if let Some(row) = row { - // Delete it - sqlx::query(&format!("DELETE FROM {} WHERE id = $1", self.table_name)) - .bind(row.id) - .execute(pools.write_pool()) - .await?; - - Some(row.into_entry()?) - } else { - None - } + row.map(|r| r.into_entry()).transpose()? } }; From 1417e838e0f9611f6d580827b6dca6d95b57db13 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 12:33:11 +1000 Subject: [PATCH 102/172] Bind RFC-3339 timestamps in SQLite repos and grep-guard regressions --- scripts/ci-backend.sh | 14 ++++++++++++++ src/db/sqlite/api_keys.rs | 22 +++++++++++++++++----- src/db/sqlite/conversations.rs | 4 +++- src/db/sqlite/domain_verifications.rs | 13 ++++++++++--- 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/scripts/ci-backend.sh b/scripts/ci-backend.sh index a608984..6841f3a 100755 --- a/scripts/ci-backend.sh +++ b/scripts/ci-backend.sh @@ -97,6 +97,20 @@ else echo " cargo-audit not installed, skipping" fi +# SQLite repos must use truncate_to_millis-bound RFC-3339 timestamps, not +# datetime('now'), so cursor pagination and TEXT comparisons stay consistent +# (see CLAUDE.md "Cursor pagination timestamps"). DEFAULT clauses in CREATE +# TABLE are fine (only fire when no value is bound), so we exclude them. +step "Checking for datetime('now') in SQLite query bodies" +if datetime_hits=$(grep -RIn "datetime('now')" src/db/sqlite \ + | grep -v "DEFAULT (datetime('now'))" || true) && [ -n "$datetime_hits" ]; then + echo -e "${RED}✗${NC} datetime('now') found in SQLite repo queries; bind truncate_to_millis(Utc::now()) instead:" + echo "$datetime_hits" + FAILED=1 +else + success "No stray datetime('now') in SQLite query bodies" +fi + # Summary echo "" if [ $FAILED -eq 0 ]; then diff --git a/src/db/sqlite/api_keys.rs b/src/db/sqlite/api_keys.rs index 75d8489..9ffb895 100644 --- a/src/db/sqlite/api_keys.rs +++ b/src/db/sqlite/api_keys.rs @@ -784,13 +784,16 @@ impl ApiKeyRepo for SqliteApiKeyRepo { } async fn revoke(&self, id: Uuid) -> DbResult<()> { + let now = truncate_to_millis(Utc::now()); query( r#" UPDATE api_keys - SET revoked_at = datetime('now'), updated_at = datetime('now') + SET revoked_at = ?, updated_at = ? WHERE id = ? "#, ) + .bind(now) + .bind(now) .bind(id.to_string()) .execute(&self.pool) .await?; @@ -799,13 +802,15 @@ impl ApiKeyRepo for SqliteApiKeyRepo { } async fn update_last_used(&self, id: Uuid) -> DbResult<()> { + let now = truncate_to_millis(Utc::now()); query( r#" UPDATE api_keys - SET last_used_at = datetime('now') + SET last_used_at = ? WHERE id = ? "#, ) + .bind(now) .bind(id.to_string()) .execute(&self.pool) .await?; @@ -814,13 +819,16 @@ impl ApiKeyRepo for SqliteApiKeyRepo { } async fn revoke_by_user(&self, user_id: Uuid) -> DbResult { + let now = truncate_to_millis(Utc::now()); let result = query( r#" UPDATE api_keys - SET revoked_at = datetime('now'), updated_at = datetime('now') + SET revoked_at = ?, updated_at = ? WHERE owner_type = 'user' AND owner_id = ? 
AND revoked_at IS NULL "#, ) + .bind(now) + .bind(now) .bind(user_id.to_string()) .execute(&self.pool) .await?; @@ -901,13 +909,16 @@ impl ApiKeyRepo for SqliteApiKeyRepo { } async fn revoke_by_service_account(&self, service_account_id: Uuid) -> DbResult { + let now = truncate_to_millis(Utc::now()); let result = query( r#" UPDATE api_keys - SET revoked_at = datetime('now'), updated_at = datetime('now') + SET revoked_at = ?, updated_at = ? WHERE owner_type = 'service_account' AND owner_id = ? AND revoked_at IS NULL "#, ) + .bind(now) + .bind(now) .bind(service_account_id.to_string()) .execute(&self.pool) .await?; @@ -940,11 +951,12 @@ impl ApiKeyRepo for SqliteApiKeyRepo { query( r#" UPDATE api_keys - SET rotation_grace_until = ?, updated_at = datetime('now') + SET rotation_grace_until = ?, updated_at = ? WHERE id = ? "#, ) .bind(grace_until) + .bind(now) .bind(old_key_id.to_string()) .execute(&mut *tx) .await?; diff --git a/src/db/sqlite/conversations.rs b/src/db/sqlite/conversations.rs index 1b1f750..0fa71c9 100644 --- a/src/db/sqlite/conversations.rs +++ b/src/db/sqlite/conversations.rs @@ -2329,13 +2329,15 @@ mod tests { let org_id = Uuid::new_v4(); // Create deleted project + let deleted_at = crate::db::repos::truncate_to_millis(chrono::Utc::now()); sqlx::query( - "INSERT INTO projects (id, org_id, slug, name, deleted_at) VALUES (?, ?, ?, ?, datetime('now'))", + "INSERT INTO projects (id, org_id, slug, name, deleted_at) VALUES (?, ?, ?, ?, ?)", ) .bind(project_id.to_string()) .bind(org_id.to_string()) .bind("deleted-project") .bind("Deleted Project") + .bind(deleted_at) .execute(&pool) .await .expect("Failed to create project"); diff --git a/src/db/sqlite/domain_verifications.rs b/src/db/sqlite/domain_verifications.rs index 5e0bf1c..5249d3e 100644 --- a/src/db/sqlite/domain_verifications.rs +++ b/src/db/sqlite/domain_verifications.rs @@ -1,4 +1,5 @@ use async_trait::async_trait; +use chrono::Utc; use uuid::Uuid; use super::{ @@ -246,6 +247,7 @@ impl DomainVerificationRepo for SqliteDomainVerificationRepo { } async fn find_verified_by_domain(&self, domain: &str) -> DbResult> { + let now = truncate_to_millis(Utc::now()); let result = query( r#" SELECT dv.id, dv.org_sso_config_id, dv.domain, dv.verification_token, dv.status, @@ -256,11 +258,12 @@ impl DomainVerificationRepo for SqliteDomainVerificationRepo { WHERE dv.domain = ? AND dv.status = 'verified' AND osc.enabled = 1 - AND (dv.expires_at IS NULL OR dv.expires_at > datetime('now')) + AND (dv.expires_at IS NULL OR dv.expires_at > ?) LIMIT 1 "#, ) .bind(domain) + .bind(now) .fetch_optional(&self.pool) .await?; @@ -274,6 +277,7 @@ impl DomainVerificationRepo for SqliteDomainVerificationRepo { &self, org_sso_config_id: Uuid, ) -> DbResult> { + let now = truncate_to_millis(Utc::now()); let rows = query( r#" SELECT id, org_sso_config_id, domain, verification_token, status, @@ -282,11 +286,12 @@ impl DomainVerificationRepo for SqliteDomainVerificationRepo { FROM domain_verifications WHERE org_sso_config_id = ? AND status = 'verified' - AND (expires_at IS NULL OR expires_at > datetime('now')) + AND (expires_at IS NULL OR expires_at > ?) 
ORDER BY domain ASC
             "#,
        )
        .bind(org_sso_config_id.to_string())
+        .bind(now)
        .fetch_all(&self.pool)
        .await?;

@@ -296,17 +301,19 @@ impl DomainVerificationRepo for SqliteDomainVerificationRepo {
     }

     async fn has_verified_domain(&self, org_sso_config_id: Uuid) -> DbResult<bool> {
+        let now = truncate_to_millis(Utc::now());
         let row = query(
             r#"
             SELECT EXISTS(
                 SELECT 1 FROM domain_verifications
                 WHERE org_sso_config_id = ?
                 AND status = 'verified'
-                AND (expires_at IS NULL OR expires_at > datetime('now'))
+                AND (expires_at IS NULL OR expires_at > ?)
             ) as has_verified
             "#,
         )
         .bind(org_sso_config_id.to_string())
+        .bind(now)
         .fetch_one(&self.pool)
         .await?;

From f6b499e63a84c72b97931c12aebfdaf027c8fdc Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sun, 26 Apr 2026 12:57:23 +1000
Subject: [PATCH 103/172] Scope response/semantic cache keys and vector search
 by tenant

---
 src/cache/keys.rs                  | 160 +++++++++++++++++++++++++++--
 src/cache/mod.rs                   |   2 +-
 src/cache/response_cache.rs        |  79 ++++++++++----
 src/cache/semantic_cache.rs        |  41 +++++---
 src/cache/vector_store/mod.rs      |  35 +++++++
 src/cache/vector_store/pgvector.rs | 144 ++++++++++++++------------
 src/cache/vector_store/qdrant.rs   |  25 ++++-
 src/cache/vector_store/test.rs     |   9 +-
 src/cache/vector_store/tests.rs    |  58 +++++++++--
 src/routes/api/chat.rs             |  57 +++++++---
 src/routes/api/embeddings.rs       |   6 +-
 11 files changed, 484 insertions(+), 132 deletions(-)

diff --git a/src/cache/keys.rs b/src/cache/keys.rs
index 339cbd6..4ac4ecb 100644
--- a/src/cache/keys.rs
+++ b/src/cache/keys.rs
@@ -13,6 +13,42 @@ use crate::{
     models::BudgetPeriod,
 };

+/// Tenant identifiers mixed into response/embedding/completion cache keys so
+/// two tenants that submit byte-identical requests do not share cache entries
+/// or semantic-cache vector matches.
+///
+/// All fields are optional because the gateway can serve unauthenticated or
+/// partially-scoped requests; whatever scope the caller has, we hash it. The
+/// `api_key_id` is the strongest isolator (every API key is tenant-bound),
+/// but the other fields are folded in too so admin-issued or proxy-issued
+/// requests stay scoped to the org/project/user that originated them.
+#[derive(Debug, Clone, Default)]
+pub struct CacheTenantScope {
+    pub org_id: Option<String>,
+    pub project_id: Option<String>,
+    pub api_key_id: Option<String>,
+    pub user_id: Option<String>,
+}
+
+impl CacheTenantScope {
+    pub fn unscoped() -> Self {
+        Self::default()
+    }
+
+    fn hash_into(&self, hasher: &mut Sha256) {
+        hasher.update(b"tenant:");
+        hasher.update(b"org=");
+        hasher.update(self.org_id.as_deref().unwrap_or("").as_bytes());
+        hasher.update(b"|proj=");
+        hasher.update(self.project_id.as_deref().unwrap_or("").as_bytes());
+        hasher.update(b"|key=");
+        hasher.update(self.api_key_id.as_deref().unwrap_or("").as_bytes());
+        hasher.update(b"|user=");
+        hasher.update(self.user_id.as_deref().unwrap_or("").as_bytes());
+        hasher.update(b"\x00");
+    }
+}
+
 pub struct CacheKeys;

 impl CacheKeys {
@@ -159,9 +195,14 @@ impl CacheKeys {
         payload: &CreateChatCompletionPayload,
         model: &str,
         key_components: &CacheKeyComponents,
+        tenant: &CacheTenantScope,
     ) -> String {
         let mut hasher = Sha256::new();

+        // Tenant scope first so cross-tenant collisions are impossible
+        // regardless of payload content.
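+        // The tenant fields are UUID strings in practice, so the "|" field
+        // separators in hash_into cannot appear inside a value and the scope
+        // digest is unambiguous.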
+ tenant.hash_into(&mut hasher); + // Model is always included in the cache key hasher.update(b"model:"); hasher.update(model.as_bytes()); @@ -240,9 +281,12 @@ impl CacheKeys { payload: &CreateResponsesPayload, model: &str, key_components: &CacheKeyComponents, + tenant: &CacheTenantScope, ) -> String { let mut hasher = Sha256::new(); + tenant.hash_into(&mut hasher); + // Model is always included in the cache key hasher.update(b"model:"); hasher.update(model.as_bytes()); @@ -302,9 +346,12 @@ impl CacheKeys { payload: &CreateCompletionPayload, model: &str, key_components: &CacheKeyComponents, + tenant: &CacheTenantScope, ) -> String { let mut hasher = Sha256::new(); + tenant.hash_into(&mut hasher); + // Model is always included in the cache key hasher.update(b"model:"); hasher.update(model.as_bytes()); @@ -354,9 +401,15 @@ impl CacheKeys { /// making them excellent candidates for caching. /// /// Returns `gw:embeddings:{hash}` where hash is a SHA-256 digest of the key components. - pub fn embeddings_cache(payload: &CreateEmbeddingPayload, model: &str) -> String { + pub fn embeddings_cache( + payload: &CreateEmbeddingPayload, + model: &str, + tenant: &CacheTenantScope, + ) -> String { let mut hasher = Sha256::new(); + tenant.hash_into(&mut hasher); + // Model is always included in the cache key hasher.update(b"model:"); hasher.update(model.as_bytes()); @@ -607,8 +660,18 @@ mod tests { let key_components = CacheKeyComponents::default(); - let key1 = CacheKeys::response_cache(&payload, "gpt-4", &key_components); - let key2 = CacheKeys::response_cache(&payload, "gpt-4", &key_components); + let key1 = CacheKeys::response_cache( + &payload, + "gpt-4", + &key_components, + &CacheTenantScope::unscoped(), + ); + let key2 = CacheKeys::response_cache( + &payload, + "gpt-4", + &key_components, + &CacheTenantScope::unscoped(), + ); // Same input should produce same key assert_eq!(key1, key2); @@ -656,8 +719,18 @@ mod tests { ..payload1.clone() }; - let key1 = CacheKeys::response_cache(&payload1, "gpt-4", &key_components); - let key2 = CacheKeys::response_cache(&payload2, "gpt-4", &key_components); + let key1 = CacheKeys::response_cache( + &payload1, + "gpt-4", + &key_components, + &CacheTenantScope::unscoped(), + ); + let key2 = CacheKeys::response_cache( + &payload2, + "gpt-4", + &key_components, + &CacheTenantScope::unscoped(), + ); // Different messages should produce different keys assert_ne!(key1, key2); @@ -706,8 +779,18 @@ mod tests { ..payload1.clone() }; - let key1 = CacheKeys::response_cache(&payload1, "gpt-4", &key_components); - let key2 = CacheKeys::response_cache(&payload2, "gpt-4", &key_components); + let key1 = CacheKeys::response_cache( + &payload1, + "gpt-4", + &key_components, + &CacheTenantScope::unscoped(), + ); + let key2 = CacheKeys::response_cache( + &payload2, + "gpt-4", + &key_components, + &CacheTenantScope::unscoped(), + ); // Different temperatures should produce different keys when temperature is in key_components assert_ne!(key1, key2); @@ -746,10 +829,69 @@ mod tests { sovereignty_requirements: None, }; - let key1 = CacheKeys::response_cache(&payload, "gpt-4", &key_components); - let key2 = CacheKeys::response_cache(&payload, "claude-3", &key_components); + let tenant = CacheTenantScope::unscoped(); + let key1 = CacheKeys::response_cache(&payload, "gpt-4", &key_components, &tenant); + let key2 = CacheKeys::response_cache(&payload, "claude-3", &key_components, &tenant); // Different models should produce different keys assert_ne!(key1, key2); } + + #[test] + fn 
test_response_cache_key_scoped_per_tenant() { + let key_components = CacheKeyComponents::default(); + let payload = CreateChatCompletionPayload { + messages: vec![Message::User { + content: MessageContent::Text("Hello".to_string()), + name: None, + }], + model: Some("gpt-4".to_string()), + models: None, + temperature: Some(0.0), + seed: None, + response_format: None, + tools: None, + tool_choice: None, + frequency_penalty: None, + logit_bias: None, + logprobs: None, + top_logprobs: None, + max_completion_tokens: None, + max_tokens: None, + metadata: None, + presence_penalty: None, + reasoning: None, + stop: None, + stream: false, + stream_options: None, + top_p: None, + user: None, + sovereignty_requirements: None, + }; + + let tenant_a = CacheTenantScope { + org_id: Some("org-a".to_string()), + api_key_id: Some("key-1".to_string()), + ..Default::default() + }; + let tenant_b = CacheTenantScope { + org_id: Some("org-b".to_string()), + api_key_id: Some("key-2".to_string()), + ..Default::default() + }; + + let key_a = CacheKeys::response_cache(&payload, "gpt-4", &key_components, &tenant_a); + let key_b = CacheKeys::response_cache(&payload, "gpt-4", &key_components, &tenant_b); + let key_unscoped = CacheKeys::response_cache( + &payload, + "gpt-4", + &key_components, + &CacheTenantScope::unscoped(), + ); + + // Identical payloads from different tenants must hash to distinct keys. + assert_ne!(key_a, key_b); + assert_ne!(key_a, key_unscoped); + assert_ne!(key_b, key_unscoped); + } } diff --git a/src/cache/mod.rs b/src/cache/mod.rs index a5b90d0..ebe40d3 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -16,7 +16,7 @@ pub mod vector_store; ))] pub use embedding_service::EmbeddingError; pub use embedding_service::EmbeddingService; -pub use keys::CacheKeys; +pub use keys::{CacheKeys, CacheTenantScope}; pub use memory::MemoryCache; #[cfg(feature = "redis")] pub use redis::RedisCache; diff --git a/src/cache/response_cache.rs b/src/cache/response_cache.rs index cdc3995..7f1cb2e 100644 --- a/src/cache/response_cache.rs +++ b/src/cache/response_cache.rs @@ -34,7 +34,7 @@ use std::{sync::Arc, time::Duration}; use serde::{Deserialize, Serialize}; use super::{ - keys::CacheKeys, + keys::{CacheKeys, CacheTenantScope}, traits::{Cache, CacheExt}, }; use crate::{ @@ -96,6 +96,7 @@ impl ResponseCache { &self, payload: &CreateChatCompletionPayload, model: &str, + tenant: &CacheTenantScope, force_refresh: bool, ) -> CacheLookupResult { // Force refresh bypasses cache lookup but still allows caching the response @@ -122,7 +123,8 @@ impl ResponseCache { } // Generate cache key - let cache_key = CacheKeys::response_cache(payload, model, &self.config.key_components); + let cache_key = + CacheKeys::response_cache(payload, model, &self.config.key_components, tenant); // Look up in cache match self.cache.get_json::(&cache_key).await { @@ -162,6 +164,7 @@ impl ResponseCache { payload: &CreateChatCompletionPayload, model: &str, provider: &str, + tenant: &CacheTenantScope, body: Vec, content_type: &str, ) -> bool { @@ -194,7 +197,8 @@ impl ResponseCache { } // Generate cache key - let cache_key = CacheKeys::response_cache(payload, model, &self.config.key_components); + let cache_key = + CacheKeys::response_cache(payload, model, &self.config.key_components, tenant); // Create cached response let cached = CachedResponse { @@ -239,6 +243,7 @@ impl ResponseCache { &self, payload: &CreateResponsesPayload, model: &str, + tenant: &CacheTenantScope, force_refresh: bool, ) -> CacheLookupResult { // Force refresh bypasses 
cache lookup but still allows caching the response @@ -265,7 +270,8 @@ impl ResponseCache { } // Generate cache key - let cache_key = CacheKeys::responses_cache(payload, model, &self.config.key_components); + let cache_key = + CacheKeys::responses_cache(payload, model, &self.config.key_components, tenant); // Look up in cache match self.cache.get_json::(&cache_key).await { @@ -302,6 +308,7 @@ impl ResponseCache { payload: &CreateResponsesPayload, model: &str, provider: &str, + tenant: &CacheTenantScope, body: Vec, content_type: &str, ) -> bool { @@ -334,7 +341,8 @@ impl ResponseCache { } // Generate cache key - let cache_key = CacheKeys::responses_cache(payload, model, &self.config.key_components); + let cache_key = + CacheKeys::responses_cache(payload, model, &self.config.key_components, tenant); // Create cached response let cached = CachedResponse { @@ -419,6 +427,7 @@ impl ResponseCache { &self, payload: &CreateCompletionPayload, model: &str, + tenant: &CacheTenantScope, force_refresh: bool, ) -> CacheLookupResult { // Force refresh bypasses cache lookup but still allows caching the response @@ -445,7 +454,8 @@ impl ResponseCache { } // Generate cache key - let cache_key = CacheKeys::completions_cache(payload, model, &self.config.key_components); + let cache_key = + CacheKeys::completions_cache(payload, model, &self.config.key_components, tenant); // Look up in cache match self.cache.get_json::(&cache_key).await { @@ -482,6 +492,7 @@ impl ResponseCache { payload: &CreateCompletionPayload, model: &str, provider: &str, + tenant: &CacheTenantScope, body: Vec, content_type: &str, ) -> bool { @@ -514,7 +525,8 @@ impl ResponseCache { } // Generate cache key - let cache_key = CacheKeys::completions_cache(payload, model, &self.config.key_components); + let cache_key = + CacheKeys::completions_cache(payload, model, &self.config.key_components, tenant); // Create cached response let cached = CachedResponse { @@ -580,6 +592,7 @@ impl ResponseCache { &self, payload: &CreateEmbeddingPayload, model: &str, + tenant: &CacheTenantScope, force_refresh: bool, ) -> CacheLookupResult { // Force refresh bypasses cache lookup but still allows caching the response @@ -595,7 +608,7 @@ impl ResponseCache { // Embeddings don't have streaming or temperature, so no bypass checks needed // Generate cache key - let cache_key = CacheKeys::embeddings_cache(payload, model); + let cache_key = CacheKeys::embeddings_cache(payload, model, tenant); // Look up in cache match self.cache.get_json::(&cache_key).await { @@ -632,6 +645,7 @@ impl ResponseCache { payload: &CreateEmbeddingPayload, model: &str, provider: &str, + tenant: &CacheTenantScope, body: Vec, content_type: &str, ) -> bool { @@ -653,7 +667,7 @@ impl ResponseCache { } // Generate cache key - let cache_key = CacheKeys::embeddings_cache(payload, model); + let cache_key = CacheKeys::embeddings_cache(payload, model, tenant); // Create cached response let cached = CachedResponse { @@ -764,7 +778,9 @@ mod tests { let response_cache = ResponseCache::new(cache, config); let payload = create_test_payload(false, Some(0.0)); - let result = response_cache.lookup(&payload, "gpt-4", false).await; + let result = response_cache + .lookup(&payload, "gpt-4", &CacheTenantScope::unscoped(), false) + .await; assert!(matches!(result, CacheLookupResult::Bypass)); } @@ -776,7 +792,9 @@ mod tests { let response_cache = ResponseCache::new(cache, config); let payload = create_test_payload(true, Some(0.0)); - let result = response_cache.lookup(&payload, "gpt-4", false).await; + let result 
= response_cache + .lookup(&payload, "gpt-4", &CacheTenantScope::unscoped(), false) + .await; assert!(matches!(result, CacheLookupResult::Bypass)); } @@ -788,7 +806,9 @@ mod tests { let response_cache = ResponseCache::new(cache, config); let payload = create_test_payload(false, Some(0.7)); - let result = response_cache.lookup(&payload, "gpt-4", false).await; + let result = response_cache + .lookup(&payload, "gpt-4", &CacheTenantScope::unscoped(), false) + .await; assert!(matches!(result, CacheLookupResult::Bypass)); } @@ -801,7 +821,9 @@ mod tests { let payload = create_test_payload(false, Some(0.0)); // First lookup should be a miss - let result = response_cache.lookup(&payload, "gpt-4", false).await; + let result = response_cache + .lookup(&payload, "gpt-4", &CacheTenantScope::unscoped(), false) + .await; assert!(matches!(result, CacheLookupResult::Miss)); // Store a response @@ -811,6 +833,7 @@ mod tests { &payload, "gpt-4", "openai", + &CacheTenantScope::unscoped(), body.clone(), "application/json", ) @@ -818,7 +841,9 @@ mod tests { assert!(stored); // Second lookup should be a hit - let result = response_cache.lookup(&payload, "gpt-4", false).await; + let result = response_cache + .lookup(&payload, "gpt-4", &CacheTenantScope::unscoped(), false) + .await; match result { CacheLookupResult::Hit(cached) => { assert_eq!(cached.body, body); @@ -841,15 +866,26 @@ mod tests { // Store a response let body = br#"{"id":"test","object":"chat.completion"}"#.to_vec(); response_cache - .store(&payload, "gpt-4", "openai", body, "application/json") + .store( + &payload, + "gpt-4", + "openai", + &CacheTenantScope::unscoped(), + body, + "application/json", + ) .await; // With force_refresh=true, should return Miss even though cached - let result = response_cache.lookup(&payload, "gpt-4", true).await; + let result = response_cache + .lookup(&payload, "gpt-4", &CacheTenantScope::unscoped(), true) + .await; assert!(matches!(result, CacheLookupResult::Miss)); // With force_refresh=false, should return Hit - let result = response_cache.lookup(&payload, "gpt-4", false).await; + let result = response_cache + .lookup(&payload, "gpt-4", &CacheTenantScope::unscoped(), false) + .await; assert!(matches!(result, CacheLookupResult::Hit(_))); } @@ -865,7 +901,14 @@ mod tests { // Try to store a response larger than the limit let body = br#"{"id":"test","object":"chat.completion"}"#.to_vec(); let stored = response_cache - .store(&payload, "gpt-4", "openai", body, "application/json") + .store( + &payload, + "gpt-4", + "openai", + &CacheTenantScope::unscoped(), + body, + "application/json", + ) .await; assert!(!stored); } diff --git a/src/cache/semantic_cache.rs b/src/cache/semantic_cache.rs index 1acdb90..4f5f792 100644 --- a/src/cache/semantic_cache.rs +++ b/src/cache/semantic_cache.rs @@ -38,10 +38,10 @@ use tokio::sync::mpsc; use super::{ embedding_service::{EmbeddingError, EmbeddingService}, - keys::CacheKeys, + keys::{CacheKeys, CacheTenantScope}, response_cache::CachedResponse, traits::{Cache, CacheExt}, - vector_store::{VectorBackend, VectorMetadata, VectorStoreError}, + vector_store::{VectorBackend, VectorMetadata, VectorStoreError, VectorTenantFilter}, }; use crate::{ api_types::CreateChatCompletionPayload, config::SemanticCachingConfig, observability::metrics, @@ -89,6 +89,9 @@ pub struct StoreParams<'a> { pub model: &'a str, /// The provider that generated the response pub provider: &'a str, + /// Tenant scope used to key the response and tag the embedding so + /// cross-tenant exact and semantic matches are 
impossible. + pub tenant: &'a CacheTenantScope, /// The response body bytes pub body: Vec, /// The response content type @@ -97,10 +100,6 @@ pub struct StoreParams<'a> { pub key_components: &'a crate::config::CacheKeyComponents, /// Time-to-live for the cached response pub ttl: Duration, - /// Optional organization ID for multi-tenant isolation - pub organization_id: Option, - /// Optional project ID for finer-grained isolation - pub project_id: Option, } /// Semantic cache service combining exact and semantic matching. @@ -224,6 +223,7 @@ impl SemanticCache { payload: &CreateChatCompletionPayload, model: &str, key_components: &crate::config::CacheKeyComponents, + tenant: &CacheTenantScope, force_refresh: bool, ) -> SemanticLookupResult { // Force refresh bypasses cache lookup @@ -243,7 +243,7 @@ impl SemanticCache { } // Generate exact cache key - let cache_key = CacheKeys::response_cache(payload, model, key_components); + let cache_key = CacheKeys::response_cache(payload, model, key_components, tenant); // Step 1: Try exact match first (fastest) match self.cache.get_json::(&cache_key).await { @@ -283,7 +283,9 @@ impl SemanticCache { } }; - // Step 3: Search for similar embeddings + // Step 3: Search for similar embeddings, scoped to this tenant. + let vector_tenant_filter = + VectorTenantFilter::new(tenant.org_id.as_deref(), tenant.project_id.as_deref()); let search_results = match self .vector_store .search( @@ -291,6 +293,7 @@ impl SemanticCache { self.config.top_k, self.config.similarity_threshold, Some(model), + vector_tenant_filter, ) .await { @@ -305,8 +308,14 @@ impl SemanticCache { } }; - // Step 4: Find best semantic match - if let Some(best_match) = search_results.into_iter().next() { + // Step 4: Find best semantic match. We re-apply the tenant filter + // here as well so a backend that doesn't (or can't) enforce the + // filter at the query layer still cannot return another tenant's + // cached response. + if let Some(best_match) = search_results + .into_iter() + .find(|r| vector_tenant_filter.matches(&r.metadata)) + { // Look up the cached response using the matched cache key match self .cache @@ -370,8 +379,12 @@ impl SemanticCache { } // Generate exact cache key - let cache_key = - CacheKeys::response_cache(params.payload, params.model, params.key_components); + let cache_key = CacheKeys::response_cache( + params.payload, + params.model, + params.key_components, + params.tenant, + ); // Create cached response let cached = CachedResponse { @@ -410,8 +423,8 @@ impl SemanticCache { model: params.model.to_string(), text, ttl: params.ttl, - organization_id: params.organization_id, - project_id: params.project_id, + organization_id: params.tenant.org_id.clone(), + project_id: params.tenant.project_id.clone(), }; if let Err(e) = self.embedding_tx.try_send(task) { diff --git a/src/cache/vector_store/mod.rs b/src/cache/vector_store/mod.rs index 3da3846..c15c799 100644 --- a/src/cache/vector_store/mod.rs +++ b/src/cache/vector_store/mod.rs @@ -118,6 +118,36 @@ pub struct VectorSearchResult { pub similarity: f64, } +/// Tenant scope used to filter semantic-cache search results so a tenant can +/// never see another tenant's cached responses, even when their prompts are +/// semantically equivalent. `None` means "match entries with no value for this +/// field", so requests without an org/project don't fall through to scoped +/// entries. 
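+///
+/// For example, `VectorTenantFilter::new(Some("org-a"), None)` matches only
+/// entries stored with organization "org-a" and no project id; it rejects
+/// both `("org-a", "proj-1")` entries and fully unscoped ones.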
+#[derive(Debug, Clone, Copy, Default)] +pub struct VectorTenantFilter<'a> { + pub organization_id: Option<&'a str>, + pub project_id: Option<&'a str>, +} + +impl<'a> VectorTenantFilter<'a> { + pub fn new(organization_id: Option<&'a str>, project_id: Option<&'a str>) -> Self { + Self { + organization_id, + project_id, + } + } + + pub fn unscoped() -> Self { + Self::default() + } + + /// Returns true when the supplied metadata satisfies this filter. + pub fn matches(&self, metadata: &VectorMetadata) -> bool { + self.organization_id == metadata.organization_id.as_deref() + && self.project_id == metadata.project_id.as_deref() + } +} + // ============================================================================ // RAG VectorStore Chunk Types // ============================================================================ @@ -283,6 +313,10 @@ pub trait VectorBackend: Send + Sync { /// * `limit` - Maximum number of results to return /// * `threshold` - Minimum similarity threshold (0.0 to 1.0) /// * `model_filter` - Optional model name to filter results (only return same-model matches) + /// * `tenant_filter` - Tenant scope to filter results by (org/project). Cross-tenant + /// matches are dropped so two tenants with semantically equivalent prompts can't + /// serve each other's cached responses. `None` fields match any value, so a tenant + /// with no scope only sees entries that were also stored without scope. /// /// # Returns /// @@ -294,6 +328,7 @@ pub trait VectorBackend: Send + Sync { limit: usize, threshold: f64, model_filter: Option<&str>, + tenant_filter: VectorTenantFilter<'_>, ) -> VectorStoreResult>; /// Delete an embedding by its ID. diff --git a/src/cache/vector_store/pgvector.rs b/src/cache/vector_store/pgvector.rs index 80e3574..fc8aaa7 100644 --- a/src/cache/vector_store/pgvector.rs +++ b/src/cache/vector_store/pgvector.rs @@ -13,7 +13,7 @@ use uuid::Uuid; use super::{ ChunkFilter, ChunkSearchResult, ChunkWithEmbedding, HybridSearchConfig, StoredChunk, VectorBackend, VectorMetadata, VectorSearchResult, VectorStoreError, VectorStoreResult, - fusion::fuse_results_limited, + VectorTenantFilter, fusion::fuse_results_limited, }; use crate::{ config::{DistanceMetric, PgvectorIndexType}, @@ -685,6 +685,7 @@ impl VectorBackend for PgvectorStore { limit: usize, threshold: f64, model_filter: Option<&str>, + tenant_filter: VectorTenantFilter<'_>, ) -> VectorStoreResult> { if embedding.len() != self.dimensions { warn!( @@ -724,50 +725,68 @@ impl VectorBackend for PgvectorStore { let distance_threshold = self.similarity_to_distance_threshold(threshold); let op = self.distance_metric.pgvector_operator(); - // Build query with optional model filter - // We select the raw distance and convert to similarity in Rust - let query = if model_filter.is_some() { - format!( - r#" - SELECT - id, - cache_key, - model, - organization_id, - project_id, - created_at, - ttl_secs, - (embedding {op} $1::vector) as distance - FROM {} - WHERE expires_at > $2 - AND model = $3 - AND (embedding {op} $1::vector) < $4 - ORDER BY embedding {op} $1::vector - LIMIT $5 - "#, - self.table_name - ) - } else { - format!( - r#" - SELECT - id, - cache_key, - model, - organization_id, - project_id, - created_at, - ttl_secs, - (embedding {op} $1::vector) as distance - FROM {} - WHERE expires_at > $2 - AND (embedding {op} $1::vector) < $3 - ORDER BY embedding {op} $1::vector - LIMIT $4 - "#, - self.table_name - ) - }; + // Build query with optional model + tenant filters. 
Parameter indices
+        // match the order we bind below ($1=embedding, $2=now, then optional
+        // model/org/project, then distance threshold, then limit).
+        let mut where_clauses: Vec<String> = vec![
+            "expires_at > $2".to_string(),
+            format!("(embedding {op} $1::vector) < ${{distance}}"),
+        ];
+        let mut next_param: usize = 3;
+        let mut model_idx: Option<usize> = None;
+        let mut org_idx: Option<usize> = None;
+        let mut project_idx: Option<usize> = None;
+        if model_filter.is_some() {
+            where_clauses.push(format!("model = ${}", next_param));
+            model_idx = Some(next_param);
+            next_param += 1;
+        }
+        match tenant_filter.organization_id {
+            Some(_) => {
+                where_clauses.push(format!("organization_id = ${}", next_param));
+                org_idx = Some(next_param);
+                next_param += 1;
+            }
+            None => {
+                where_clauses.push("organization_id IS NULL".to_string());
+            }
+        }
+        match tenant_filter.project_id {
+            Some(_) => {
+                where_clauses.push(format!("project_id = ${}", next_param));
+                project_idx = Some(next_param);
+                next_param += 1;
+            }
+            None => {
+                where_clauses.push("project_id IS NULL".to_string());
+            }
+        }
+        let distance_idx = next_param;
+        next_param += 1;
+        let limit_idx = next_param;
+
+        let where_sql = where_clauses
+            .join(" AND ")
+            .replace("${distance}", &format!("${}", distance_idx));
+
+        let query = format!(
+            r#"
+            SELECT
+                id,
+                cache_key,
+                model,
+                organization_id,
+                project_id,
+                created_at,
+                ttl_secs,
+                (embedding {op} $1::vector) as distance
+            FROM {table}
+            WHERE {where_sql}
+            ORDER BY embedding {op} $1::vector
+            LIMIT ${limit_idx}
+            "#,
+            table = self.table_name,
+        );

         #[derive(sqlx::FromRow)]
         struct SearchRow {
@@ -780,24 +799,23 @@ impl VectorBackend for PgvectorStore {
             distance: f64,
         }

-        let result: Result<Vec<SearchRow>, _> = if let Some(model) = model_filter {
-            sqlx::query_as(&query)
-                .bind(&embedding_str)
-                .bind(now)
-                .bind(model)
-                .bind(distance_threshold)
-                .bind(limit as i32)
-                .fetch_all(&self.pool)
-                .await
-        } else {
-            sqlx::query_as(&query)
-                .bind(&embedding_str)
-                .bind(now)
-                .bind(distance_threshold)
-                .bind(limit as i32)
-                .fetch_all(&self.pool)
-                .await
-        };
+        let mut q = sqlx::query_as::<_, SearchRow>(&query)
+            .bind(&embedding_str)
+            .bind(now);
+        if let (Some(_), Some(model)) = (model_idx, model_filter) {
+            q = q.bind(model);
+        }
+        if let (Some(_), Some(org)) = (org_idx, tenant_filter.organization_id) {
+            q = q.bind(org);
+        }
+        if let (Some(_), Some(proj)) = (project_idx, tenant_filter.project_id) {
+            q = q.bind(proj);
+        }
+        let result = q
+            .bind(distance_threshold)
+            .bind(limit as i32)
+            .fetch_all(&self.pool)
+            .await;

         let duration = start.elapsed().as_secs_f64();
         let duration_ms = (duration * 1000.0) as u64;
diff --git a/src/cache/vector_store/qdrant.rs b/src/cache/vector_store/qdrant.rs
index ca00b90..2e97783 100644
--- a/src/cache/vector_store/qdrant.rs
+++ b/src/cache/vector_store/qdrant.rs
@@ -17,7 +17,7 @@ use uuid::Uuid;
 use super::{
     ChunkFilter, ChunkSearchResult, ChunkWithEmbedding, HybridSearchConfig, StoredChunk,
     VectorBackend, VectorMetadata, VectorSearchResult, VectorStoreError, VectorStoreResult,
-    fusion::fuse_results_limited,
+    VectorTenantFilter, fusion::fuse_results_limited,
 };
 use crate::{
     config::DistanceMetric,
@@ -794,6 +794,12 @@ impl VectorBackend for QdrantStore {
         limit: usize,
         threshold: f64,
         model_filter: Option<&str>,
+        // Qdrant doesn't ship `is_empty`/`is_null` in our minimal filter model,
+        // so tenant scoping is enforced via post-filter at the
+        // `SemanticCache::lookup` call site.
We still take the parameter to + // satisfy the trait and to fold organization_id matching into the + // server-side `must` filter when a value is present. + tenant_filter: VectorTenantFilter<'_>, ) -> VectorStoreResult> { if embedding.len() != self.dimensions { warn!( @@ -844,6 +850,23 @@ impl VectorBackend for QdrantStore { }); } + if let Some(org) = tenant_filter.organization_id { + must.push(FilterCondition { + key: "organization_id".to_string(), + condition: FilterMatch::Match { + value: serde_json::json!(org), + }, + }); + } + if let Some(project) = tenant_filter.project_id { + must.push(FilterCondition { + key: "project_id".to_string(), + condition: FilterMatch::Match { + value: serde_json::json!(project), + }, + }); + } + // Convert similarity threshold to Qdrant score threshold let score_threshold = self.similarity_to_score_threshold(threshold); diff --git a/src/cache/vector_store/test.rs b/src/cache/vector_store/test.rs index 6361947..48a8dc9 100644 --- a/src/cache/vector_store/test.rs +++ b/src/cache/vector_store/test.rs @@ -15,7 +15,7 @@ use uuid::Uuid; use super::{ ChunkFilter, ChunkSearchResult, ChunkWithEmbedding, HybridSearchConfig, StoredChunk, - VectorBackend, VectorMetadata, VectorSearchResult, VectorStoreResult, + VectorBackend, VectorMetadata, VectorSearchResult, VectorStoreResult, VectorTenantFilter, }; /// Test vector store that returns no-op/empty results for all operations. @@ -54,6 +54,7 @@ impl VectorBackend for TestVectorStore { _limit: usize, _threshold: f64, _model_filter: Option<&str>, + _tenant_filter: VectorTenantFilter<'_>, ) -> VectorStoreResult> { Ok(vec![]) } @@ -248,6 +249,7 @@ impl VectorBackend for MockableTestVectorStore { _limit: usize, _threshold: f64, _model_filter: Option<&str>, + _tenant_filter: VectorTenantFilter<'_>, ) -> VectorStoreResult> { Ok(vec![]) } @@ -396,7 +398,10 @@ mod tests { #[tokio::test] async fn test_vector_store_search_returns_empty() { let store = TestVectorStore::new(1536); - let results = store.search(&[0.0; 1536], 10, 0.8, None).await.unwrap(); + let results = store + .search(&[0.0; 1536], 10, 0.8, None, VectorTenantFilter::unscoped()) + .await + .unwrap(); assert!(results.is_empty()); } diff --git a/src/cache/vector_store/tests.rs b/src/cache/vector_store/tests.rs index 7e8a347..e16b255 100644 --- a/src/cache/vector_store/tests.rs +++ b/src/cache/vector_store/tests.rs @@ -17,7 +17,7 @@ use uuid::Uuid; use super::{ ChunkFilter, ChunkWithEmbedding, HybridSearchConfig, VectorBackend, VectorMetadata, - VectorStoreError, + VectorStoreError, VectorTenantFilter, }; // ============================================================================ @@ -73,7 +73,13 @@ pub async fn test_store_and_search(store: &dyn VectorBackend) { // Search with the same embedding should return exact match let results = store - .search(&embedding, 5, 0.9, Some("gpt-4")) + .search( + &embedding, + 5, + 0.9, + Some("gpt-4"), + VectorTenantFilter::unscoped(), + ) .await .expect("Failed to search"); @@ -102,7 +108,13 @@ pub async fn test_search_with_similar_embedding(store: &dyn VectorBackend) { // Search with a similar embedding let similar = create_similar_embedding(&original, 0.05); let results = store - .search(&similar, 5, 0.9, Some("gpt-4")) + .search( + &similar, + 5, + 0.9, + Some("gpt-4"), + VectorTenantFilter::unscoped(), + ) .await .expect("Failed to search"); @@ -132,7 +144,13 @@ pub async fn test_search_threshold_filtering(store: &dyn VectorBackend) { // Search with a very different embedding let different = create_test_embedding(dimensions, 
100.0); let results = store - .search(&different, 5, 0.99, Some("gpt-4")) + .search( + &different, + 5, + 0.99, + Some("gpt-4"), + VectorTenantFilter::unscoped(), + ) .await .expect("Failed to search"); @@ -175,7 +193,13 @@ pub async fn test_model_filter(store: &dyn VectorBackend) { // Search for gpt-4 only let results = store - .search(&embedding, 10, 0.9, Some("gpt-4")) + .search( + &embedding, + 10, + 0.9, + Some("gpt-4"), + VectorTenantFilter::unscoped(), + ) .await .expect("Failed to search"); @@ -189,7 +213,13 @@ pub async fn test_model_filter(store: &dyn VectorBackend) { // Search for claude only let results = store - .search(&embedding, 10, 0.9, Some("claude-3")) + .search( + &embedding, + 10, + 0.9, + Some("claude-3"), + VectorTenantFilter::unscoped(), + ) .await .expect("Failed to search"); @@ -215,7 +245,13 @@ pub async fn test_delete(store: &dyn VectorBackend) { .expect("Failed to store embedding"); let results = store - .search(&embedding, 5, 0.9, Some("gpt-4")) + .search( + &embedding, + 5, + 0.9, + Some("gpt-4"), + VectorTenantFilter::unscoped(), + ) .await .expect("Failed to search"); assert!(!results.is_empty(), "Should find embedding before delete"); @@ -329,7 +365,13 @@ pub async fn test_upsert(store: &dyn VectorBackend) { // Search should find the updated embedding let results = store - .search(&embedding2, 5, 0.9, Some("gpt-4")) + .search( + &embedding2, + 5, + 0.9, + Some("gpt-4"), + VectorTenantFilter::unscoped(), + ) .await .expect("Failed to search"); diff --git a/src/routes/api/chat.rs b/src/routes/api/chat.rs index 9db14e5..d298121 100644 --- a/src/routes/api/chat.rs +++ b/src/routes/api/chat.rs @@ -14,7 +14,7 @@ use crate::{ AppState, api_types, auth::AuthenticatedRequest, authz::RequestContext, - cache::{CacheLookupResult, SemanticLookupResult, StoreParams}, + cache::{CacheLookupResult, CacheTenantScope, SemanticLookupResult, StoreParams}, middleware::{AuthzContext, ClientInfo, RequestId}, models::UsageLogEntry, routes::execution::{ @@ -37,6 +37,23 @@ pub(super) enum CacheStatus { Miss, } +/// Build a tenant scope from the optional API-key auth, used to key cache +/// entries so two tenants never share a response/embedding cache hit. +pub(super) fn tenant_scope_from_auth( + auth: Option<&Extension>, +) -> CacheTenantScope { + let api_key = auth.and_then(|a| a.api_key()); + CacheTenantScope { + org_id: api_key.and_then(|k| k.org_id.map(|id| id.to_string())), + project_id: api_key.and_then(|k| k.project_id.map(|id| id.to_string())), + api_key_id: api_key.map(|k| k.key.id.to_string()), + user_id: api_key.and_then(|k| match &k.key.owner { + crate::models::ApiKeyOwner::User { user_id } => Some(user_id.to_string()), + _ => None, + }), + } +} + /// Apply output guardrails to a non-streaming response. 
/// /// Extracts assistant content from the response body, evaluates it against guardrails, @@ -675,11 +692,19 @@ pub async fn api_v1_chat_completions( .as_ref() .map(|c| &c.key_components); + let cache_tenant = tenant_scope_from_auth(auth.as_ref()); + // Check semantic cache first (if available), then fall back to simple response cache if let Some(ref semantic_cache) = state.semantic_cache { let key_components = key_components.cloned().unwrap_or_default(); match semantic_cache - .lookup(&payload, &model_name, &key_components, force_refresh) + .lookup( + &payload, + &model_name, + &key_components, + &cache_tenant, + force_refresh, + ) .await { SemanticLookupResult::ExactHit(cached) => { @@ -727,7 +752,7 @@ pub async fn api_v1_chat_completions( } else if let Some(ref response_cache) = state.response_cache { // Fall back to simple response cache if semantic cache is not configured match response_cache - .lookup(&payload, &model_name, force_refresh) + .lookup(&payload, &model_name, &cache_tenant, force_refresh) .await { CacheLookupResult::Hit(cached) => { @@ -916,14 +941,7 @@ pub async fn api_v1_chat_completions( .as_ref() .map(|c| c.ttl_secs) .unwrap_or(3600); - let org_id = auth - .as_ref() - .and_then(|a| a.org_id()) - .map(|id| id.to_string()); - let project_id = auth - .as_ref() - .and_then(|a| a.project_id()) - .map(|id| id.to_string()); + let tenant_clone = cache_tenant.clone(); #[cfg(feature = "server")] state.task_tracker.spawn(async move { @@ -931,12 +949,11 @@ pub async fn api_v1_chat_completions( payload: &payload_clone, model: &model_clone, provider: &provider_clone, + tenant: &tenant_clone, body: body_clone, content_type: &content_type_clone, key_components: &key_components_clone, ttl: Duration::from_secs(ttl_secs), - organization_id: org_id, - project_id, }; if !cache.store(params).await { tracing::debug!( @@ -951,6 +968,7 @@ pub async fn api_v1_chat_completions( let provider_clone = provider_name.clone(); let content_type_clone = content_type; let body_clone = body_vec.clone(); + let tenant_clone = cache_tenant.clone(); #[cfg(feature = "server")] state.task_tracker.spawn(async move { cache @@ -958,6 +976,7 @@ pub async fn api_v1_chat_completions( &payload_clone, &model_clone, &provider_clone, + &tenant_clone, body_clone, &content_type_clone, ) @@ -1212,10 +1231,12 @@ pub async fn api_v1_responses( // Track cache status for response headers let mut cache_status = CacheStatus::None; + let cache_tenant = tenant_scope_from_auth(auth.as_ref()); + // Check response cache (simple cache only for now - semantic cache not yet supported for responses) if let Some(ref response_cache) = state.response_cache { match response_cache - .lookup_responses(&payload, &model_name, force_refresh) + .lookup_responses(&payload, &model_name, &cache_tenant, force_refresh) .await { CacheLookupResult::Hit(cached) => { @@ -1615,6 +1636,7 @@ pub async fn api_v1_responses( let provider_clone = provider_name.clone(); let content_type_clone = content_type; let body_clone = body_vec.clone(); + let tenant_clone = cache_tenant.clone(); #[cfg(feature = "server")] state.task_tracker.spawn(async move { cache @@ -1622,6 +1644,7 @@ pub async fn api_v1_responses( &payload_clone, &model_clone, &provider_clone, + &tenant_clone, body_clone, &content_type_clone, ) @@ -1935,10 +1958,12 @@ pub async fn api_v1_completions( // Track cache status for response headers let mut cache_status = CacheStatus::None; + let cache_tenant = tenant_scope_from_auth(auth.as_ref()); + // Check response cache (simple cache only - semantic cache 
not yet supported for completions) if let Some(ref response_cache) = state.response_cache { match response_cache - .lookup_completions(&payload, &model_name, force_refresh) + .lookup_completions(&payload, &model_name, &cache_tenant, force_refresh) .await { CacheLookupResult::Hit(cached) => { @@ -2215,6 +2240,7 @@ pub async fn api_v1_completions( let provider_clone = provider_name.clone(); let content_type_clone = content_type; let body_clone = body_vec.clone(); + let tenant_clone = cache_tenant.clone(); #[cfg(feature = "server")] state.task_tracker.spawn(async move { cache @@ -2222,6 +2248,7 @@ pub async fn api_v1_completions( &payload_clone, &model_clone, &provider_clone, + &tenant_clone, body_clone, &content_type_clone, ) diff --git a/src/routes/api/embeddings.rs b/src/routes/api/embeddings.rs index 2a3b317..bd8f846 100644 --- a/src/routes/api/embeddings.rs +++ b/src/routes/api/embeddings.rs @@ -177,10 +177,12 @@ pub async fn api_v1_embeddings( // Track cache status for response headers let mut cache_status = CacheStatus::None; + let cache_tenant = super::chat::tenant_scope_from_auth(auth.as_ref()); + // Check response cache (embeddings are fully deterministic - excellent for caching) if let Some(ref response_cache) = state.response_cache { match response_cache - .lookup_embeddings(&payload, &model_name, force_refresh) + .lookup_embeddings(&payload, &model_name, &cache_tenant, force_refresh) .await { CacheLookupResult::Hit(cached) => { @@ -248,6 +250,7 @@ pub async fn api_v1_embeddings( let provider_clone = provider_name.clone(); let content_type_clone = content_type; let body_clone = body_vec.clone(); + let tenant_clone = cache_tenant.clone(); #[cfg(feature = "server")] state.task_tracker.spawn(async move { cache @@ -255,6 +258,7 @@ pub async fn api_v1_embeddings( &payload_clone, &model_clone, &provider_clone, + &tenant_clone, body_clone, &content_type_clone, ) From 4c65e17c99a7e72fabc16b922d780cefa96d6ad3 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 13:03:21 +1000 Subject: [PATCH 104/172] Burn OAuth code after repeated PKCE verifier failures --- src/app.rs | 17 +++++++++- src/services/oauth_pkce.rs | 68 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 3 deletions(-) diff --git a/src/app.rs b/src/app.rs index d2e2bb7..79595ee 100644 --- a/src/app.rs +++ b/src/app.rs @@ -410,7 +410,7 @@ impl AppState { // Initialize database and services if configured #[allow(unreachable_patterns)] - let (db, services) = match &config.database { + let (db, mut services) = match &config.database { config::DatabaseConfig::None => (None, None), _ => { let pool = db::DbPool::from_config(&config.database).await?; @@ -460,6 +460,21 @@ impl AppState { } }; + // Wire the cache into services that benefit from a shared backend. + // OAuth PKCE uses it for the per-code failure counter that burns a + // code after repeated bad verifiers; without a cache it falls back + // to the legacy "never burn on failure" behaviour. 
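// A minimal sketch of the contract this wiring relies on; illustrative
// only, and `Cache::incr` is assumed to be the atomic TTL'd counter that
// `record_pkce_failure` calls below, not something this patch defines:
//
//     async fn nth_failure(cache: &dyn crate::cache::Cache, key: &str) -> bool {
//         // Atomicity matters: two concurrent bad verifiers must observe
//         // counts 1 and 2, never 1 and 1, or an attacker could race past
//         // the burn threshold. The TTL stops abandoned counters leaking.
//         match cache.incr(key, std::time::Duration::from_secs(900)).await {
//             Ok(n) => n >= 3,  // true: caller burns the code
//             Err(_) => false,  // cache outage: legacy never-burn fallback
//         }
//     }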
+ if let Some(services) = services.as_mut() { + services.oauth_pkce = std::mem::replace( + &mut services.oauth_pkce, + services::OAuthPkceService::new( + db.clone() + .expect("services exist only when db is configured"), + ), + ) + .with_cache(cache.clone()); + } + // Initialize secrets manager based on configuration let secrets: Arc = match &config.secrets { config::SecretsConfig::None => { diff --git a/src/services/oauth_pkce.rs b/src/services/oauth_pkce.rs index e8e6417..d9e22e2 100644 --- a/src/services/oauth_pkce.rs +++ b/src/services/oauth_pkce.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::{sync::Arc, time::Duration as StdDuration}; use base64::{Engine as _, engine::general_purpose::URL_SAFE_NO_PAD}; use chrono::{Duration, Utc}; @@ -9,10 +9,23 @@ use thiserror::Error; use uuid::Uuid; use crate::{ + cache::Cache, db::{DbPool, DbResult, NewAuthorizationCode}, models::{OAuthAuthorizationCode, OAuthKeyOptions, PkceCodeChallengeMethod}, }; +/// How many failed PKCE verifications a single authorization code may suffer +/// before it is destroyed. The choice trades two attacks against each other: +/// burning on the first failure lets a network attacker who can write any +/// request DoS legitimate users; never burning lets an attacker who actually +/// stole the code keep guessing the verifier offline. Three matches the OAuth +/// security BCP guidance on "limited" retries. +const MAX_PKCE_FAILURES_PER_CODE: i64 = 3; +/// TTL for the failure counter. Authorization codes themselves live ~10 min, +/// so the counter is forced to outlive any reasonable code lifetime — that +/// way the count for a given code can't be reset by waiting it out. +const PKCE_FAILURE_TTL: StdDuration = StdDuration::from_secs(900); + /// Errors specific to the OAuth PKCE service. Mapped to HTTP status codes /// by the route handlers. #[derive(Debug, Error)] @@ -42,11 +55,21 @@ pub struct IssueCodeInput { #[derive(Clone)] pub struct OAuthPkceService { db: Arc, + /// Optional cache backing the per-code failure counter. When absent we + /// fall back to the legacy "never burn on failure" behaviour because we + /// have nowhere to track attempts; deployments that care about the + /// limited-retry guarantee should configure a cache backend. + cache: Option>, } impl OAuthPkceService { pub fn new(db: Arc) -> Self { - Self { db } + Self { db, cache: None } + } + + pub fn with_cache(mut self, cache: Option>) -> Self { + self.cache = cache; + self } /// Generate and persist a new authorization code bound to `user_id` and @@ -115,6 +138,12 @@ impl OAuthPkceService { .unwrap_u8() != 1 { + // Bump the per-code failure counter. Once the threshold is hit + // we burn the code so an attacker who stole it can't keep + // probing verifiers. We still hand out the same `PkceMismatch` + // error either way so the attacker can't probe for "this code + // is now burned" vs "still alive". + self.record_pkce_failure(code).await; return Err(OAuthPkceError::PkceMismatch); } @@ -123,6 +152,41 @@ impl OAuthPkceService { // rather than handing out a second key. repo.consume(code).await?.ok_or(OAuthPkceError::InvalidCode) } + + /// Increment the per-code PKCE failure counter and burn the code once it + /// exceeds `MAX_PKCE_FAILURES_PER_CODE`. Cache errors are swallowed: if + /// the cache is unavailable we fall back to the original (no-burn) + /// behaviour rather than blocking authentication. 
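///
/// A sketch of the intended sequence (illustrative, not a doctest; the
/// threshold and cache key are the constants defined at the top of this
/// file):
///
/// ```ignore
/// svc.record_pkce_failure(code).await; // counter = 1, code survives
/// svc.record_pkce_failure(code).await; // counter = 2, code survives
/// svc.record_pkce_failure(code).await; // counter = 3, code is consumed;
/// // even the correct verifier now yields InvalidCode rather than a key.
/// ```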
+ async fn record_pkce_failure(&self, code: &str) { + let Some(cache) = &self.cache else { + return; + }; + let key = pkce_failure_key(code); + match cache.incr(&key, PKCE_FAILURE_TTL).await { + Ok(count) if count >= MAX_PKCE_FAILURES_PER_CODE => { + // Burn the code. Failures from a network attacker or a + // genuinely broken client both end up here; the legitimate + // user has had `MAX_PKCE_FAILURES_PER_CODE - 1` chances to + // retry, which is enough headroom for a transient bug. + if let Err(e) = self.db.oauth_authorization_codes().consume(code).await { + tracing::warn!(error = %e, "Failed to burn PKCE code after repeated verifier failures"); + } + } + Ok(_) => {} + Err(e) => { + tracing::warn!(error = %e, "Failed to record PKCE failure counter; not burning code"); + } + } + } +} + +/// Cache key for the per-code PKCE failure counter. The code itself is +/// hashed so we never persist a raw authorization code in the cache. +fn pkce_failure_key(code: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(code.as_bytes()); + let digest = hasher.finalize(); + format!("gw:oauth:pkce:fails:{:x}", digest) } /// Generate a 256-bit URL-safe base64 random code (~43 chars). From b406a3ec32fa764ebf7efdec46acff1febac301a Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 13:11:53 +1000 Subject: [PATCH 105/172] Pepper SCIM bearer token hashes with HMAC-SHA256 --- Cargo.lock | 1 + Cargo.toml | 1 + src/app.rs | 29 ++++++++++++++++++ src/services/scim_configs.rs | 58 +++++++++++++++++++++++++++--------- 4 files changed, 75 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2f325a4..c1edd97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3399,6 +3399,7 @@ dependencies = [ "google-cloud-token", "hex", "hickory-resolver", + "hmac", "hostname", "http 1.4.0", "http-body-util", diff --git a/Cargo.toml b/Cargo.toml index 9fe6345..3134db7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -285,6 +285,7 @@ rust_decimal = { version = "1.40.0", features = ["macros"] } serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.145" sha2 = "0.10" +hmac = "0.12" subtle = "2.6.1" thiserror = "2.0.17" tokio = { version = "1.48.0", features = [ diff --git a/src/app.rs b/src/app.rs index 79595ee..0184455 100644 --- a/src/app.rs +++ b/src/app.rs @@ -473,6 +473,35 @@ impl AppState { ), ) .with_cache(cache.clone()); + + // SCIM tokens get HMAC-SHA256 hashed with a pepper so that an + // attacker who exfiltrates the database alone can't brute-force + // them. We derive the pepper from the configured session secret + // when one exists; otherwise we fall back to plain SHA-256 (and + // log so operators know to set a session secret). + #[cfg(feature = "sso")] + { + let pepper = config + .auth + .session + .as_ref() + .and_then(|s| s.secret.as_ref()) + .map(|secret| secret.as_bytes().to_vec()); + if pepper.is_none() { + tracing::warn!( + "[auth.session].secret is not set — SCIM tokens will be stored as \ + unsalted SHA-256. Configure a session secret to enable HMAC peppering." 
+ ); + } + services.scim_configs = std::mem::replace( + &mut services.scim_configs, + services::OrgScimConfigService::new( + db.clone() + .expect("services exist only when db is configured"), + ), + ) + .with_token_pepper(pepper); + } } // Initialize secrets manager based on configuration diff --git a/src/services/scim_configs.rs b/src/services/scim_configs.rs index 1dc7e54..331e1d4 100644 --- a/src/services/scim_configs.rs +++ b/src/services/scim_configs.rs @@ -5,6 +5,7 @@ use std::sync::Arc; +use hmac::{Hmac, Mac}; use sha2::{Digest, Sha256}; use uuid::Uuid; @@ -16,19 +17,49 @@ use crate::{ }, }; +type HmacSha256 = Hmac; + /// Service layer for organization SCIM configuration operations. /// /// SCIM tokens are hashed (like API keys) before storage. Unlike SSO client /// secrets, we don't use the SecretManager because SCIM tokens need fast /// lookup for every provisioning request. +/// +/// Hashing uses HMAC-SHA256 keyed with a server-side pepper instead of a +/// raw SHA-256, so an attacker who exfiltrates the database alone can't +/// brute-force tokens — they also need the pepper, which lives only in +/// process memory and the deployment's session secret material. #[derive(Clone)] pub struct OrgScimConfigService { db: Arc, + /// HMAC pepper. `None` falls back to plain SHA-256 for tests/wasm/local + /// deployments that haven't configured a pepper. Production deployments + /// must set one (we wire this from the session secret in `app.rs`). + pepper: Option>>, } impl OrgScimConfigService { pub fn new(db: Arc) -> Self { - Self { db } + Self { db, pepper: None } + } + + /// Install the HMAC pepper used for SCIM token hashing. Pass `None` to + /// disable peppering (default for environments without a session secret). + pub fn with_token_pepper(mut self, pepper: Option>) -> Self { + self.pepper = pepper.map(Arc::new); + self + } + + fn hash_token(&self, token: &str) -> String { + match self.pepper.as_deref() { + Some(pepper) => { + let mut mac = + HmacSha256::new_from_slice(pepper).expect("HMAC-SHA256 accepts any key length"); + mac.update(token.as_bytes()); + hex::encode(mac.finalize().into_bytes()) + } + None => unsalted_sha256(token), + } } /// Create a new SCIM configuration for an organization. @@ -48,7 +79,8 @@ impl OrgScimConfigService { input: CreateOrgScimConfig, ) -> Result { // Generate a secure token - let (raw_token, token_hash, token_prefix) = generate_scim_token(); + let (raw_token, token_prefix) = generate_scim_token(); + let token_hash = self.hash_token(&raw_token); // Create the config in the database let config = self @@ -89,7 +121,7 @@ impl OrgScimConfigService { token: &str, ) -> Result, OrgScimConfigError> { // Hash the incoming token - let token_hash = hash_token(token); + let token_hash = self.hash_token(token); // Look up by hash let config = self @@ -126,7 +158,8 @@ impl OrgScimConfigService { /// The updated config along with the new raw token (shown only once) pub async fn rotate_token(&self, id: Uuid) -> Result { // Generate a new secure token - let (raw_token, token_hash, token_prefix) = generate_scim_token(); + let (raw_token, token_prefix) = generate_scim_token(); + let token_hash = self.hash_token(&raw_token); // Update the token in the database let config = self @@ -162,10 +195,11 @@ pub enum OrgScimConfigError { /// Generate a new SCIM bearer token. /// -/// Returns (raw_token, token_hash, token_prefix). +/// Returns (raw_token, token_prefix). The hash is computed by the service so +/// it can mix in the configured pepper. 
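///
/// Illustrative usage (not a doctest; `hash_token` is the private service
/// method shown earlier in this diff):
///
/// ```ignore
/// let (raw_token, token_prefix) = generate_scim_token();
/// assert!(raw_token.starts_with("scim_"));
/// assert_eq!(token_prefix.len(), "scim_".len() + 4); // first 4 random chars
/// let stored = service.hash_token(&raw_token); // peppered when configured
/// ```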
/// /// Token format: `scim_<32 bytes base64url>` (approximately 48 characters) -fn generate_scim_token() -> (String, String, String) { +fn generate_scim_token() -> (String, String) { use base64::Engine; use rand::RngCore; @@ -179,19 +213,15 @@ fn generate_scim_token() -> (String, String, String) { // Construct the full token let raw_token = format!("scim_{}", encoded); - // Hash for storage - let token_hash = hash_token(&raw_token); - // Prefix for identification (first 8 chars of the random part) let token_prefix = format!("scim_{}", &encoded[..4]); - (raw_token, token_hash, token_prefix) + (raw_token, token_prefix) } -/// Hash a token using SHA-256. -fn hash_token(token: &str) -> String { +/// Plain SHA-256 fallback used when no pepper is configured. +fn unsalted_sha256(token: &str) -> String { let mut hasher = Sha256::new(); hasher.update(token.as_bytes()); - let result = hasher.finalize(); - hex::encode(result) + hex::encode(hasher.finalize()) } From 2e919b0a2245c212ae85eeff0400a7e266633095 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 13:38:33 +1000 Subject: [PATCH 106/172] Drop unused daily_spend table and its retention plumbing --- .../postgres/20250101000000_initial.sql | 30 ---------- .../sqlite/20250101000000_initial.sql | 30 ---------- src/config/retention.rs | 24 -------- src/db/postgres/usage.rs | 45 -------------- src/db/repos/usage.rs | 11 ---- src/db/sqlite/usage.rs | 45 -------------- src/observability/metrics.rs | 2 +- src/retention/worker.rs | 58 +------------------ 8 files changed, 3 insertions(+), 242 deletions(-) diff --git a/migrations_sqlx/postgres/20250101000000_initial.sql b/migrations_sqlx/postgres/20250101000000_initial.sql index 5396412..2b9e05e 100644 --- a/migrations_sqlx/postgres/20250101000000_initial.sql +++ b/migrations_sqlx/postgres/20250101000000_initial.sql @@ -683,36 +683,6 @@ CREATE INDEX IF NOT EXISTS idx_usage_records_recorded_at_id ON usage_records(rec CREATE INDEX IF NOT EXISTS idx_usage_records_model ON usage_records(model); CREATE INDEX IF NOT EXISTS idx_usage_records_request_id ON usage_records(request_id); --- ====================================================================== --- Daily Spend --- ====================================================================== - --- Materialized aggregates from usage_records, computed periodically -CREATE TABLE IF NOT EXISTS daily_spend ( - id UUID PRIMARY KEY NOT NULL, - -- Attribution context - api_key_id UUID REFERENCES api_keys(id) ON DELETE SET NULL, - -- Principal-based attribution (mirrors usage_records) - user_id UUID, - org_id UUID, - project_id UUID, - team_id UUID, - service_account_id UUID, - date DATE NOT NULL, - model VARCHAR(128) NOT NULL, - -- Total cost in microcents (1/1,000,000 of a dollar) for sub-cent precision - total_cost_microcents BIGINT NOT NULL DEFAULT 0, - total_tokens INTEGER NOT NULL DEFAULT 0, - request_count INTEGER NOT NULL DEFAULT 0 -); - -CREATE INDEX IF NOT EXISTS idx_daily_spend_date ON daily_spend(date); -CREATE INDEX IF NOT EXISTS idx_daily_spend_api_key_date ON daily_spend(api_key_id, date) WHERE api_key_id IS NOT NULL; -CREATE INDEX IF NOT EXISTS idx_daily_spend_org_date ON daily_spend(org_id, date) WHERE org_id IS NOT NULL; -CREATE INDEX IF NOT EXISTS idx_daily_spend_user_date ON daily_spend(user_id, date) WHERE user_id IS NOT NULL; -CREATE INDEX IF NOT EXISTS idx_daily_spend_project_date ON daily_spend(project_id, date) WHERE project_id IS NOT NULL; -CREATE INDEX IF NOT EXISTS idx_daily_spend_team_date ON daily_spend(team_id, date) WHERE 
team_id IS NOT NULL; - -- ====================================================================== -- Model Pricing -- ====================================================================== diff --git a/migrations_sqlx/sqlite/20250101000000_initial.sql b/migrations_sqlx/sqlite/20250101000000_initial.sql index e15eb2c..6d57b6a 100644 --- a/migrations_sqlx/sqlite/20250101000000_initial.sql +++ b/migrations_sqlx/sqlite/20250101000000_initial.sql @@ -635,36 +635,6 @@ CREATE INDEX IF NOT EXISTS idx_usage_records_recorded_at_id ON usage_records(rec CREATE INDEX IF NOT EXISTS idx_usage_records_model ON usage_records(model); CREATE INDEX IF NOT EXISTS idx_usage_records_request_id ON usage_records(request_id); --- ====================================================================== --- Daily Spend --- ====================================================================== - --- Materialized aggregates from usage_records, computed periodically -CREATE TABLE IF NOT EXISTS daily_spend ( - id TEXT PRIMARY KEY NOT NULL, - -- Attribution context - api_key_id TEXT REFERENCES api_keys(id) ON DELETE SET NULL, - -- Principal-based attribution (mirrors usage_records) - user_id TEXT, - org_id TEXT, - project_id TEXT, - team_id TEXT, - service_account_id TEXT, - date TEXT NOT NULL, - model TEXT NOT NULL, - -- Total cost in microcents (1/1,000,000 of a dollar) for sub-cent precision - total_cost_microcents INTEGER NOT NULL DEFAULT 0, - total_tokens INTEGER NOT NULL DEFAULT 0, - request_count INTEGER NOT NULL DEFAULT 0 -); - -CREATE INDEX IF NOT EXISTS idx_daily_spend_date ON daily_spend(date); -CREATE INDEX IF NOT EXISTS idx_daily_spend_api_key_date ON daily_spend(api_key_id, date); -CREATE INDEX IF NOT EXISTS idx_daily_spend_org_date ON daily_spend(org_id, date); -CREATE INDEX IF NOT EXISTS idx_daily_spend_user_date ON daily_spend(user_id, date); -CREATE INDEX IF NOT EXISTS idx_daily_spend_project_date ON daily_spend(project_id, date); -CREATE INDEX IF NOT EXISTS idx_daily_spend_team_date ON daily_spend(team_id, date); - -- ====================================================================== -- Model Pricing -- ====================================================================== diff --git a/src/config/retention.rs b/src/config/retention.rs index e4982f0..f541086 100644 --- a/src/config/retention.rs +++ b/src/config/retention.rs @@ -12,7 +12,6 @@ //! //! [retention.periods] //! usage_records_days = 90 -//! daily_spend_days = 365 //! audit_logs_days = 730 //! conversations_deleted_days = 30 //! @@ -80,12 +79,6 @@ pub struct RetentionPeriods { #[serde(default = "default_usage_records_days")] pub usage_records_days: u32, - /// Days to keep aggregated daily spend records. - /// These are lower-volume summary records (one per API key per model per day). - /// Default: 365 days - #[serde(default = "default_daily_spend_days")] - pub daily_spend_days: u32, - /// Days to keep audit log entries. /// Audit logs track admin operations and may be required for compliance. 
/// Default: 730 days (2 years) @@ -104,7 +97,6 @@ impl Default for RetentionPeriods { fn default() -> Self { Self { usage_records_days: default_usage_records_days(), - daily_spend_days: default_daily_spend_days(), audit_logs_days: default_audit_logs_days(), conversations_deleted_days: default_conversations_deleted_days(), } @@ -115,10 +107,6 @@ fn default_usage_records_days() -> u32 { 90 } -fn default_daily_spend_days() -> u32 { - 365 -} - fn default_audit_logs_days() -> u32 { 730 // 2 years } @@ -177,7 +165,6 @@ impl RetentionConfig { /// Check if any retention periods are configured (non-zero). pub fn has_any_retention(&self) -> bool { self.periods.usage_records_days > 0 - || self.periods.daily_spend_days > 0 || self.periods.audit_logs_days > 0 || self.periods.conversations_deleted_days > 0 } @@ -194,11 +181,6 @@ impl RetentionPeriods { self.usage_records_days > 0 } - /// Check if daily spend retention is enabled. - pub fn should_retain_daily_spend(&self) -> bool { - self.daily_spend_days > 0 - } - /// Check if audit logs retention is enabled. pub fn should_retain_audit_logs(&self) -> bool { self.audit_logs_days > 0 @@ -220,7 +202,6 @@ mod tests { assert!(!config.enabled); assert_eq!(config.interval_hours, 24); assert_eq!(config.periods.usage_records_days, 90); - assert_eq!(config.periods.daily_spend_days, 365); assert_eq!(config.periods.audit_logs_days, 730); assert_eq!(config.periods.conversations_deleted_days, 30); assert!(!config.safety.dry_run); @@ -246,7 +227,6 @@ mod tests { [periods] usage_records_days = 60 - daily_spend_days = 180 audit_logs_days = 365 conversations_deleted_days = 7 @@ -259,7 +239,6 @@ mod tests { assert!(config.enabled); assert_eq!(config.interval_hours, 12); assert_eq!(config.periods.usage_records_days, 60); - assert_eq!(config.periods.daily_spend_days, 180); assert_eq!(config.periods.audit_logs_days, 365); assert_eq!(config.periods.conversations_deleted_days, 7); assert!(config.safety.dry_run); @@ -274,13 +253,11 @@ mod tests { [periods] usage_records_days = 0 - daily_spend_days = 0 audit_logs_days = 0 conversations_deleted_days = 0 "#; let config: RetentionConfig = toml::from_str(toml).unwrap(); assert!(!config.periods.should_retain_usage_records()); - assert!(!config.periods.should_retain_daily_spend()); assert!(!config.periods.should_retain_audit_logs()); assert!(!config.periods.should_retain_conversations()); assert!(!config.has_any_retention()); @@ -292,7 +269,6 @@ mod tests { assert!(config.has_any_retention()); // Defaults have retention config.periods.usage_records_days = 0; - config.periods.daily_spend_days = 0; config.periods.audit_logs_days = 0; config.periods.conversations_deleted_days = 0; assert!(!config.has_any_retention()); diff --git a/src/db/postgres/usage.rs b/src/db/postgres/usage.rs index 65c28f2..a4d6b9d 100644 --- a/src/db/postgres/usage.rs +++ b/src/db/postgres/usage.rs @@ -4030,49 +4030,4 @@ impl UsageRepo for PostgresUsageRepo { Ok(total_deleted) } - - async fn delete_daily_spend_before( - &self, - cutoff: DateTime, - batch_size: u32, - max_deletes: u64, - ) -> DbResult { - let mut total_deleted: u64 = 0; - // daily_spend.date is stored as DATE in PostgreSQL - let cutoff_date = cutoff.date_naive(); - - loop { - if total_deleted >= max_deletes { - break; - } - - let remaining = max_deletes - total_deleted; - let limit = std::cmp::min(batch_size as u64, remaining) as i64; - - // PostgreSQL efficient batched deletion using ctid - let result = sqlx::query( - r#" - DELETE FROM daily_spend - WHERE ctid IN ( - SELECT ctid FROM daily_spend 
- WHERE date < $1 - LIMIT $2 - ) - "#, - ) - .bind(cutoff_date) - .bind(limit) - .execute(&self.write_pool) - .await?; - - let rows_deleted = result.rows_affected(); - total_deleted += rows_deleted; - - if rows_deleted < limit as u64 { - break; - } - } - - Ok(total_deleted) - } } diff --git a/src/db/repos/usage.rs b/src/db/repos/usage.rs index bac6c8e..4b9996b 100644 --- a/src/db/repos/usage.rs +++ b/src/db/repos/usage.rs @@ -561,15 +561,4 @@ pub trait UsageRepo: Send + Sync { batch_size: u32, max_deletes: u64, ) -> DbResult; - - /// Delete daily spend aggregates older than the given cutoff date. - /// - /// Deletes in batches to avoid locking the database. - /// Returns the total number of records deleted. - async fn delete_daily_spend_before( - &self, - cutoff: DateTime, - batch_size: u32, - max_deletes: u64, - ) -> DbResult; } diff --git a/src/db/sqlite/usage.rs b/src/db/sqlite/usage.rs index 1a2374f..7d19c89 100644 --- a/src/db/sqlite/usage.rs +++ b/src/db/sqlite/usage.rs @@ -4124,51 +4124,6 @@ impl UsageRepo for SqliteUsageRepo { Ok(total_deleted) } - - async fn delete_daily_spend_before( - &self, - cutoff: DateTime, - batch_size: u32, - max_deletes: u64, - ) -> DbResult { - let mut total_deleted: u64 = 0; - // daily_spend.date is stored as TEXT in 'YYYY-MM-DD' format - let cutoff_date = cutoff.format("%Y-%m-%d").to_string(); - - loop { - if total_deleted >= max_deletes { - break; - } - - let remaining = max_deletes - total_deleted; - let limit = std::cmp::min(batch_size as u64, remaining) as i64; - - // daily_spend uses composite primary key (api_key_id, date, model), use rowid for deletion - let result = query( - r#" - DELETE FROM daily_spend - WHERE rowid IN ( - SELECT rowid FROM daily_spend - WHERE date < ? - LIMIT ? - ) - "#, - ) - .bind(&cutoff_date) - .bind(limit) - .execute(&self.pool) - .await?; - - let rows_deleted = result.rows_affected(); - total_deleted += rows_deleted; - - if rows_deleted < limit as u64 { - break; - } - } - - Ok(total_deleted) - } } /// Helper function to compute usage stats from daily cost rows. diff --git a/src/observability/metrics.rs b/src/observability/metrics.rs index eb3bd2e..2736bc6 100644 --- a/src/observability/metrics.rs +++ b/src/observability/metrics.rs @@ -389,7 +389,7 @@ pub fn record_dlq_operation(operation: &str, entry_type: &str) { /// - Alerting on unexpected deletion volumes /// /// # Arguments -/// * `table` - The table from which records were deleted (e.g., "usage_records", "daily_spend", "audit_logs", "conversations") +/// * `table` - The table from which records were deleted (e.g., "usage_records", "audit_logs", "conversations") /// * `count` - The number of records deleted pub fn record_retention_deletion(table: &str, count: u64) { #[cfg(feature = "prometheus")] diff --git a/src/retention/worker.rs b/src/retention/worker.rs index c851945..2fa38d1 100644 --- a/src/retention/worker.rs +++ b/src/retention/worker.rs @@ -15,8 +15,6 @@ use crate::{config::RetentionConfig, db::DbPool, observability::metrics}; pub struct RetentionRunResult { /// Number of usage records deleted. pub usage_records_deleted: u64, - /// Number of daily spend records deleted. - pub daily_spend_deleted: u64, /// Number of audit log entries deleted. pub audit_logs_deleted: u64, /// Number of conversations hard-deleted. @@ -26,10 +24,7 @@ pub struct RetentionRunResult { impl RetentionRunResult { /// Total number of records deleted across all tables. 
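/// Example (mirrors `test_retention_run_result_total` further down in
/// this patch, so every name here comes from the diff itself):
///
/// ```ignore
/// let r = RetentionRunResult {
///     usage_records_deleted: 100,
///     audit_logs_deleted: 25,
///     conversations_deleted: 10,
/// };
/// assert_eq!(r.total(), 135);
/// ```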
pub fn total(&self) -> u64 { - self.usage_records_deleted - + self.daily_spend_deleted - + self.audit_logs_deleted - + self.conversations_deleted + self.usage_records_deleted + self.audit_logs_deleted + self.conversations_deleted } /// Check if any records were deleted. @@ -62,7 +57,6 @@ pub async fn start_retention_worker(db: Arc, config: RetentionConfig) { tracing::info!( interval_hours = config.interval_hours, usage_records_days = config.periods.usage_records_days, - daily_spend_days = config.periods.daily_spend_days, audit_logs_days = config.periods.audit_logs_days, conversations_deleted_days = config.periods.conversations_deleted_days, dry_run = config.safety.dry_run, @@ -78,7 +72,6 @@ pub async fn start_retention_worker(db: Arc, config: RetentionConfig) { if result.has_deletions() { tracing::info!( usage_records = result.usage_records_deleted, - daily_spend = result.daily_spend_deleted, audit_logs = result.audit_logs_deleted, conversations = result.conversations_deleted, total = result.total(), @@ -112,12 +105,6 @@ async fn run_retention( result.usage_records_deleted = deleted; } - // Delete daily spend records - if config.periods.should_retain_daily_spend() { - let deleted = delete_daily_spend(db, config).await?; - result.daily_spend_deleted = deleted; - } - // Delete audit logs if config.periods.should_retain_audit_logs() { let deleted = delete_audit_logs(db, config).await?; @@ -172,45 +159,6 @@ async fn delete_usage_records( Ok(deleted) } -/// Delete daily spend records older than the retention period. -async fn delete_daily_spend( - db: &Arc, - config: &RetentionConfig, -) -> Result> { - let cutoff = Utc::now() - Duration::days(config.periods.daily_spend_days as i64); - - if config.safety.dry_run { - tracing::info!( - cutoff = %cutoff, - "DRY RUN: Would delete daily spend records before {}", - cutoff - ); - return Ok(0); - } - - let max_deletes = if config.safety.max_deletes_per_run == 0 { - u64::MAX - } else { - config.safety.max_deletes_per_run - }; - - let deleted = db - .usage() - .delete_daily_spend_before(cutoff, config.safety.batch_size, max_deletes) - .await?; - - if deleted > 0 { - tracing::debug!( - deleted = deleted, - cutoff = %cutoff, - "Deleted daily spend records" - ); - metrics::record_retention_deletion("daily_spend", deleted); - } - - Ok(deleted) -} - /// Delete audit logs older than the retention period. 
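/// The cutoff follows the same shape as the other sweepers in this file
/// (sketch, assuming the `chrono` imports this file already uses):
///
/// ```ignore
/// let cutoff = Utc::now() - Duration::days(config.periods.audit_logs_days as i64);
/// // dry_run logs the cutoff and returns 0; max_deletes_per_run == 0 is
/// // treated as unlimited (u64::MAX), matching delete_usage_records.
/// ```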
async fn delete_audit_logs( db: &Arc, @@ -297,11 +245,10 @@ mod tests { fn test_retention_run_result_total() { let result = RetentionRunResult { usage_records_deleted: 100, - daily_spend_deleted: 50, audit_logs_deleted: 25, conversations_deleted: 10, }; - assert_eq!(result.total(), 185); + assert_eq!(result.total(), 135); } #[test] @@ -320,7 +267,6 @@ mod tests { fn test_retention_run_result_default() { let result = RetentionRunResult::default(); assert_eq!(result.usage_records_deleted, 0); - assert_eq!(result.daily_spend_deleted, 0); assert_eq!(result.audit_logs_deleted, 0); assert_eq!(result.conversations_deleted, 0); assert_eq!(result.total(), 0); From d2ccba9c66bedb5636a12f394b371a28d342f2c5 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 13:45:42 +1000 Subject: [PATCH 107/172] Sandbox HTML artifact open-in-new-tab in unique-origin iframe --- ui/src/components/Artifact/HtmlArtifact.tsx | 21 ++++++++++++++----- ui/src/components/ChatMessage/ChatMessage.tsx | 5 +---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/ui/src/components/Artifact/HtmlArtifact.tsx b/ui/src/components/Artifact/HtmlArtifact.tsx index b32820b..21dfdf5 100644 --- a/ui/src/components/Artifact/HtmlArtifact.tsx +++ b/ui/src/components/Artifact/HtmlArtifact.tsx @@ -88,11 +88,22 @@ function HtmlArtifactComponent({ artifact, className }: HtmlArtifactProps) { } const handleOpenInNewTab = () => { - const blob = new Blob([wrapHtml(html)], { type: "text/html" }); - const url = URL.createObjectURL(blob); - window.open(url, "_blank"); - // Clean up after a delay - setTimeout(() => URL.revokeObjectURL(url), 1000); + // Open a host tab whose body contains a sandboxed iframe. A blob URL + // would inherit our origin and let the model HTML touch cookies, + // storage, and same-origin APIs. The sandboxed iframe (no + // allow-same-origin) puts the model HTML in a unique origin instead. + const newWindow = window.open("about:blank", "_blank"); + if (!newWindow) return; + newWindow.opener = null; + const doc = newWindow.document; + doc.title = artifact.title || "HTML Preview"; + doc.documentElement.style.height = "100%"; + doc.body.style.cssText = "margin:0;padding:0;height:100vh;background:#fff"; + const iframe = doc.createElement("iframe"); + iframe.setAttribute("sandbox", "allow-scripts"); + iframe.style.cssText = "border:0;width:100%;height:100%;display:block"; + iframe.srcdoc = wrapHtml(html); + doc.body.appendChild(iframe); }; return ( diff --git a/ui/src/components/ChatMessage/ChatMessage.tsx b/ui/src/components/ChatMessage/ChatMessage.tsx index 74f2336..61f6aa7 100644 --- a/ui/src/components/ChatMessage/ChatMessage.tsx +++ b/ui/src/components/ChatMessage/ChatMessage.tsx @@ -295,10 +295,7 @@ function ChatMessageComponent({
{isStreaming ? "Assistant is responding" : ""}
-
+
{isUser ? ( isEditing ? (
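A quick way to see what the removed blob-URL path handed to model HTML; this
is a sketch for a devtools console on any same-origin page, with
`document.cookie` standing in for every same-origin API:

    // blob: URLs inherit the creating page's origin, so the popup's script
    // can read the host app's cookies and storage:
    const blob = new Blob(["<script>console.log(document.cookie)</script>"],
                          { type: "text/html" });
    window.open(URL.createObjectURL(blob), "_blank");

    // In the sandboxed iframe (sandbox="allow-scripts", no
    // allow-same-origin) the document gets a unique opaque origin and the
    // same read throws a SecurityError instead.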
From ee921ab064e7923a5f0a4bbdbce897360245231d Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 13:58:06 +1000 Subject: [PATCH 108/172] Add bash-style ${VAR:-default} fallback syntax for env-var expansion --- src/config/mod.rs | 70 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 7 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 2166192..d1bbb53 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -126,8 +126,9 @@ pub struct GatewayConfig { impl GatewayConfig { /// Load configuration from a TOML file. /// - /// Environment variables in the format `${VAR_NAME}` are expanded. - /// Missing required variables will cause an error. + /// Environment variables in the format `${VAR_NAME}` are expanded; missing + /// required variables cause an error. Use `${VAR_NAME:-default}` to fall + /// back to a default value when the variable is unset (default may be empty). #[cfg(feature = "server")] pub fn from_file(path: impl AsRef) -> Result { let contents = std::fs::read_to_string(path.as_ref()) @@ -470,7 +471,14 @@ fn check_auth_mode_feature(_raw: &toml::Value, _issues: &mut Vec<(String, &str)> } } -/// Expand environment variables in the format `${VAR_NAME}`. +/// Expand environment variables in the format `${VAR_NAME}` or +/// `${VAR_NAME:-default}` (bash-style optional default). +/// +/// `${VAR}` requires the variable to be set, returning [`ConfigError::EnvVarNotFound`] +/// if it isn't. `${VAR:-default}` falls back to `default` (which may be empty) +/// when the variable is unset, so optional credentials don't force startup +/// failure on every fresh checkout. +/// /// Skips commented lines (lines where content before the variable is a comment). #[cfg(feature = "server")] fn expand_env_vars(input: &str) -> Result { @@ -498,10 +506,19 @@ fn expand_env_vars(input: &str) -> Result { // Add text before this match line_result.push_str(&line[last_end..match_start]); - // Expand the variable - let var_name = &cap[1]; - let value = std::env::var(var_name) - .map_err(|_| ConfigError::EnvVarNotFound(var_name.to_string()))?; + // Split on `:-` for optional defaults: `${VAR:-default}` expands + // to `default` when VAR is unset. Without `:-`, an unset VAR is + // an error so typos still surface. 
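// Worked examples (these mirror the unit tests added below):
//   FOO=real   "${FOO:-fallback}" expands to "real"    (set wins over default)
//   FOO unset  "${FOO:-fallback}" expands to "fallback"
//   FOO unset  "${FOO:-}"         expands to ""        (empty default is legal)
//   FOO unset  "${FOO}"           fails with EnvVarNotFound("FOO")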
+ let body = &cap[1]; + let (var_name, default) = match body.split_once(":-") { + Some((name, def)) => (name, Some(def)), + None => (body, None), + }; + let value = match (std::env::var(var_name), default) { + (Ok(v), _) => v, + (Err(_), Some(def)) => def.to_string(), + (Err(_), None) => return Err(ConfigError::EnvVarNotFound(var_name.to_string())), + }; line_result.push_str(&value); last_end = cap.get(0).unwrap().end(); @@ -574,6 +591,45 @@ mod tests { }); } + #[test] + fn test_env_var_default_when_unset() { + // Ensure the variable really is unset + unsafe { + std::env::remove_var("HADRIAN_TEST_DEFAULT_UNSET"); + } + let result = expand_env_vars("key = \"${HADRIAN_TEST_DEFAULT_UNSET:-fallback}\"").unwrap(); + assert_eq!(result, "key = \"fallback\""); + } + + #[test] + fn test_env_var_default_empty_when_unset() { + unsafe { + std::env::remove_var("HADRIAN_TEST_EMPTY_DEFAULT"); + } + let result = expand_env_vars("key = \"${HADRIAN_TEST_EMPTY_DEFAULT:-}\"").unwrap(); + assert_eq!(result, "key = \"\""); + } + + #[test] + fn test_env_var_default_overridden_when_set() { + temp_env::with_var("HADRIAN_TEST_DEFAULT_SET", Some("real"), || { + let result = + expand_env_vars("key = \"${HADRIAN_TEST_DEFAULT_SET:-fallback}\"").unwrap(); + assert_eq!(result, "key = \"real\""); + }); + } + + #[test] + fn test_env_var_without_default_still_errors_when_unset() { + unsafe { + std::env::remove_var("HADRIAN_TEST_REQUIRED"); + } + let err = expand_env_vars("key = \"${HADRIAN_TEST_REQUIRED}\"").unwrap_err(); + assert!( + matches!(err, ConfigError::EnvVarNotFound(name) if name == "HADRIAN_TEST_REQUIRED") + ); + } + #[test] fn test_env_var_in_comment_ignored() { // Variables in comments should not be expanded From 69aa1c9a76a8ccd89808f13d39db58cadd06cce6 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:00:45 +1000 Subject: [PATCH 109/172] Stack modal focus traps and inert background under stacked dialogs --- ui/src/components/Modal/Modal.tsx | 144 +++++++++++++++++++++--------- 1 file changed, 102 insertions(+), 42 deletions(-) diff --git a/ui/src/components/Modal/Modal.tsx b/ui/src/components/Modal/Modal.tsx index 602dd9d..987b837 100644 --- a/ui/src/components/Modal/Modal.tsx +++ b/ui/src/components/Modal/Modal.tsx @@ -24,6 +24,40 @@ function useModalContext() { return useContext(ModalContext); } +// Shared stack of open modal contents. Only the top entry is interactive — +// stacked dialogs (e.g. a confirm-modal opened over a form-modal) used to +// share Escape/Tab handlers and could let focus tab into the dialog +// underneath. Tracking the stack lets us route keyboard events to the +// topmost dialog only and apply `inert` to everything beneath it. +const modalStack: HTMLElement[] = []; + +function refreshInertState() { + const top = modalStack[modalStack.length - 1] ?? null; + + // Background app: inert when any modal is open, otherwise interactive. + const root = document.getElementById("root"); + if (root) { + if (modalStack.length > 0) { + root.setAttribute("inert", ""); + root.setAttribute("aria-hidden", "true"); + } else { + root.removeAttribute("inert"); + root.removeAttribute("aria-hidden"); + } + } + + // Stacked modals: every dialog except the top one is inert. 
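// Illustrative trace of the stack discipline (names are the ones defined
// above, nothing new is assumed):
//   open ModalA          -> modalStack = [A]; #root inert; A interactive
//   open ModalB over it  -> modalStack = [A, B]; #root and A inert; B on top
//   close ModalB         -> modalStack = [A]; A interactive again, and the
//                           body scroll lock holds until the stack empties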
+ for (const node of modalStack) { + if (node === top) { + node.removeAttribute("inert"); + node.removeAttribute("aria-hidden"); + } else { + node.setAttribute("inert", ""); + node.setAttribute("aria-hidden", "true"); + } + } +} + export interface ModalProps { open: boolean; onClose: () => void; @@ -37,61 +71,87 @@ export function Modal({ open, onClose, children, className }: ModalProps) { const titleId = useId(); const descriptionId = useId(); + const isTopModal = useCallback(() => { + const node = contentRef.current; + return node !== null && modalStack[modalStack.length - 1] === node; + }, []); + const handleEscape = useCallback( (e: KeyboardEvent) => { - if (e.key === "Escape") { - onClose(); - } + if (e.key !== "Escape") return; + // Stacked modals share a window-level keydown listener; only the + // topmost dialog should react, otherwise Escape closes everything. + if (!isTopModal()) return; + onClose(); }, - [onClose] + [onClose, isTopModal] ); // Focus trap - keep focus within modal - const handleTabKey = useCallback((e: KeyboardEvent) => { - if (e.key !== "Tab" || !contentRef.current) return; + const handleTabKey = useCallback( + (e: KeyboardEvent) => { + if (e.key !== "Tab" || !contentRef.current) return; + if (!isTopModal()) return; - const focusableElements = contentRef.current.querySelectorAll( - 'button, [href], input, select, textarea, [tabindex]:not([tabindex="-1"])' - ); - const firstElement = focusableElements[0]; - const lastElement = focusableElements[focusableElements.length - 1]; - - if (e.shiftKey && document.activeElement === firstElement) { - e.preventDefault(); - lastElement?.focus(); - } else if (!e.shiftKey && document.activeElement === lastElement) { - e.preventDefault(); - firstElement?.focus(); - } - }, []); + const focusableElements = contentRef.current.querySelectorAll( + 'button, [href], input, select, textarea, [tabindex]:not([tabindex="-1"])' + ); + const firstElement = focusableElements[0]; + const lastElement = focusableElements[focusableElements.length - 1]; + + if (e.shiftKey && document.activeElement === firstElement) { + e.preventDefault(); + lastElement?.focus(); + } else if (!e.shiftKey && document.activeElement === lastElement) { + e.preventDefault(); + firstElement?.focus(); + } + }, + [isTopModal] + ); // Handle initial focus when modal opens (only runs when `open` changes) useEffect(() => { - if (open) { - // Store currently focused element - previousActiveElement.current = document.activeElement as HTMLElement; - document.body.style.overflow = "hidden"; - - // Focus the first input if available, otherwise the modal content - requestAnimationFrame(() => { - const firstInput = - contentRef.current?.querySelector("input, select, textarea"); - if (firstInput) { - firstInput.focus(); + if (!open) return; + // Store currently focused element + previousActiveElement.current = document.activeElement as HTMLElement; + document.body.style.overflow = "hidden"; + + const node = contentRef.current; + if (node) { + modalStack.push(node); + refreshInertState(); + } + + // Focus the first input if available, otherwise the modal content + requestAnimationFrame(() => { + const firstInput = node?.querySelector("input, select, textarea"); + if (firstInput) { + firstInput.focus(); + } else { + const firstFocusable = node?.querySelector( + 'button, [href], input, select, textarea, [tabindex]:not([tabindex="-1"])' + ); + if (firstFocusable) { + firstFocusable.focus(); } else { - const firstFocusable = contentRef.current?.querySelector( - 'button, [href], input, 
select, textarea, [tabindex]:not([tabindex="-1"])' - ); - if (firstFocusable) { - firstFocusable.focus(); - } else { - contentRef.current?.focus(); - } + node?.focus(); } - }); - } + } + }); + return () => { - document.body.style.overflow = ""; + if (node) { + const idx = modalStack.lastIndexOf(node); + if (idx !== -1) modalStack.splice(idx, 1); + } + // Only release the body scroll lock when the last modal closes, so a + // background page doesn't briefly start scrolling between stacked + // dialogs. + if (modalStack.length === 0) { + document.body.style.overflow = ""; + } + refreshInertState(); // Restore focus to previously focused element if (previousActiveElement.current) { previousActiveElement.current.focus(); From dd942973c70f8965317d5b43ea595e93839a88aa Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:04:58 +1000 Subject: [PATCH 110/172] Centralize error toast formatting via formatApiError helper --- .../ConversationsProvider.tsx | 3 +- .../DataFileUpload/DataFileUpload.tsx | 3 +- .../DomainVerification/AddDomainModal.tsx | 3 +- .../DomainVerificationList.tsx | 5 +- .../VerificationInstructionsModal.tsx | 3 +- .../components/MCPConfigModal/MCPCatalog.tsx | 5 +- .../MCPConfigModal/MCPConfigModal.tsx | 7 +-- .../RbacPolicyVersionHistoryModal.tsx | 3 +- .../ScreenshotRenderer/ScreenshotRenderer.tsx | 6 ++- .../SkillImportModal/SkillImportModal.tsx | 7 +-- .../SkillImportModal/filesystemImport.ts | 3 +- ui/src/components/WasmSetup/WasmSetup.tsx | 5 +- .../components/WasmSetup/WasmSetupGuard.tsx | 3 +- ui/src/pages/AccountPage.tsx | 9 ++-- ui/src/pages/ApiKeyDetailPage.tsx | 5 +- ui/src/pages/ApiKeysPage.tsx | 7 +-- ui/src/pages/KnowledgeBasesPage.tsx | 3 +- ui/src/pages/MyProvidersPage.tsx | 9 ++-- ui/src/pages/admin/ApiKeysPage.tsx | 13 ++++- ui/src/pages/admin/OrgRbacPoliciesPage.tsx | 9 ++-- ui/src/pages/admin/OrgSsoConfigPage.tsx | 7 +-- ui/src/pages/admin/OrganizationDetailPage.tsx | 21 ++++++-- ui/src/pages/admin/OrganizationsPage.tsx | 13 ++++- ui/src/pages/admin/PricingPage.tsx | 19 +++++-- ui/src/pages/admin/ProjectDetailPage.tsx | 41 +++++++++++++--- ui/src/pages/admin/ProjectsPage.tsx | 19 +++++-- ui/src/pages/admin/ProvidersPage.tsx | 21 ++++++-- ui/src/pages/admin/ScimConfigPage.tsx | 9 ++-- ui/src/pages/admin/ServiceAccountsPage.tsx | 7 +-- ui/src/pages/admin/SsoConnectionsPage.tsx | 3 +- ui/src/pages/admin/SsoGroupMappingsPage.tsx | 39 ++++++++++++--- ui/src/pages/admin/TeamDetailPage.tsx | 17 +++++-- ui/src/pages/admin/TeamsPage.tsx | 7 +-- ui/src/pages/admin/UserDetailPage.tsx | 3 +- ui/src/pages/admin/UsersPage.tsx | 5 +- ui/src/pages/admin/VectorStoreDetailPage.tsx | 7 +-- ui/src/pages/admin/VectorStoresPage.tsx | 7 +-- ui/src/pages/chat/utils/skillExecutor.ts | 3 +- ui/src/pages/chat/utils/toolExecutors.ts | 17 ++++--- ui/src/pages/project/MembersTab.tsx | 9 +++- ui/src/pages/project/ProjectDetailPage.tsx | 7 ++- ui/src/pages/project/ProvidersTab.tsx | 21 ++++++-- ui/src/pages/project/SkillsTab.tsx | 3 +- ui/src/pages/project/TemplatesTab.tsx | 7 ++- ui/src/service-worker/sw.ts | 5 +- ui/src/services/duckdb/duckdbService.ts | 3 +- ui/src/services/duckdb/duckdbWorker.ts | 27 +++++----- ui/src/services/mcp/client.ts | 3 +- ui/src/services/pyodide/pyodideService.ts | 5 +- ui/src/services/pyodide/pyodideWorker.ts | 9 ++-- ui/src/services/quickjs/quickjsService.ts | 3 +- ui/src/services/quickjs/quickjsWorker.ts | 7 +-- .../websocket/WebSocketEventClient.ts | 3 +- ui/src/stores/mcpStore.ts | 3 +- ui/src/utils/__tests__/formatApiError.test.ts | 34 
+++++++++++++ ui/src/utils/formatApiError.ts | 49 +++++++++++++++++++ 56 files changed, 429 insertions(+), 145 deletions(-) create mode 100644 ui/src/utils/__tests__/formatApiError.test.ts create mode 100644 ui/src/utils/formatApiError.ts diff --git a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx index fdc3ea1..097c97a 100644 --- a/ui/src/components/ConversationsProvider/ConversationsProvider.tsx +++ b/ui/src/components/ConversationsProvider/ConversationsProvider.tsx @@ -25,6 +25,7 @@ import type { ChatMessage, Conversation } from "@/components/chat-types"; import { usePreferences } from "@/preferences/PreferencesProvider"; import { generateSimpleTitle, generateTitleWithLLM } from "@/utils/generateTitle"; +import { formatApiError } from "@/utils/formatApiError"; const STORAGE_KEY = "hadrian-conversations"; const BROADCAST_CHANNEL = "hadrian-conversations-sync"; @@ -160,7 +161,7 @@ async function withRetry( try { return await fn(); } catch (error) { - lastError = error instanceof Error ? error : new Error(String(error)); + lastError = error instanceof Error ? error : new Error(formatApiError(error)); if (attempt < maxAttempts - 1) { const delay = baseDelay * Math.pow(2, attempt); await new Promise((resolve) => setTimeout(resolve, delay)); diff --git a/ui/src/components/DataFileUpload/DataFileUpload.tsx b/ui/src/components/DataFileUpload/DataFileUpload.tsx index e754677..1e8733c 100644 --- a/ui/src/components/DataFileUpload/DataFileUpload.tsx +++ b/ui/src/components/DataFileUpload/DataFileUpload.tsx @@ -19,6 +19,7 @@ import { type DataFileTable, } from "@/stores/chatUIStore"; import { cn } from "@/utils/cn"; +import { formatApiError } from "@/utils/formatApiError"; import { Modal, ModalHeader, @@ -191,7 +192,7 @@ export function DataFileUpload({ updateDataFileStatus(fileId, false, result.error || "Registration failed"); } } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? 
error.message : formatApiError(error); updateDataFileStatus(fileId, false, errorMsg); } } diff --git a/ui/src/components/DomainVerification/AddDomainModal.tsx b/ui/src/components/DomainVerification/AddDomainModal.tsx index e2a20d1..0a311cf 100644 --- a/ui/src/components/DomainVerification/AddDomainModal.tsx +++ b/ui/src/components/DomainVerification/AddDomainModal.tsx @@ -18,6 +18,7 @@ import { Input } from "@/components/Input/Input"; import { Label } from "@/components/Label/Label"; import { useToast } from "@/components/Toast/Toast"; +import { formatApiError } from "@/utils/formatApiError"; const domainSchema = z.object({ domain: z .string() @@ -65,7 +66,7 @@ export function AddDomainModal({ open, onClose, orgSlug }: AddDomainModalProps) onError: (error) => { toast({ title: "Failed to add domain", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/components/DomainVerification/DomainVerificationList.tsx b/ui/src/components/DomainVerification/DomainVerificationList.tsx index 29ba993..f30cc2d 100644 --- a/ui/src/components/DomainVerification/DomainVerificationList.tsx +++ b/ui/src/components/DomainVerification/DomainVerificationList.tsx @@ -16,6 +16,7 @@ import { useToast } from "@/components/Toast/Toast"; import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const statusVariantMap: Record = { pending: "warning", verified: "success", @@ -58,7 +59,7 @@ export function DomainVerificationList({ onError: (error) => { toast({ title: "Failed to remove domain", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -83,7 +84,7 @@ export function DomainVerificationList({ setVerifyingDomainId(null); toast({ title: "Verification failed", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/components/DomainVerification/VerificationInstructionsModal.tsx b/ui/src/components/DomainVerification/VerificationInstructionsModal.tsx index a010930..ef4d924 100644 --- a/ui/src/components/DomainVerification/VerificationInstructionsModal.tsx +++ b/ui/src/components/DomainVerification/VerificationInstructionsModal.tsx @@ -22,6 +22,7 @@ import { Skeleton } from "@/components/Skeleton/Skeleton"; import { Badge, type BadgeVariant } from "@/components/Badge/Badge"; import { useToast } from "@/components/Toast/Toast"; +import { formatApiError } from "@/utils/formatApiError"; const statusVariantMap: Record = { pending: "warning", verified: "success", @@ -81,7 +82,7 @@ export function VerificationInstructionsModal({ onError: (error) => { toast({ title: "Verification failed", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/components/MCPConfigModal/MCPCatalog.tsx b/ui/src/components/MCPConfigModal/MCPCatalog.tsx index 0ded7b6..5d4afa0 100644 --- a/ui/src/components/MCPConfigModal/MCPCatalog.tsx +++ b/ui/src/components/MCPConfigModal/MCPCatalog.tsx @@ -46,6 +46,7 @@ import type { } from "@/services/mcpRegistry/types"; import type { FavoriteMcpServer } from "@/config/types"; +import { formatApiError } from "@/utils/formatApiError"; const PAGE_SIZE = 30; export interface CatalogPrefill { @@ -196,7 +197,7 @@ export function MCPCatalog({ onPick, onAddManual, onCancel, favorites = [] }: MC .catch((err: unknown) => { if (ctrl.signal.aborted || (err instanceof DOMException && err.name === 
"AbortError")) return; - setError(err instanceof Error ? err.message : String(err)); + setError(err instanceof Error ? err.message : formatApiError(err)); }) .finally(() => { if (!ctrl.signal.aborted) setLoading(false); @@ -240,7 +241,7 @@ export function MCPCatalog({ onPick, onAddManual, onCancel, favorites = [] }: MC } catch (err) { if (ctrl?.signal.aborted || (err instanceof DOMException && err.name === "AbortError")) return; - setError(err instanceof Error ? err.message : String(err)); + setError(err instanceof Error ? err.message : formatApiError(err)); } finally { // Always clear, even on abort — otherwise a new search cancelling an // in-flight load-more would leave the button stuck in its loading state. diff --git a/ui/src/components/MCPConfigModal/MCPConfigModal.tsx b/ui/src/components/MCPConfigModal/MCPConfigModal.tsx index 7d9f367..45329a2 100644 --- a/ui/src/components/MCPConfigModal/MCPConfigModal.tsx +++ b/ui/src/components/MCPConfigModal/MCPConfigModal.tsx @@ -66,6 +66,7 @@ import { import type { MCPToolDefinition, JSONSchema } from "@/services/mcp"; import { MCPCatalog, type CatalogPrefill } from "./MCPCatalog"; +import { formatApiError } from "@/utils/formatApiError"; // ============================================================================= // Types // ============================================================================= @@ -274,7 +275,7 @@ function ServerCard({ server, onEdit, onDelete }: ServerCardProps) { // Connection error stored in server state } } catch (err) { - setAuthError(err instanceof Error ? err.message : String(err)); + setAuthError(err instanceof Error ? err.message : formatApiError(err)); } finally { setIsAuthorizing(false); } @@ -631,7 +632,7 @@ function ServerForm({ editingServer, onSubmit, onCancel, prefill }: ServerFormPr setOauthStatus("authorized"); } catch (err) { setOauthStatus("error"); - setOauthError(err instanceof Error ? err.message : String(err)); + setOauthError(err instanceof Error ? err.message : formatApiError(err)); } }, [form]); @@ -695,7 +696,7 @@ function ServerForm({ editingServer, onSubmit, onCancel, prefill }: ServerFormPr setTestMessage(info ? `${info.name} v${info.version}` : "Connection successful"); } catch (err) { setTestStatus("error"); - setTestMessage(err instanceof Error ? err.message : String(err)); + setTestMessage(err instanceof Error ? 
err.message : formatApiError(err)); } finally { try { await client.disconnect(); diff --git a/ui/src/components/RbacPolicy/RbacPolicyVersionHistoryModal.tsx b/ui/src/components/RbacPolicy/RbacPolicyVersionHistoryModal.tsx index 03eede9..e53cdaa 100644 --- a/ui/src/components/RbacPolicy/RbacPolicyVersionHistoryModal.tsx +++ b/ui/src/components/RbacPolicy/RbacPolicyVersionHistoryModal.tsx @@ -21,6 +21,7 @@ import { useToast } from "@/components/Toast/Toast"; import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; interface RbacPolicyVersionHistoryModalProps { open: boolean; onClose: () => void; @@ -165,7 +166,7 @@ export function RbacPolicyVersionHistoryModal({ onError: (error) => { toast({ title: "Failed to rollback policy", - description: String(error), + description: formatApiError(error), type: "error", }); setRollbackVersion(null); diff --git a/ui/src/components/ScreenshotRenderer/ScreenshotRenderer.tsx b/ui/src/components/ScreenshotRenderer/ScreenshotRenderer.tsx index 6599d3d..4ba397e 100644 --- a/ui/src/components/ScreenshotRenderer/ScreenshotRenderer.tsx +++ b/ui/src/components/ScreenshotRenderer/ScreenshotRenderer.tsx @@ -11,6 +11,7 @@ import type { TotalUsageResult } from "@/stores/conversationStore"; import { captureElementAsBlob } from "@/utils/exportScreenshot"; import { formatCost, formatTokens } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; interface MessageGroup { id: string; userMessage: ChatMessageType; @@ -74,7 +75,10 @@ export function ScreenshotRenderer({ if (!cancelled) onCompleteRef.current(blob); } catch (err) { if (!cancelled) - onCompleteRef.current(undefined, err instanceof Error ? err : new Error(String(err))); + onCompleteRef.current( + undefined, + err instanceof Error ? err : new Error(formatApiError(err)) + ); } }, 500); }); diff --git a/ui/src/components/SkillImportModal/SkillImportModal.tsx b/ui/src/components/SkillImportModal/SkillImportModal.tsx index f5db45b..5a9dd0c 100644 --- a/ui/src/components/SkillImportModal/SkillImportModal.tsx +++ b/ui/src/components/SkillImportModal/SkillImportModal.tsx @@ -25,6 +25,7 @@ import { } from "./githubImport"; import { walkFilesForSkills } from "./filesystemImport"; +import { formatApiError } from "@/utils/formatApiError"; type ImportTab = "github" | "filesystem"; export interface SkillImportModalProps { @@ -113,7 +114,7 @@ export function SkillImportModal({ } setSelected(valid); } catch (err) { - setScanError(err instanceof Error ? err.message : String(err)); + setScanError(err instanceof Error ? err.message : formatApiError(err)); } finally { setIsScanning(false); setScanProgress(""); @@ -138,7 +139,7 @@ export function SkillImportModal({ } setSelected(valid); } catch (err) { - setScanError(err instanceof Error ? err.message : String(err)); + setScanError(err instanceof Error ? err.message : formatApiError(err)); } finally { setIsScanning(false); } @@ -202,7 +203,7 @@ export function SkillImportModal({ })); } } catch (err) { - const message = err instanceof Error ? err.message : String(err); + const message = err instanceof Error ? 
err.message : formatApiError(err); results.push({ name: s.name, ok: false, error: message }); setImportStatus((prev) => ({ ...prev, diff --git a/ui/src/components/SkillImportModal/filesystemImport.ts b/ui/src/components/SkillImportModal/filesystemImport.ts index 6cf48ac..e6cd589 100644 --- a/ui/src/components/SkillImportModal/filesystemImport.ts +++ b/ui/src/components/SkillImportModal/filesystemImport.ts @@ -1,6 +1,7 @@ import { parseSkillMd } from "./parseFrontmatter"; import type { DiscoveredSkill } from "./githubImport"; +import { formatApiError } from "@/utils/formatApiError"; const utf8Encoder = new TextEncoder(); /** @@ -63,7 +64,7 @@ export async function walkFilesForSkills(files: File[]): Promise { toast({ title: "Failed to delete account", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -200,7 +201,7 @@ export default function AccountPage() { } catch (error) { toast({ title: "Failed to export data", - description: String(error), + description: formatApiError(error), type: "error", }); } @@ -234,7 +235,7 @@ export default function AccountPage() { } catch (error) { toast({ title: "Failed to clear local data", - description: String(error), + description: formatApiError(error), type: "error", }); } diff --git a/ui/src/pages/ApiKeyDetailPage.tsx b/ui/src/pages/ApiKeyDetailPage.tsx index 105a702..e6b2a82 100644 --- a/ui/src/pages/ApiKeyDetailPage.tsx +++ b/ui/src/pages/ApiKeyDetailPage.tsx @@ -34,6 +34,7 @@ import { formatDateTime, formatCurrency, formatRelativeTime } from "@/utils/form import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/Tooltip/Tooltip"; import UsageDashboard from "@/components/UsageDashboard/UsageDashboard"; +import { formatApiError } from "@/utils/formatApiError"; type TabId = "overview" | "usage"; const tabs: Tab[] = [ @@ -65,7 +66,7 @@ export default function ApiKeyDetailPage() { toast({ title: "API key revoked", type: "success" }); }, onError: (err) => { - toast({ title: "Failed to revoke API key", description: String(err), type: "error" }); + toast({ title: "Failed to revoke API key", description: formatApiError(err), type: "error" }); }, }); @@ -81,7 +82,7 @@ export default function ApiKeyDetailPage() { toast({ title: "API key rotated", type: "success" }); }, onError: (err) => { - toast({ title: "Failed to rotate API key", description: String(err), type: "error" }); + toast({ title: "Failed to rotate API key", description: formatApiError(err), type: "error" }); }, }); diff --git a/ui/src/pages/ApiKeysPage.tsx b/ui/src/pages/ApiKeysPage.tsx index a2acaea..71c73c8 100644 --- a/ui/src/pages/ApiKeysPage.tsx +++ b/ui/src/pages/ApiKeysPage.tsx @@ -50,6 +50,7 @@ import { MoreHorizontal, Trash2 } from "lucide-react"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/Tooltip/Tooltip"; import { cn } from "@/utils/cn"; +import { formatApiError } from "@/utils/formatApiError"; function ApiKeyCard({ apiKey, readOnly, @@ -432,7 +433,7 @@ export default function ApiKeysPage() { onError: (error) => { toast({ title: "Failed to create API key", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -448,7 +449,7 @@ export default function ApiKeysPage() { onError: (error) => { toast({ title: "Failed to revoke API key", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -467,7 +468,7 @@ export default function ApiKeysPage() { onError: (error) => { toast({ title: "Failed to rotate API key", - description: String(error), + 
description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/pages/KnowledgeBasesPage.tsx b/ui/src/pages/KnowledgeBasesPage.tsx index fc19760..b245128 100644 --- a/ui/src/pages/KnowledgeBasesPage.tsx +++ b/ui/src/pages/KnowledgeBasesPage.tsx @@ -18,6 +18,7 @@ import { VectorStoreFormModal } from "@/components/Admin"; import { useToast } from "@/components/Toast/Toast"; import { formatDateTime, formatBytes } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; function StatusBadge({ status }: { status: string }) { const variants: Record = { completed: "default", @@ -148,7 +149,7 @@ export default function KnowledgeBasesPage() { onError: (error) => { toast({ title: "Failed to create knowledge base", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/pages/MyProvidersPage.tsx b/ui/src/pages/MyProvidersPage.tsx index 13c632f..ceb58f7 100644 --- a/ui/src/pages/MyProvidersPage.tsx +++ b/ui/src/pages/MyProvidersPage.tsx @@ -49,6 +49,7 @@ import { import { useToast } from "@/components/Toast/Toast"; import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; import { PROVIDER_TYPES, type ProviderTypeValue, @@ -623,7 +624,7 @@ export default function MyProvidersPage() { onError: (error) => { toast({ title: "Failed to create provider", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -643,7 +644,7 @@ export default function MyProvidersPage() { onError: (error) => { toast({ title: "Failed to update provider", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -658,7 +659,7 @@ export default function MyProvidersPage() { onError: (error) => { toast({ title: "Failed to delete provider", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -679,7 +680,7 @@ export default function MyProvidersPage() { const id = variables.path.id; setTestResults((prev) => ({ ...prev, - [id]: { status: "error", message: String(error), latency_ms: null }, + [id]: { status: "error", message: formatApiError(error), latency_ms: null }, })); setTestingIds((prev) => { const next = new Set(prev); diff --git a/ui/src/pages/admin/ApiKeysPage.tsx b/ui/src/pages/admin/ApiKeysPage.tsx index b9b3b7d..3c2e942 100644 --- a/ui/src/pages/admin/ApiKeysPage.tsx +++ b/ui/src/pages/admin/ApiKeysPage.tsx @@ -31,6 +31,7 @@ import { import { useCursorPagination } from "@/hooks"; import { formatDateTime, formatCurrency } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); export default function ApiKeysPage() { @@ -77,7 +78,11 @@ export default function ApiKeysPage() { toast({ title: "API key created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create API key", description: String(error), type: "error" }); + toast({ + title: "Failed to create API key", + description: formatApiError(error), + type: "error", + }); }, }); @@ -88,7 +93,11 @@ export default function ApiKeysPage() { toast({ title: "API key revoked", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to revoke API key", description: String(error), type: "error" }); + toast({ + title: "Failed to revoke API key", + description: formatApiError(error), + type: "error", + }); }, }); diff --git 
a/ui/src/pages/admin/OrgRbacPoliciesPage.tsx b/ui/src/pages/admin/OrgRbacPoliciesPage.tsx index b60412d..c63aab9 100644 --- a/ui/src/pages/admin/OrgRbacPoliciesPage.tsx +++ b/ui/src/pages/admin/OrgRbacPoliciesPage.tsx @@ -30,6 +30,7 @@ import { } from "@/components/RbacPolicy"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); export default function OrgRbacPoliciesPage() { @@ -64,7 +65,7 @@ export default function OrgRbacPoliciesPage() { onError: (error) => { toast({ title: "Failed to create policy", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -82,7 +83,7 @@ export default function OrgRbacPoliciesPage() { onError: (error) => { toast({ title: "Failed to update policy", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -98,7 +99,7 @@ export default function OrgRbacPoliciesPage() { onError: (error) => { toast({ title: "Failed to delete policy", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -113,7 +114,7 @@ export default function OrgRbacPoliciesPage() { onError: (error) => { toast({ title: "Failed to update policy", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/pages/admin/OrgSsoConfigPage.tsx b/ui/src/pages/admin/OrgSsoConfigPage.tsx index cffa413..aa45460 100644 --- a/ui/src/pages/admin/OrgSsoConfigPage.tsx +++ b/ui/src/pages/admin/OrgSsoConfigPage.tsx @@ -39,6 +39,7 @@ import { } from "@/components/DomainVerification"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; export default function OrgSsoConfigPage() { const { orgSlug } = useParams<{ orgSlug: string }>(); const navigate = useNavigate(); @@ -82,7 +83,7 @@ export default function OrgSsoConfigPage() { onError: (error) => { toast({ title: "Failed to create SSO configuration", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -99,7 +100,7 @@ export default function OrgSsoConfigPage() { onError: (error) => { toast({ title: "Failed to update SSO configuration", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -115,7 +116,7 @@ export default function OrgSsoConfigPage() { onError: (error) => { toast({ title: "Failed to delete SSO configuration", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/pages/admin/OrganizationDetailPage.tsx b/ui/src/pages/admin/OrganizationDetailPage.tsx index 598f627..598162a 100644 --- a/ui/src/pages/admin/OrganizationDetailPage.tsx +++ b/ui/src/pages/admin/OrganizationDetailPage.tsx @@ -67,6 +67,7 @@ import { formatDateTime, formatCurrency } from "@/utils/formatters"; import UsageDashboard from "@/components/UsageDashboard/UsageDashboard"; import { createTemplateColumns } from "./promptColumns"; +import { formatApiError } from "@/utils/formatApiError"; type TabId = | "projects" | "teams" @@ -185,7 +186,11 @@ export default function OrganizationDetailPage() { toast({ title: "Organization updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update organization", description: String(error), type: "error" }); + toast({ + title: "Failed to update organization", + description: formatApiError(error), + type: "error", + }); }, }); @@ -198,7 +203,7 @@ export default function 
OrganizationDetailPage() { toast({ title: "Member added", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to add member", description: String(error), type: "error" }); + toast({ title: "Failed to add member", description: formatApiError(error), type: "error" }); }, }); @@ -210,7 +215,11 @@ export default function OrganizationDetailPage() { toast({ title: "Member removed", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to remove member", description: String(error), type: "error" }); + toast({ + title: "Failed to remove member", + description: formatApiError(error), + type: "error", + }); }, }); @@ -222,7 +231,11 @@ export default function OrganizationDetailPage() { toast({ title: "Template deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete template", description: String(error), type: "error" }); + toast({ + title: "Failed to delete template", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/pages/admin/OrganizationsPage.tsx b/ui/src/pages/admin/OrganizationsPage.tsx index 47efdd2..2fff450 100644 --- a/ui/src/pages/admin/OrganizationsPage.tsx +++ b/ui/src/pages/admin/OrganizationsPage.tsx @@ -30,6 +30,7 @@ import { PageHeader, ResourceTable } from "@/components/Admin"; import { useCursorPagination } from "@/hooks"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); const createOrganizationSchema = z.object({ @@ -73,7 +74,11 @@ export default function OrganizationsPage() { toast({ title: "Organization created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create organization", description: String(error), type: "error" }); + toast({ + title: "Failed to create organization", + description: formatApiError(error), + type: "error", + }); }, }); @@ -84,7 +89,11 @@ export default function OrganizationsPage() { toast({ title: "Organization deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete organization", description: String(error), type: "error" }); + toast({ + title: "Failed to delete organization", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/pages/admin/PricingPage.tsx b/ui/src/pages/admin/PricingPage.tsx index 8b3b873..892c8c4 100644 --- a/ui/src/pages/admin/PricingPage.tsx +++ b/ui/src/pages/admin/PricingPage.tsx @@ -34,6 +34,7 @@ import { import { useCursorPagination } from "@/hooks"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); export default function PricingPage() { @@ -59,7 +60,11 @@ export default function PricingPage() { toast({ title: "Pricing created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create pricing", description: String(error), type: "error" }); + toast({ + title: "Failed to create pricing", + description: formatApiError(error), + type: "error", + }); }, }); @@ -70,7 +75,11 @@ export default function PricingPage() { toast({ title: "Pricing deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete pricing", description: String(error), type: "error" }); + toast({ + title: "Failed to delete pricing", + description: formatApiError(error), + type: "error", + }); }, }); @@ -83,7 +92,11 @@ export default function PricingPage() { toast({ title: "Pricing updated", type: "success" }); }, 
onError: (error) => { - toast({ title: "Failed to update pricing", description: String(error), type: "error" }); + toast({ + title: "Failed to update pricing", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/pages/admin/ProjectDetailPage.tsx b/ui/src/pages/admin/ProjectDetailPage.tsx index 0bd4611..346feaa 100644 --- a/ui/src/pages/admin/ProjectDetailPage.tsx +++ b/ui/src/pages/admin/ProjectDetailPage.tsx @@ -85,6 +85,7 @@ import { formatDateTime, formatCurrency } from "@/utils/formatters"; import UsageDashboard from "@/components/UsageDashboard/UsageDashboard"; import { createTemplateColumns } from "./promptColumns"; +import { formatApiError } from "@/utils/formatApiError"; type TabId = "members" | "api-keys" | "providers" | "pricing" | "templates" | "usage"; const tabs: Tab[] = [ @@ -418,7 +419,11 @@ export default function ProjectDetailPage() { toast({ title: "Project updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update project", description: String(error), type: "error" }); + toast({ + title: "Failed to update project", + description: formatApiError(error), + type: "error", + }); }, }); @@ -431,7 +436,7 @@ export default function ProjectDetailPage() { toast({ title: "Member added", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to add member", description: String(error), type: "error" }); + toast({ title: "Failed to add member", description: formatApiError(error), type: "error" }); }, }); @@ -443,7 +448,11 @@ export default function ProjectDetailPage() { toast({ title: "Member removed", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to remove member", description: String(error), type: "error" }); + toast({ + title: "Failed to remove member", + description: formatApiError(error), + type: "error", + }); }, }); @@ -456,7 +465,11 @@ export default function ProjectDetailPage() { toast({ title: "Provider created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create provider", description: String(error), type: "error" }); + toast({ + title: "Failed to create provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -469,7 +482,11 @@ export default function ProjectDetailPage() { toast({ title: "Provider updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update provider", description: String(error), type: "error" }); + toast({ + title: "Failed to update provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -480,7 +497,11 @@ export default function ProjectDetailPage() { toast({ title: "Provider deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete provider", description: String(error), type: "error" }); + toast({ + title: "Failed to delete provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -499,7 +520,7 @@ export default function ProjectDetailPage() { const id = variables.path.id; setTestResults((prev) => ({ ...prev, - [id]: { status: "error", message: String(error), latency_ms: null }, + [id]: { status: "error", message: formatApiError(error), latency_ms: null }, })); setTestingIds((prev) => { const next = new Set(prev); @@ -517,7 +538,11 @@ export default function ProjectDetailPage() { toast({ title: "Template deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete template", description: String(error), type: "error" }); + toast({ + title: "Failed to delete 
template", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/pages/admin/ProjectsPage.tsx b/ui/src/pages/admin/ProjectsPage.tsx index 90f7ecc..d216653 100644 --- a/ui/src/pages/admin/ProjectsPage.tsx +++ b/ui/src/pages/admin/ProjectsPage.tsx @@ -31,6 +31,7 @@ import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { PageHeader, ResourceTable, OrganizationSelect, TeamSelect } from "@/components/Admin"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); const createProjectSchema = z.object({ @@ -104,7 +105,11 @@ export default function ProjectsPage() { toast({ title: "Project created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create project", description: String(error), type: "error" }); + toast({ + title: "Failed to create project", + description: formatApiError(error), + type: "error", + }); }, }); @@ -115,7 +120,11 @@ export default function ProjectsPage() { toast({ title: "Project deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete project", description: String(error), type: "error" }); + toast({ + title: "Failed to delete project", + description: formatApiError(error), + type: "error", + }); }, }); @@ -128,7 +137,11 @@ export default function ProjectsPage() { toast({ title: "Project updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update project", description: String(error), type: "error" }); + toast({ + title: "Failed to update project", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/pages/admin/ProvidersPage.tsx b/ui/src/pages/admin/ProvidersPage.tsx index b77beba..7d05b03 100644 --- a/ui/src/pages/admin/ProvidersPage.tsx +++ b/ui/src/pages/admin/ProvidersPage.tsx @@ -54,6 +54,7 @@ import { import { getProviderTypeLabel, TestResultDisplay } from "@/pages/providers/shared"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; // -- Provider Card -- function ProviderCard({ @@ -269,7 +270,11 @@ export default function ProvidersPage() { toast({ title: "Provider created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create provider", description: String(error), type: "error" }); + toast({ + title: "Failed to create provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -282,7 +287,11 @@ export default function ProvidersPage() { toast({ title: "Provider updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update provider", description: String(error), type: "error" }); + toast({ + title: "Failed to update provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -293,7 +302,11 @@ export default function ProvidersPage() { toast({ title: "Provider deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete provider", description: String(error), type: "error" }); + toast({ + title: "Failed to delete provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -312,7 +325,7 @@ export default function ProvidersPage() { const id = variables.path.id; setTestResults((prev) => ({ ...prev, - [id]: { status: "error", message: String(error), latency_ms: null }, + [id]: { status: "error", message: formatApiError(error), latency_ms: null }, })); setTestingIds((prev) => { const next = new 
Set(prev); diff --git a/ui/src/pages/admin/ScimConfigPage.tsx b/ui/src/pages/admin/ScimConfigPage.tsx index e1d34c7..01575db 100644 --- a/ui/src/pages/admin/ScimConfigPage.tsx +++ b/ui/src/pages/admin/ScimConfigPage.tsx @@ -35,6 +35,7 @@ import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { ScimConfigFormModal, ScimTokenCreatedModal } from "@/components/ScimConfig"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; export default function ScimConfigPage() { const { orgSlug } = useParams<{ orgSlug: string }>(); const navigate = useNavigate(); @@ -78,7 +79,7 @@ export default function ScimConfigPage() { onError: (error) => { toast({ title: "Failed to create SCIM configuration", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -95,7 +96,7 @@ export default function ScimConfigPage() { onError: (error) => { toast({ title: "Failed to update SCIM configuration", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -111,7 +112,7 @@ export default function ScimConfigPage() { onError: (error) => { toast({ title: "Failed to delete SCIM configuration", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -128,7 +129,7 @@ export default function ScimConfigPage() { onError: (error) => { toast({ title: "Failed to rotate SCIM token", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/pages/admin/ServiceAccountsPage.tsx b/ui/src/pages/admin/ServiceAccountsPage.tsx index 4a13fc8..26944bd 100644 --- a/ui/src/pages/admin/ServiceAccountsPage.tsx +++ b/ui/src/pages/admin/ServiceAccountsPage.tsx @@ -31,6 +31,7 @@ import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { PageHeader, ResourceTable, OrganizationSelect } from "@/components/Admin"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); const createServiceAccountSchema = z.object({ @@ -118,7 +119,7 @@ export default function ServiceAccountsPage() { onError: (error) => { toast({ title: "Failed to create service account", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -133,7 +134,7 @@ export default function ServiceAccountsPage() { onError: (error) => { toast({ title: "Failed to delete service account", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -150,7 +151,7 @@ export default function ServiceAccountsPage() { onError: (error) => { toast({ title: "Failed to update service account", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/pages/admin/SsoConnectionsPage.tsx b/ui/src/pages/admin/SsoConnectionsPage.tsx index 205cce0..743d12d 100644 --- a/ui/src/pages/admin/SsoConnectionsPage.tsx +++ b/ui/src/pages/admin/SsoConnectionsPage.tsx @@ -6,6 +6,7 @@ import { SsoConnectionCard } from "@/components/SsoConnections"; import { Card, CardContent } from "@/components/Card/Card"; import { Skeleton } from "@/components/Skeleton/Skeleton"; +import { formatApiError } from "@/utils/formatApiError"; export default function SsoConnectionsPage() { const { data, isLoading, error } = useQuery(ssoConnectionsListOptions()); @@ -57,7 +58,7 @@ export default function SsoConnectionsPage() {

              Failed to load SSO connections
-             {String(error)}
+             {formatApiError(error)}
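// Hedged aside (not part of the patch): the formatApiError helper these call
// sites now use is added at the end of this same patch; per its unit tests in
// ui/src/utils/__tests__/formatApiError.test.ts, it behaves as:
//
//   formatApiError(new Error("nope"));                // "nope"
//   formatApiError({ error: { message: "denied" } }); // "denied"
//   formatApiError({ random: 1 });                    // "Unknown error", never "[object Object]"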

diff --git a/ui/src/pages/admin/SsoGroupMappingsPage.tsx b/ui/src/pages/admin/SsoGroupMappingsPage.tsx index a2bb85e..a10baa9 100644 --- a/ui/src/pages/admin/SsoGroupMappingsPage.tsx +++ b/ui/src/pages/admin/SsoGroupMappingsPage.tsx @@ -60,6 +60,7 @@ import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { Badge } from "@/components/Badge/Badge"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); // Form schema for creating/editing a mapping @@ -208,7 +209,11 @@ export default function SsoGroupMappingsPage() { toast({ title: "Group mapping created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create mapping", description: String(error), type: "error" }); + toast({ + title: "Failed to create mapping", + description: formatApiError(error), + type: "error", + }); }, }); @@ -221,7 +226,11 @@ export default function SsoGroupMappingsPage() { toast({ title: "Group mapping updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update mapping", description: String(error), type: "error" }); + toast({ + title: "Failed to update mapping", + description: formatApiError(error), + type: "error", + }); }, }); @@ -233,7 +242,11 @@ export default function SsoGroupMappingsPage() { toast({ title: "Group mapping deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete mapping", description: String(error), type: "error" }); + toast({ + title: "Failed to delete mapping", + description: formatApiError(error), + type: "error", + }); }, }); @@ -244,7 +257,11 @@ export default function SsoGroupMappingsPage() { setTestResults(data); }, onError: (error) => { - toast({ title: "Failed to test mappings", description: String(error), type: "error" }); + toast({ + title: "Failed to test mappings", + description: formatApiError(error), + type: "error", + }); }, }); @@ -269,7 +286,11 @@ export default function SsoGroupMappingsPage() { } }, onError: (error) => { - toast({ title: "Failed to import mappings", description: String(error), type: "error" }); + toast({ + title: "Failed to import mappings", + description: formatApiError(error), + type: "error", + }); }, }); @@ -325,7 +346,11 @@ export default function SsoGroupMappingsPage() { toast({ title: `Exported as ${format.toUpperCase()}`, type: "success" }); } catch (error) { - toast({ title: "Failed to export mappings", description: String(error), type: "error" }); + toast({ + title: "Failed to export mappings", + description: formatApiError(error), + type: "error", + }); } finally { setIsExporting(false); } @@ -455,7 +480,7 @@ export default function SsoGroupMappingsPage() { }, }); } catch (err) { - toast({ title: "Failed to parse file", description: String(err), type: "error" }); + toast({ title: "Failed to parse file", description: formatApiError(err), type: "error" }); } }; diff --git a/ui/src/pages/admin/TeamDetailPage.tsx b/ui/src/pages/admin/TeamDetailPage.tsx index 65114bb..ec87c4b 100644 --- a/ui/src/pages/admin/TeamDetailPage.tsx +++ b/ui/src/pages/admin/TeamDetailPage.tsx @@ -40,6 +40,7 @@ import { formatDateTime } from "@/utils/formatters"; import UsageDashboard from "@/components/UsageDashboard/UsageDashboard"; import { createTemplateColumns } from "./promptColumns"; +import { formatApiError } from "@/utils/formatApiError"; type TabId = "members" | "templates" | "usage"; const tabs: Tab[] = [ @@ -112,7 +113,7 @@ export default function TeamDetailPage() 
{ toast({ title: "Team updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update team", description: String(error), type: "error" }); + toast({ title: "Failed to update team", description: formatApiError(error), type: "error" }); }, }); @@ -125,7 +126,7 @@ export default function TeamDetailPage() { toast({ title: "Member added", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to add member", description: String(error), type: "error" }); + toast({ title: "Failed to add member", description: formatApiError(error), type: "error" }); }, }); @@ -137,7 +138,11 @@ export default function TeamDetailPage() { toast({ title: "Member removed", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to remove member", description: String(error), type: "error" }); + toast({ + title: "Failed to remove member", + description: formatApiError(error), + type: "error", + }); }, }); @@ -149,7 +154,11 @@ export default function TeamDetailPage() { toast({ title: "Template deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete template", description: String(error), type: "error" }); + toast({ + title: "Failed to delete template", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/pages/admin/TeamsPage.tsx b/ui/src/pages/admin/TeamsPage.tsx index 1c1a326..1b10f44 100644 --- a/ui/src/pages/admin/TeamsPage.tsx +++ b/ui/src/pages/admin/TeamsPage.tsx @@ -31,6 +31,7 @@ import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { PageHeader, ResourceTable, OrganizationSelect } from "@/components/Admin"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); const createTeamSchema = z.object({ @@ -96,7 +97,7 @@ export default function TeamsPage() { toast({ title: "Team created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create team", description: String(error), type: "error" }); + toast({ title: "Failed to create team", description: formatApiError(error), type: "error" }); }, }); @@ -107,7 +108,7 @@ export default function TeamsPage() { toast({ title: "Team deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete team", description: String(error), type: "error" }); + toast({ title: "Failed to delete team", description: formatApiError(error), type: "error" }); }, }); @@ -120,7 +121,7 @@ export default function TeamsPage() { toast({ title: "Team updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update team", description: String(error), type: "error" }); + toast({ title: "Failed to update team", description: formatApiError(error), type: "error" }); }, }); diff --git a/ui/src/pages/admin/UserDetailPage.tsx b/ui/src/pages/admin/UserDetailPage.tsx index 528624a..7176589 100644 --- a/ui/src/pages/admin/UserDetailPage.tsx +++ b/ui/src/pages/admin/UserDetailPage.tsx @@ -29,6 +29,7 @@ import { formatDateTime, formatCurrency } from "@/utils/formatters"; import { SessionsPanel } from "@/components/Admin"; import UsageDashboard from "@/components/UsageDashboard/UsageDashboard"; +import { formatApiError } from "@/utils/formatApiError"; type TabId = "api-keys" | "providers" | "pricing" | "sessions" | "usage"; const tabs: { id: TabId; label: string; icon: React.ReactNode }[] = [ @@ -104,7 +105,7 @@ export default function UserDetailPage() { toast({ title: "User updated", type: "success" 
}); }, onError: (error) => { - toast({ title: "Failed to update user", description: String(error), type: "error" }); + toast({ title: "Failed to update user", description: formatApiError(error), type: "error" }); }, }); diff --git a/ui/src/pages/admin/UsersPage.tsx b/ui/src/pages/admin/UsersPage.tsx index 8acd1da..f1a3c94 100644 --- a/ui/src/pages/admin/UsersPage.tsx +++ b/ui/src/pages/admin/UsersPage.tsx @@ -29,6 +29,7 @@ import { PageHeader, ResourceTable } from "@/components/Admin"; import { useCursorPagination } from "@/hooks"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); const createUserSchema = z.object({ @@ -87,7 +88,7 @@ export default function UsersPage() { toast({ title: "User created", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to create user", description: String(error), type: "error" }); + toast({ title: "Failed to create user", description: formatApiError(error), type: "error" }); }, }); @@ -100,7 +101,7 @@ export default function UsersPage() { toast({ title: "User updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to update user", description: String(error), type: "error" }); + toast({ title: "Failed to update user", description: formatApiError(error), type: "error" }); }, }); diff --git a/ui/src/pages/admin/VectorStoreDetailPage.tsx b/ui/src/pages/admin/VectorStoreDetailPage.tsx index 44f6802..582c969 100644 --- a/ui/src/pages/admin/VectorStoreDetailPage.tsx +++ b/ui/src/pages/admin/VectorStoreDetailPage.tsx @@ -42,6 +42,7 @@ import { DetailPageHeader, StatCard, StatValue, EMBEDDING_MODELS } from "@/compo import { ChunkViewer, SearchPreview } from "@/components/VectorStores"; import { formatDateTime, formatBytes } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const fileColumnHelper = createColumnHelper(); /** Status badge for file processing status */ @@ -188,7 +189,7 @@ function AddFileModal({ const uploadMutation = useMutation({ ...fileUploadMutation(), onError: (error) => { - toast({ title: "Failed to upload file", description: String(error), type: "error" }); + toast({ title: "Failed to upload file", description: formatApiError(error), type: "error" }); }, }); @@ -201,7 +202,7 @@ function AddFileModal({ handleClose(); }, onError: (error) => { - toast({ title: "Failed to add file", description: String(error), type: "error" }); + toast({ title: "Failed to add file", description: formatApiError(error), type: "error" }); }, }); @@ -532,7 +533,7 @@ export default function VectorStoreDetailPage() { toast({ title: "File removed from knowledge base", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to remove file", description: String(error), type: "error" }); + toast({ title: "Failed to remove file", description: formatApiError(error), type: "error" }); }, }); diff --git a/ui/src/pages/admin/VectorStoresPage.tsx b/ui/src/pages/admin/VectorStoresPage.tsx index ea8cc99..616b7ff 100644 --- a/ui/src/pages/admin/VectorStoresPage.tsx +++ b/ui/src/pages/admin/VectorStoresPage.tsx @@ -31,6 +31,7 @@ import { import { useOpenAIPagination } from "@/hooks"; import { formatDateTime, formatBytes } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); /** Status badge for vector store status */ @@ -99,7 +100,7 @@ export default function VectorStoresPage() { onError: (error) => { toast({ title: 
"Failed to create knowledge base", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -114,7 +115,7 @@ export default function VectorStoresPage() { onError: (error) => { toast({ title: "Failed to delete knowledge base", - description: String(error), + description: formatApiError(error), type: "error", }); }, @@ -131,7 +132,7 @@ export default function VectorStoresPage() { onError: (error) => { toast({ title: "Failed to update knowledge base", - description: String(error), + description: formatApiError(error), type: "error", }); }, diff --git a/ui/src/pages/chat/utils/skillExecutor.ts b/ui/src/pages/chat/utils/skillExecutor.ts index d4bc1fd..7e95de1 100644 --- a/ui/src/pages/chat/utils/skillExecutor.ts +++ b/ui/src/pages/chat/utils/skillExecutor.ts @@ -4,6 +4,7 @@ import { getFullSkill, getSkillByName, setFullSkill } from "./skillCache"; import type { ParsedToolCall } from "./toolCallParser"; import type { Artifact, ToolExecutionResult, ToolExecutor } from "./toolExecutors"; +import { formatApiError } from "@/utils/formatApiError"; function formatBytes(n: number): string { if (n < 1024) return `${n} B`; if (n < 1024 * 1024) return `${(n / 1024).toFixed(1)} KiB`; @@ -172,7 +173,7 @@ export const skillExecutor: ToolExecutor = async ( } catch (err) { return { success: false, - error: `Failed to load skill "${command}": ${err instanceof Error ? err.message : String(err)}`, + error: `Failed to load skill "${command}": ${err instanceof Error ? err.message : formatApiError(err)}`, }; } } diff --git a/ui/src/pages/chat/utils/toolExecutors.ts b/ui/src/pages/chat/utils/toolExecutors.ts index 0a3d86f..db7127f 100644 --- a/ui/src/pages/chat/utils/toolExecutors.ts +++ b/ui/src/pages/chat/utils/toolExecutors.ts @@ -44,6 +44,7 @@ import { skillExecutor } from "./skillExecutor"; import type { ToolContent } from "@/services/mcp"; import safeRegex from "safe-regex"; +import { formatApiError } from "@/utils/formatApiError"; /** * Context provided to tool executors */ @@ -733,7 +734,7 @@ export const codeInterpreterExecutor: ToolExecutor = async ( // Unsubscribe from status updates on error unsubscribe?.(); - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); // Still show the code that was attempted, plus the error const artifacts: Artifact[] = [ { @@ -912,7 +913,7 @@ export const jsInterpreterExecutor: ToolExecutor = async ( // Unsubscribe from status updates on error unsubscribe?.(); - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); // Still show the code that was attempted, plus the error const artifacts: Artifact[] = [ { @@ -1411,7 +1412,7 @@ export const wikipediaExecutor: ToolExecutor = async ( }; } - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, error: errorMsg, @@ -1784,7 +1785,7 @@ export const wikidataExecutor: ToolExecutor = async ( }; } - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? 
error.message : formatApiError(error); return { success: false, error: errorMsg, @@ -1895,7 +1896,7 @@ export const chartRenderExecutor: ToolExecutor = async ( const { compile } = await import("vega-lite"); compile(spec as unknown as Parameters[0]); } catch (err) { - const message = err instanceof Error ? err.message : String(err); + const message = err instanceof Error ? err.message : formatApiError(err); return { success: false, error: message, @@ -2099,7 +2100,7 @@ export const sqlQueryExecutor: ToolExecutor = async ( // Unsubscribe from status updates on error unsubscribe?.(); - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); // Still show the SQL that was attempted, plus the error const artifacts: Artifact[] = [ { @@ -2465,7 +2466,7 @@ export const subAgentExecutor: ToolExecutor = async ( // Clear status message on error context.onStatusMessage?.(toolId, ""); - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); // Check for abort if (error instanceof Error && error.name === "AbortError") { @@ -2650,7 +2651,7 @@ const mcpToolExecutor: ToolExecutor = async (toolCall, context) => { // Clear status message on error context.onStatusMessage?.(toolCall.id, ""); - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, diff --git a/ui/src/pages/project/MembersTab.tsx b/ui/src/pages/project/MembersTab.tsx index d433476..3f56779 100644 --- a/ui/src/pages/project/MembersTab.tsx +++ b/ui/src/pages/project/MembersTab.tsx @@ -18,6 +18,7 @@ import { AddMemberModal } from "@/components/Admin"; import { useToast } from "@/components/Toast/Toast"; import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; +import { formatApiError } from "@/utils/formatApiError"; const columnHelper = createColumnHelper(); interface MembersTabProps { @@ -50,7 +51,7 @@ export function MembersTab({ orgSlug, projectSlug }: MembersTabProps) { toast({ title: "Member added", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to add member", description: String(error), type: "error" }); + toast({ title: "Failed to add member", description: formatApiError(error), type: "error" }); }, }); @@ -61,7 +62,11 @@ export function MembersTab({ orgSlug, projectSlug }: MembersTabProps) { toast({ title: "Member removed", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to remove member", description: String(error), type: "error" }); + toast({ + title: "Failed to remove member", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/pages/project/ProjectDetailPage.tsx b/ui/src/pages/project/ProjectDetailPage.tsx index 23fdc83..a82c0a4 100644 --- a/ui/src/pages/project/ProjectDetailPage.tsx +++ b/ui/src/pages/project/ProjectDetailPage.tsx @@ -39,6 +39,7 @@ import { TemplatesTab } from "./TemplatesTab"; import { SkillsTab } from "./SkillsTab"; import { UsageTab } from "./UsageTab"; +import { formatApiError } from "@/utils/formatApiError"; type TabId = "members" | "api-keys" | "providers" | "pricing" | "templates" | "skills" | "usage"; const tabs: Tab[] = [ @@ -90,7 +91,11 @@ export default function ProjectDetailPage() { toast({ title: "Project updated", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to 
update project", description: String(error), type: "error" }); + toast({ + title: "Failed to update project", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/pages/project/ProvidersTab.tsx b/ui/src/pages/project/ProvidersTab.tsx index 692d790..114619e 100644 --- a/ui/src/pages/project/ProvidersTab.tsx +++ b/ui/src/pages/project/ProvidersTab.tsx @@ -50,6 +50,7 @@ import { import { useToast } from "@/components/Toast/Toast"; import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { formatDateTime } from "@/utils/formatters"; +import { formatApiError } from "@/utils/formatApiError"; import { PROVIDER_TYPES, type ProviderTypeValue, @@ -618,7 +619,11 @@ export function ProvidersTab({ orgSlug, projectSlug, projectId }: ProvidersTabPr }); }, onError: (error) => { - toast({ title: "Failed to create provider", description: String(error), type: "error" }); + toast({ + title: "Failed to create provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -636,7 +641,11 @@ export function ProvidersTab({ orgSlug, projectSlug, projectId }: ProvidersTabPr }); }, onError: (error) => { - toast({ title: "Failed to update provider", description: String(error), type: "error" }); + toast({ + title: "Failed to update provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -649,7 +658,11 @@ export function ProvidersTab({ orgSlug, projectSlug, projectId }: ProvidersTabPr toast({ title: "Provider deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete provider", description: String(error), type: "error" }); + toast({ + title: "Failed to delete provider", + description: formatApiError(error), + type: "error", + }); }, }); @@ -668,7 +681,7 @@ export function ProvidersTab({ orgSlug, projectSlug, projectId }: ProvidersTabPr const id = variables.path.id; setTestResults((prev) => ({ ...prev, - [id]: { status: "error", message: String(error), latency_ms: null }, + [id]: { status: "error", message: formatApiError(error), latency_ms: null }, })); setTestingIds((prev) => { const next = new Set(prev); diff --git a/ui/src/pages/project/SkillsTab.tsx b/ui/src/pages/project/SkillsTab.tsx index e75a944..ab8e852 100644 --- a/ui/src/pages/project/SkillsTab.tsx +++ b/ui/src/pages/project/SkillsTab.tsx @@ -16,6 +16,7 @@ import { useToast } from "@/components/Toast/Toast"; import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { createSkillColumns } from "@/pages/admin/skillColumns"; +import { formatApiError } from "@/utils/formatApiError"; interface SkillsTabProps { orgSlug: string; projectSlug: string; @@ -42,7 +43,7 @@ export function SkillsTab({ orgSlug, projectSlug, projectId }: SkillsTabProps) { toast({ title: "Skill deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete skill", description: String(error), type: "error" }); + toast({ title: "Failed to delete skill", description: formatApiError(error), type: "error" }); }, }); diff --git a/ui/src/pages/project/TemplatesTab.tsx b/ui/src/pages/project/TemplatesTab.tsx index e06bcef..06a71d1 100644 --- a/ui/src/pages/project/TemplatesTab.tsx +++ b/ui/src/pages/project/TemplatesTab.tsx @@ -16,6 +16,7 @@ import { useToast } from "@/components/Toast/Toast"; import { useConfirm } from "@/components/ConfirmDialog/ConfirmDialog"; import { createTemplateColumns } from "@/pages/admin/promptColumns"; +import { formatApiError } from "@/utils/formatApiError"; interface TemplatesTabProps { 
orgSlug: string; projectSlug: string; @@ -42,7 +43,11 @@ export function TemplatesTab({ orgSlug, projectSlug, projectId }: TemplatesTabPr toast({ title: "Template deleted", type: "success" }); }, onError: (error) => { - toast({ title: "Failed to delete template", description: String(error), type: "error" }); + toast({ + title: "Failed to delete template", + description: formatApiError(error), + type: "error", + }); }, }); diff --git a/ui/src/service-worker/sw.ts b/ui/src/service-worker/sw.ts index 04c4e74..1e12a51 100644 --- a/ui/src/service-worker/sw.ts +++ b/ui/src/service-worker/sw.ts @@ -22,6 +22,7 @@ import "./sqlite-bridge"; // The WASM module is served from public/wasm/ at runtime. import wasmInit, { HadrianGateway } from "/wasm/hadrian.js"; +import { formatApiError } from "@/utils/formatApiError"; let gateway: HadrianGateway | null = null; let initPromise: Promise | null = null; @@ -75,7 +76,7 @@ async function handleRequest(request: Request): Promise { return new Response( JSON.stringify({ error: { - message: `Gateway initialization failed: ${String(error)}`, + message: `Gateway initialization failed: ${formatApiError(error)}`, type: "server_error", code: 503, }, @@ -95,7 +96,7 @@ async function handleRequest(request: Request): Promise { return new Response( JSON.stringify({ error: { - message: String(error), + message: formatApiError(error), type: "server_error", code: 500, }, diff --git a/ui/src/services/duckdb/duckdbService.ts b/ui/src/services/duckdb/duckdbService.ts index 5aa72f1..8c4666c 100644 --- a/ui/src/services/duckdb/duckdbService.ts +++ b/ui/src/services/duckdb/duckdbService.ts @@ -1,3 +1,4 @@ +import { formatApiError } from "@/utils/formatApiError"; /** * DuckDB Service * @@ -201,7 +202,7 @@ class DuckDBService { reject(new Error(error.message)); }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); this.setStatus("error", errorMsg); reject(error); } diff --git a/ui/src/services/duckdb/duckdbWorker.ts b/ui/src/services/duckdb/duckdbWorker.ts index 3a64d72..375bff2 100644 --- a/ui/src/services/duckdb/duckdbWorker.ts +++ b/ui/src/services/duckdb/duckdbWorker.ts @@ -12,6 +12,7 @@ import * as duckdb from "@duckdb/duckdb-wasm"; +import { formatApiError } from "@/utils/formatApiError"; /** Message types from main thread to worker */ interface ExecuteMessage { type: "execute"; @@ -199,7 +200,7 @@ async function initDuckDB(): Promise { sendMessage({ type: "ready" }); return db; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", error: `Failed to load DuckDB: ${errorMsg}` }); throw error; } finally { @@ -255,7 +256,7 @@ async function executeQuery(sql: string): Promise<{ rowCount: rows.length, }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, columns: [], @@ -327,7 +328,7 @@ async function registerFile( registeredFiles.add(name); return { success: true }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? 
error.message : formatApiError(error); return { success: false, error: errorMsg }; } } @@ -361,7 +362,7 @@ async function registerDatabaseHandle( attachedDatabases.set(name, alias); return { success: true, dbAlias: alias }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, error: errorMsg }; } } @@ -386,7 +387,7 @@ async function unregisterFile(name: string): Promise<{ success: boolean; error?: registeredFiles.delete(name); return { success: true }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, error: errorMsg }; } } @@ -424,7 +425,7 @@ async function listTables(): Promise<{ return { success: true, tables }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, tables: [], error: errorMsg }; } } @@ -462,7 +463,7 @@ async function describeTable(tableName: string): Promise<{ return { success: true, columns }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, columns: [], error: errorMsg }; } } @@ -483,7 +484,7 @@ self.onmessage = async (event: MessageEvent) => { ...result, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, @@ -502,7 +503,7 @@ self.onmessage = async (event: MessageEvent) => { ...result, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, @@ -521,7 +522,7 @@ self.onmessage = async (event: MessageEvent) => { ...result, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, @@ -540,7 +541,7 @@ self.onmessage = async (event: MessageEvent) => { ...result, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, @@ -559,7 +560,7 @@ self.onmessage = async (event: MessageEvent) => { ...result, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, @@ -578,7 +579,7 @@ self.onmessage = async (event: MessageEvent) => { ...result, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? 
error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, diff --git a/ui/src/services/mcp/client.ts b/ui/src/services/mcp/client.ts index 06ab55c..06fb13b 100644 --- a/ui/src/services/mcp/client.ts +++ b/ui/src/services/mcp/client.ts @@ -1,3 +1,4 @@ +import { formatApiError } from "@/utils/formatApiError"; /** * MCP Client - Streamable HTTP Transport * @@ -222,7 +223,7 @@ export class MCPClient { this.setStatus("connected"); } catch (err) { - const errorMsg = err instanceof Error ? err.message : String(err); + const errorMsg = err instanceof Error ? err.message : formatApiError(err); this.setStatus("error", errorMsg); throw err; } diff --git a/ui/src/services/pyodide/pyodideService.ts b/ui/src/services/pyodide/pyodideService.ts index f968f7c..0c592e9 100644 --- a/ui/src/services/pyodide/pyodideService.ts +++ b/ui/src/services/pyodide/pyodideService.ts @@ -1,3 +1,4 @@ +import { formatApiError } from "@/utils/formatApiError"; /** * Pyodide Service * @@ -183,7 +184,7 @@ class PyodideService { reject(new Error(error.message)); }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); this.setStatus("error", errorMsg); reject(error); } @@ -288,7 +289,7 @@ class PyodideService { const result = await this.executeInternal(item.code, item.options); item.resolve(result); } catch (error) { - item.reject(error instanceof Error ? error : new Error(String(error))); + item.reject(error instanceof Error ? error : new Error(formatApiError(error))); } } diff --git a/ui/src/services/pyodide/pyodideWorker.ts b/ui/src/services/pyodide/pyodideWorker.ts index 0489825..f2f812f 100644 --- a/ui/src/services/pyodide/pyodideWorker.ts +++ b/ui/src/services/pyodide/pyodideWorker.ts @@ -1,3 +1,4 @@ +import { formatApiError } from "@/utils/formatApiError"; /** * Pyodide Web Worker * @@ -183,7 +184,7 @@ def __hadrian_get_figures(): sendMessage({ type: "ready" }); return py; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", error: `Failed to load Pyodide: ${errorMsg}` }); throw error; } finally { @@ -321,7 +322,7 @@ plt.close('all') figures, }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, stdout: stdout.trim(), @@ -348,7 +349,7 @@ self.onmessage = async (event: MessageEvent) => { ...result, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, @@ -367,7 +368,7 @@ self.onmessage = async (event: MessageEvent) => { packages: loaded, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? 
error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, diff --git a/ui/src/services/quickjs/quickjsService.ts b/ui/src/services/quickjs/quickjsService.ts index 03cd842..46eaafe 100644 --- a/ui/src/services/quickjs/quickjsService.ts +++ b/ui/src/services/quickjs/quickjsService.ts @@ -1,3 +1,4 @@ +import { formatApiError } from "@/utils/formatApiError"; /** * QuickJS Service * @@ -158,7 +159,7 @@ class QuickJSService { reject(new Error(error.message)); }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); this.setStatus("error", errorMsg); reject(error); } diff --git a/ui/src/services/quickjs/quickjsWorker.ts b/ui/src/services/quickjs/quickjsWorker.ts index 98563b4..8974a4f 100644 --- a/ui/src/services/quickjs/quickjsWorker.ts +++ b/ui/src/services/quickjs/quickjsWorker.ts @@ -15,6 +15,7 @@ import { newQuickJSWASMModuleFromVariant } from "quickjs-emscripten-core"; import variant from "@jitl/quickjs-singlefile-browser-release-sync"; import type { QuickJSWASMModule, QuickJSContext } from "quickjs-emscripten-core"; +import { formatApiError } from "@/utils/formatApiError"; /** Message types from main thread to worker */ interface ExecuteMessage { type: "execute"; @@ -103,7 +104,7 @@ async function initQuickJS(): Promise { sendMessage({ type: "ready" }); return quickjs; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", error: `Failed to load QuickJS: ${errorMsg}` }); throw error; } finally { @@ -216,7 +217,7 @@ async function executeCode( stderr: stderr.trim(), }; } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); return { success: false, stdout: stdout.trim(), @@ -244,7 +245,7 @@ self.onmessage = async (event: MessageEvent) => { ...result, }); } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); + const errorMsg = error instanceof Error ? error.message : formatApiError(error); sendMessage({ type: "error", id: message.id, diff --git a/ui/src/services/websocket/WebSocketEventClient.ts b/ui/src/services/websocket/WebSocketEventClient.ts index b0b5ae2..c07a2dc 100644 --- a/ui/src/services/websocket/WebSocketEventClient.ts +++ b/ui/src/services/websocket/WebSocketEventClient.ts @@ -1,3 +1,4 @@ +import { formatApiError } from "@/utils/formatApiError"; /** * WebSocket Event Client * @@ -103,7 +104,7 @@ export class WebSocketEventClient { this.ws.onclose = this.handleClose.bind(this); this.ws.onerror = this.handleError.bind(this); } catch (err) { - const errorMsg = err instanceof Error ? err.message : String(err); + const errorMsg = err instanceof Error ? err.message : formatApiError(err); this.setStatus("error", `Failed to create WebSocket: ${errorMsg}`); } } diff --git a/ui/src/stores/mcpStore.ts b/ui/src/stores/mcpStore.ts index e090af2..abf5ebe 100644 --- a/ui/src/stores/mcpStore.ts +++ b/ui/src/stores/mcpStore.ts @@ -34,6 +34,7 @@ import { create } from "zustand"; import { persist } from "zustand/middleware"; +import { formatApiError } from "@/utils/formatApiError"; import { MCPClient, type MCPServerConfig, @@ -386,7 +387,7 @@ export const useMCPStore = create()( }), })); } catch (err) { - const errorMsg = err instanceof Error ? 
err.message : String(err); + const errorMsg = err instanceof Error ? err.message : formatApiError(err); get()._setServerStatus(serverId, "error", errorMsg); throw err; } diff --git a/ui/src/utils/__tests__/formatApiError.test.ts b/ui/src/utils/__tests__/formatApiError.test.ts new file mode 100644 index 0000000..9ee221c --- /dev/null +++ b/ui/src/utils/__tests__/formatApiError.test.ts @@ -0,0 +1,34 @@ +import { describe, it, expect } from "vitest"; +import { formatApiError } from "../formatApiError"; + +describe("formatApiError", () => { + it("passes strings through", () => { + expect(formatApiError("boom")).toBe("boom"); + }); + + it("falls back to 'Unknown error' on null/undefined", () => { + expect(formatApiError(null)).toBe("Unknown error"); + expect(formatApiError(undefined)).toBe("Unknown error"); + }); + + it("uses Error.message", () => { + expect(formatApiError(new Error("nope"))).toBe("nope"); + }); + + it("prefers an API body shape on Error objects", () => { + const err = Object.assign(new Error("HTTP 400"), { body: { message: "bad input" } }); + expect(formatApiError(err)).toBe("bad input"); + }); + + it("walks { error: { message } } envelopes", () => { + expect(formatApiError({ error: { message: "denied" } })).toBe("denied"); + }); + + it("walks { error: 'string' } envelopes", () => { + expect(formatApiError({ error: "denied" })).toBe("denied"); + }); + + it("falls back to Unknown error rather than [object Object]", () => { + expect(formatApiError({ random: 1 })).toBe("Unknown error"); + }); +}); diff --git a/ui/src/utils/formatApiError.ts b/ui/src/utils/formatApiError.ts new file mode 100644 index 0000000..ab9a500 --- /dev/null +++ b/ui/src/utils/formatApiError.ts @@ -0,0 +1,49 @@ +/** + * Convert any thrown value into a human-readable string for error toasts. + * + * `String(error)` produces "[object Object]" for most non-string, non-Error + * values — including the typed error bodies that hey-api / fetch wrappers + * surface. This helper unwraps the common shapes: + * - plain `Error` → `error.message` + * - hey-api errors with `.body` → drill into the body + * - API error envelopes (`{message}`, `{detail}`, `{error: string}`, + * `{error: {message}}`) + * - strings as-is + * + * Always returns a non-empty string so callers can pass the result straight + * to a toast description without an additional fallback. + */ +export function formatApiError(error: unknown): string { + if (typeof error === "string") return error || "Unknown error"; + if (error == null) return "Unknown error"; + + if (error instanceof Error) { + const fromBody = extractMessage((error as Error & { body?: unknown }).body); + if (fromBody) return fromBody; + return error.message || "Unknown error"; + } + + if (typeof error === "object") { + const fromBody = extractMessage(error); + if (fromBody) return fromBody; + } + + const fallback = String(error); + return fallback === "[object Object]" ? 
"Unknown error" : fallback; +} + +function extractMessage(body: unknown): string | null { + if (typeof body === "string") return body || null; + if (body == null || typeof body !== "object") return null; + + const obj = body as Record; + if (typeof obj.message === "string" && obj.message) return obj.message; + if (typeof obj.detail === "string" && obj.detail) return obj.detail; + if (typeof obj.error === "string" && obj.error) return obj.error; + if (typeof obj.error === "object" && obj.error) { + const inner = obj.error as Record; + if (typeof inner.message === "string" && inner.message) return inner.message; + if (typeof inner.detail === "string" && inner.detail) return inner.detail; + } + return null; +} From b422c1e07ec814d3b64aa0b3426534c68f7f79e1 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:20:14 +1000 Subject: [PATCH 111/172] Scope admin authz to row owner instead of all-None tuples --- src/routes/admin/api_keys.rs | 19 +++- src/routes/admin/audit_logs.rs | 23 ++++- src/routes/admin/conversations.rs | 132 ++++++++++++++++++++++++-- src/routes/admin/model_pricing.rs | 149 ++++++++++++++++++++++++++++-- src/routes/admin/skills.rs | 90 ++++++++++++++++-- src/routes/admin/templates.rs | 88 ++++++++++++++++-- src/routes/admin/usage.rs | 99 +++++++++++++------- 7 files changed, 528 insertions(+), 72 deletions(-) diff --git a/src/routes/admin/api_keys.rs b/src/routes/admin/api_keys.rs index 63086b3..ce3bc73 100644 --- a/src/routes/admin/api_keys.rs +++ b/src/routes/admin/api_keys.rs @@ -162,8 +162,18 @@ pub(super) async fn check_owner_create_authz( Some(&project_id.to_string()), )?; } - crate::models::ApiKeyOwner::User { .. } => { - authz.require("api_key", "create", None, None, None, None)?; + crate::models::ApiKeyOwner::User { user_id } => { + // Surface the target user_id via `resource_id` so policies can + // reject cross-user key creation; `check_owner_modify_authz` + // already does the same for revoke/rotate. + authz.require( + "api_key", + "create", + Some(&user_id.to_string()), + None, + None, + None, + )?; } crate::models::ApiKeyOwner::ServiceAccount { service_account_id } => { let sa = services @@ -765,7 +775,10 @@ pub async fn list_by_user( Path(user_id): Path, Query(query): Query, ) -> Result, AdminError> { - authz.require("api_key", "list", None, None, None, None)?; + // Pass the target user_id through `resource_id` so policies can compare + // it against the calling subject and reject cross-user listing. + let user_id_str = user_id.to_string(); + authz.require("api_key", "list", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let limit = query.limit.unwrap_or(100); diff --git a/src/routes/admin/audit_logs.rs b/src/routes/admin/audit_logs.rs index b2d57a0..19325cf 100644 --- a/src/routes/admin/audit_logs.rs +++ b/src/routes/admin/audit_logs.rs @@ -45,7 +45,6 @@ pub async fn list( Extension(authz): Extension, Query(query): Query, ) -> Result, AdminError> { - authz.require("audit_log", "list", None, None, None, None)?; let services = get_services(&state)?; let limit = query.limit.unwrap_or(100); @@ -92,6 +91,12 @@ pub async fn list( } } + // Run authz with the effective org scope so policies see the tenant they + // need to allow/deny against. `authz.require` evaluated with all-None + // would let anyone with `audit_log:list` see logs across orgs. 
+ let org_scope = query.org_id.map(|id| id.to_string()); + authz.require("audit_log", "list", None, org_scope.as_deref(), None, None)?; + let result = services.audit_logs.list(query).await?; let pagination = PaginationMeta::with_cursors( @@ -124,14 +129,28 @@ pub async fn get( Extension(authz): Extension, Path(id): Path, ) -> Result, AdminError> { - authz.require("audit_log", "read", None, None, None, None)?; let services = get_services(&state)?; + // Pre-fetch the row so authz can see the entry's org/project rather than + // an all-None scope; otherwise a permissive policy would expose every + // tenant's audit history through this endpoint. let entry = services .audit_logs .get_by_id(id) .await? .ok_or_else(|| AdminError::NotFound("Audit log entry not found".to_string()))?; + let id_str = id.to_string(); + let org_scope = entry.org_id.map(|o| o.to_string()); + let project_scope = entry.project_id.map(|p| p.to_string()); + authz.require( + "audit_log", + "read", + Some(&id_str), + org_scope.as_deref(), + None, + project_scope.as_deref(), + )?; + Ok(Json(entry)) } diff --git a/src/routes/admin/conversations.rs b/src/routes/admin/conversations.rs index 6d57473..91ea4d2 100644 --- a/src/routes/admin/conversations.rs +++ b/src/routes/admin/conversations.rs @@ -12,13 +12,29 @@ use crate::{ AppState, middleware::AuthzContext, models::{ - AppendMessages, Conversation, ConversationWithProject, CreateConversation, Message, - SetPinOrder, UpdateConversation, + AppendMessages, Conversation, ConversationOwnerType, ConversationWithProject, + CreateConversation, Message, SetPinOrder, UpdateConversation, }, openapi::PaginationMeta, services::Services, }; +/// Scope tuple for `authz.require` derived from a conversation's owner. +struct ConversationAuthzScope { + project: Option, +} + +fn conversation_authz_scope(c: &Conversation) -> ConversationAuthzScope { + let id = c.owner_id.to_string(); + match c.owner_type { + ConversationOwnerType::Project => ConversationAuthzScope { project: Some(id) }, + // User-owned conversations have no project/team/org context; the + // policy compares owner_id against the caller's subject via + // resource_id. + ConversationOwnerType::User => ConversationAuthzScope { project: None }, + } +} + /// Paginated list of conversations #[derive(Debug, Serialize, Deserialize)] #[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] @@ -50,9 +66,27 @@ pub async fn create( Extension(authz): Extension, Valid(Json(input)): Valid>, ) -> Result<(StatusCode, Json), AdminError> { - authz.require("conversation", "create", None, None, None, None)?; let services = get_services(&state)?; + // Pass the requested owner scope into authz so the policy can reject + // creating a conversation under a project the caller does not own. + // User-owned conversations carry no project scope; the policy must + // compare the request's user_id (resource_id) against the subject. 
+ let (owner_resource, owner_project) = match &input.owner { + crate::models::ConversationOwner::Project { project_id } => { + (None, Some(project_id.to_string())) + } + crate::models::ConversationOwner::User { user_id } => (Some(user_id.to_string()), None), + }; + authz.require( + "conversation", + "create", + owner_resource.as_deref(), + None, + None, + owner_project.as_deref(), + )?; + // Verify the owner exists match &input.owner { crate::models::ConversationOwner::Project { project_id } => { @@ -112,15 +146,28 @@ pub async fn get( Extension(authz): Extension, Path(id): Path, ) -> Result, AdminError> { - authz.require("conversation", "read", None, None, None, None)?; let services = get_services(&state)?; + // Pre-fetch the row so authz sees the conversation's project scope; + // otherwise every read is evaluated against an all-None scope and a + // permissive policy could leak conversations cross-project. let conversation = services .conversations .get_by_id(id) .await? .ok_or_else(|| AdminError::NotFound(format!("Conversation '{}' not found", id)))?; + let id_str = id.to_string(); + let scope = conversation_authz_scope(&conversation); + authz.require( + "conversation", + "read", + Some(&id_str), + None, + None, + scope.project.as_deref(), + )?; + Ok(Json(conversation)) } @@ -219,7 +266,8 @@ pub async fn list_by_user( Path(user_id): Path, Query(query): Query, ) -> Result, AdminError> { - authz.require("conversation", "list", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("conversation", "list", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; // Verify user exists @@ -290,7 +338,8 @@ pub async fn list_accessible_for_user( Path(user_id): Path, Query(query): Query, ) -> Result, AdminError> { - authz.require("conversation", "list", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("conversation", "list", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; // Verify user exists @@ -342,9 +391,27 @@ pub async fn update( Path(id): Path, Valid(Json(input)): Valid>, ) -> Result, AdminError> { - authz.require("conversation", "update", None, None, None, None)?; let services = get_services(&state)?; + // Pre-fetch the existing conversation so authz sees the current owner + // scope (a permissive policy with all-None would otherwise allow + // editing across projects). + let existing = services + .conversations + .get_by_id(id) + .await? + .ok_or_else(|| AdminError::NotFound(format!("Conversation '{}' not found", id)))?; + let id_str = id.to_string(); + let scope = conversation_authz_scope(&existing); + authz.require( + "conversation", + "update", + Some(&id_str), + None, + None, + scope.project.as_deref(), + )?; + // Verify the new owner exists if one is provided if let Some(ref owner) = input.owner { match owner { @@ -390,9 +457,24 @@ pub async fn append_messages( Path(id): Path, Valid(Json(input)): Valid>, ) -> Result>, AdminError> { - authz.require("conversation", "update", None, None, None, None)?; let services = get_services(&state)?; + let conversation = services + .conversations + .get_by_id(id) + .await? 
+ .ok_or_else(|| AdminError::NotFound(format!("Conversation '{}' not found", id)))?; + let id_str = id.to_string(); + let scope = conversation_authz_scope(&conversation); + authz.require( + "conversation", + "update", + Some(&id_str), + None, + None, + scope.project.as_deref(), + )?; + let messages = services.conversations.append_messages(id, input).await?; Ok(Json(messages)) } @@ -414,9 +496,24 @@ pub async fn delete( Extension(authz): Extension, Path(id): Path, ) -> Result, AdminError> { - authz.require("conversation", "delete", None, None, None, None)?; let services = get_services(&state)?; + let conversation = services + .conversations + .get_by_id(id) + .await? + .ok_or_else(|| AdminError::NotFound(format!("Conversation '{}' not found", id)))?; + let id_str = id.to_string(); + let scope = conversation_authz_scope(&conversation); + authz.require( + "conversation", + "delete", + Some(&id_str), + None, + None, + scope.project.as_deref(), + )?; + services.conversations.delete(id).await?; Ok(Json(())) } @@ -443,9 +540,24 @@ pub async fn set_pin( Path(id): Path, Valid(Json(input)): Valid>, ) -> Result, AdminError> { - authz.require("conversation", "update", None, None, None, None)?; let services = get_services(&state)?; + let conversation = services + .conversations + .get_by_id(id) + .await? + .ok_or_else(|| AdminError::NotFound(format!("Conversation '{}' not found", id)))?; + let id_str = id.to_string(); + let scope = conversation_authz_scope(&conversation); + authz.require( + "conversation", + "update", + Some(&id_str), + None, + None, + scope.project.as_deref(), + )?; + let updated = services .conversations .set_pin_order(id, input.pin_order) diff --git a/src/routes/admin/model_pricing.rs b/src/routes/admin/model_pricing.rs index 2d29d69..76b3e4b 100644 --- a/src/routes/admin/model_pricing.rs +++ b/src/routes/admin/model_pricing.rs @@ -12,11 +12,58 @@ use super::{AuditActor, error::AdminError, organizations::ListQuery}; use crate::{ AppState, middleware::{AdminAuth, AuthzContext, ClientInfo}, - models::{CreateAuditLog, CreateModelPricing, DbModelPricing, UpdateModelPricing}, + models::{ + CreateAuditLog, CreateModelPricing, DbModelPricing, PricingOwner, UpdateModelPricing, + }, openapi::PaginationMeta, services::Services, }; +/// Authorization scope derived from a pricing entry's owner. Maps the row's +/// PricingOwner to the (resource_id, org_id, team_id, project_id) tuple that +/// `authz.require` consumes. 
+struct PricingAuthzScope { + resource_id: Option, + org: Option, + team: Option, + project: Option, +} + +fn pricing_authz_scope(owner: &PricingOwner, fallback_id: &str) -> PricingAuthzScope { + match owner { + PricingOwner::Global => PricingAuthzScope { + resource_id: Some(fallback_id.to_string()), + org: None, + team: None, + project: None, + }, + PricingOwner::Organization { org_id } => PricingAuthzScope { + resource_id: Some(fallback_id.to_string()), + org: Some(org_id.to_string()), + team: None, + project: None, + }, + PricingOwner::Team { team_id } => PricingAuthzScope { + resource_id: Some(fallback_id.to_string()), + org: None, + team: Some(team_id.to_string()), + project: None, + }, + PricingOwner::Project { project_id } => PricingAuthzScope { + resource_id: Some(fallback_id.to_string()), + org: None, + team: None, + project: Some(project_id.to_string()), + }, + PricingOwner::User { user_id } => PricingAuthzScope { + resource_id: Some(user_id.to_string()), + org: None, + team: None, + project: None, + }, + } +} + /// Paginated list of model pricing entries #[derive(Debug, Serialize, Deserialize)] #[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))] @@ -50,7 +97,18 @@ pub async fn create( Extension(client_info): Extension, Valid(Json(input)): Valid>, ) -> Result<(StatusCode, Json), AdminError> { - authz.require("model_pricing", "create", None, None, None, None)?; + // Authorize against the requested owner scope so a permissive policy + // can't be tricked into accepting a global write request from someone + // who only has org-scoped privileges. + let scope = pricing_authz_scope(&input.owner, ""); + authz.require( + "model_pricing", + "create", + scope.resource_id.as_deref(), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); @@ -106,14 +164,26 @@ pub async fn get( Extension(authz): Extension, Path(id): Path, ) -> Result, AdminError> { - authz.require("model_pricing", "read", None, None, None, None)?; let services = get_services(&state)?; + // Pre-fetch the row so authz can scope by the pricing entry's actual + // owner; `authz.require` with all-None lets a permissive policy expose + // every tenant's pricing through a single endpoint. let pricing = services .model_pricing .get_by_id(id) .await? .ok_or_else(|| AdminError::NotFound("Model pricing not found".to_string()))?; + let id_str = id.to_string(); + let scope = pricing_authz_scope(&pricing.owner, &id_str); + authz.require( + "model_pricing", + "read", + scope.resource_id.as_deref(), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; Ok(Json(pricing)) } @@ -139,10 +209,27 @@ pub async fn update( Path(id): Path, Valid(Json(input)): Valid>, ) -> Result, AdminError> { - authz.require("model_pricing", "update", None, None, None, None)?; let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); + // Resolve the existing pricing row so authz sees its real owner before + // we mutate anything. + let existing = services + .model_pricing + .get_by_id(id) + .await? 
+ .ok_or_else(|| AdminError::NotFound("Model pricing not found".to_string()))?; + let id_str = id.to_string(); + let scope = pricing_authz_scope(&existing.owner, &id_str); + authz.require( + "model_pricing", + "update", + scope.resource_id.as_deref(), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; + // Capture what's being changed for audit log let changes = json!({ "input_per_1m_tokens": input.input_per_1m_tokens, @@ -207,16 +294,26 @@ pub async fn delete( Extension(client_info): Extension, Path(id): Path, ) -> Result, AdminError> { - authz.require("model_pricing", "delete", None, None, None, None)?; let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); - // Fetch pricing details before deletion for audit log + // Pre-fetch the pricing row so authz scopes by its real owner; reuse the + // row for the audit log below. let pricing = services .model_pricing .get_by_id(id) .await? .ok_or_else(|| AdminError::NotFound("Model pricing not found".to_string()))?; + let id_str = id.to_string(); + let scope = pricing_authz_scope(&pricing.owner, &id_str); + authz.require( + "model_pricing", + "delete", + scope.resource_id.as_deref(), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; // Extract org_id and project_id from owner for audit log context let (org_id, project_id) = match &pricing.owner { @@ -444,7 +541,15 @@ pub async fn list_by_user( Path(user_id): Path, Query(query): Query, ) -> Result, AdminError> { - authz.require("model_pricing", "list", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require( + "model_pricing", + "list", + Some(&user_id_str), + None, + None, + None, + )?; let services = get_services(&state)?; let limit = query.limit.unwrap_or(100); @@ -486,7 +591,10 @@ pub async fn list_by_provider( Path(provider): Path, Query(query): Query, ) -> Result, AdminError> { - authz.require("model_pricing", "list", None, None, None, None)?; + // Pass the provider name through `resource_id` so policies can scope by + // provider; with all-None a permissive policy would expose every + // tenant-scoped pricing row this endpoint surfaces. + authz.require("model_pricing", "list", Some(&provider), None, None, None)?; let services = get_services(&state)?; let limit = query.limit.unwrap_or(100); @@ -528,7 +636,15 @@ pub async fn upsert( Extension(client_info): Extension, Valid(Json(input)): Valid>, ) -> Result, AdminError> { - authz.require("model_pricing", "update", None, None, None, None)?; + let scope = pricing_authz_scope(&input.owner, ""); + authz.require( + "model_pricing", + "update", + scope.resource_id.as_deref(), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); @@ -593,9 +709,22 @@ pub async fn bulk_upsert( Extension(client_info): Extension, Json(entries): Json>, ) -> Result, AdminError> { - authz.require("model_pricing", "update", None, None, None, None)?; + // Bulk upserts span owners; require authz against every distinct owner + // in the payload so a caller scoped to one tenant can't smuggle global + // or cross-tenant pricing rows through this endpoint. 
let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); + for entry in &entries { + let scope = pricing_authz_scope(&entry.owner, ""); + authz.require( + "model_pricing", + "update", + scope.resource_id.as_deref(), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; + } // Capture summary for audit log before bulk operation let entry_count = entries.len(); diff --git a/src/routes/admin/skills.rs b/src/routes/admin/skills.rs index a87193e..34b9866 100644 --- a/src/routes/admin/skills.rs +++ b/src/routes/admin/skills.rs @@ -40,6 +40,43 @@ fn audit_owner(skill: &Skill) -> (Option, Option) { } } +/// Authorization scope derived from a skill's owner. Mirrors the pattern in +/// `create()`, which routes the request scope through `(owner_org, owner_team, +/// owner_project)` so policies can deny cross-tenant operations. +struct SkillAuthzScope { + org: Option, + team: Option, + project: Option, +} + +fn skill_authz_scope(skill: &Skill) -> SkillAuthzScope { + let id = skill.owner_id.to_string(); + match skill.owner_type { + SkillOwnerType::Organization => SkillAuthzScope { + org: Some(id), + team: None, + project: None, + }, + SkillOwnerType::Team => SkillAuthzScope { + org: None, + team: Some(id), + project: None, + }, + SkillOwnerType::Project => SkillAuthzScope { + org: None, + team: None, + project: Some(id), + }, + // User-owned skills carry no team/org scope; the policy compares + // owner_id against the caller subject via resource_id. + SkillOwnerType::User => SkillAuthzScope { + org: None, + team: None, + project: None, + }, + } +} + /// Create a skill. #[cfg_attr(feature = "utoipa", utoipa::path( post, @@ -148,13 +185,24 @@ pub async fn get( ) -> Result, AdminError> { let services = get_services(&state)?; - authz.require("skill", "read", None, None, None, None)?; - + // Pre-fetch the skill so the authz check sees its owner scope; otherwise + // every "skill", "read" call is evaluated against an all-None scope and a + // permissive policy would happily return cross-tenant skills. let skill = services .skills .get_by_id(id) .await? .ok_or_else(|| AdminError::NotFound("Skill not found".to_string()))?; + let id_str = id.to_string(); + let scope = skill_authz_scope(&skill); + authz.require( + "skill", + "read", + Some(&id_str), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; Ok(Json(skill)) } @@ -188,7 +236,23 @@ pub async fn update( let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); - authz.require("skill", "update", None, None, None, None)?; + // Resolve the existing skill's owner scope first so authz can deny + // cross-tenant updates before we touch storage or audit. + let existing = services + .skills + .get_by_id(id) + .await? + .ok_or_else(|| AdminError::NotFound("Skill not found".to_string()))?; + let id_str = id.to_string(); + let scope = skill_authz_scope(&existing); + authz.require( + "skill", + "update", + Some(&id_str), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; // Capture a redacted change summary for the audit log (avoids logging // full file contents). @@ -256,14 +320,23 @@ pub async fn delete( let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); - authz.require("skill", "delete", None, None, None, None)?; - - // Capture details before deletion for the audit log. 
+ // Capture details before deletion for the audit log, *and* derive the + // owner scope so authz sees the real tenant rather than all-None. let skill = services .skills .get_by_id(id) .await? .ok_or_else(|| AdminError::NotFound("Skill not found".to_string()))?; + let id_str = id.to_string(); + let scope = skill_authz_scope(&skill); + authz.require( + "skill", + "delete", + Some(&id_str), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; let (org_id, project_id) = audit_owner(&skill); let name = skill.name.clone(); @@ -516,7 +589,10 @@ pub async fn list_by_user( ) -> Result, AdminError> { let services = get_services(&state)?; - authz.require("skill", "list", None, None, None, None)?; + // Pass the target user_id through `resource_id` so policies can compare + // against the calling subject and reject cross-user listing. + let user_id_str = user_id.to_string(); + authz.require("skill", "list", Some(&user_id_str), None, None, None)?; let limit = query.limit.unwrap_or(100); let params = query.try_into_with_cursor()?; diff --git a/src/routes/admin/templates.rs b/src/routes/admin/templates.rs index 9f7172a..a2d6f34 100644 --- a/src/routes/admin/templates.rs +++ b/src/routes/admin/templates.rs @@ -31,6 +31,42 @@ fn get_services(state: &AppState) -> Result<&Services, AdminError> { state.services.as_ref().ok_or(AdminError::ServicesRequired) } +/// Authorization scope derived from a template's owner. Mirrors the pattern in +/// `create()` so policies can deny cross-tenant operations on existing rows. +struct TemplateAuthzScope { + org: Option, + team: Option, + project: Option, +} + +fn template_authz_scope(template: &Template) -> TemplateAuthzScope { + let id = template.owner_id.to_string(); + match template.owner_type { + TemplateOwnerType::Organization => TemplateAuthzScope { + org: Some(id), + team: None, + project: None, + }, + TemplateOwnerType::Team => TemplateAuthzScope { + org: None, + team: Some(id), + project: None, + }, + TemplateOwnerType::Project => TemplateAuthzScope { + org: None, + team: None, + project: Some(id), + }, + // User-owned templates carry no team/org scope; the policy compares + // owner_id against the caller subject via resource_id. + TemplateOwnerType::User => TemplateAuthzScope { + org: None, + team: None, + project: None, + }, + } +} + /// Create a template #[cfg_attr(feature = "utoipa", utoipa::path( post, @@ -143,13 +179,23 @@ pub async fn get( ) -> Result, AdminError> { let services = get_services(&state)?; - authz.require("template", "read", None, None, None, None)?; - + // Pre-fetch the template so authz sees its owner scope; without this an + // all-None call lets a permissive policy return cross-tenant templates. let template = services .templates .get_by_id(id) .await? .ok_or_else(|| AdminError::NotFound("Template not found".to_string()))?; + let id_str = id.to_string(); + let scope = template_authz_scope(&template); + authz.require( + "template", + "read", + Some(&id_str), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; Ok(Json(template)) } @@ -180,7 +226,23 @@ pub async fn update( let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); - authz.require("template", "update", None, None, None, None)?; + // Resolve the existing template's owner scope so authz can deny + // cross-tenant updates before we mutate storage. + let existing = services + .templates + .get_by_id(id) + .await? 
+ .ok_or_else(|| AdminError::NotFound("Template not found".to_string()))?; + let id_str = id.to_string(); + let scope = template_authz_scope(&existing); + authz.require( + "template", + "update", + Some(&id_str), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; // Capture changes for audit log let changes = json!({ @@ -245,14 +307,23 @@ pub async fn delete( let services = get_services(&state)?; let actor = AuditActor::from(&admin_auth); - authz.require("template", "delete", None, None, None, None)?; - - // Get template details before deletion for audit log + // Pre-fetch the template so authz sees its owner scope rather than + // all-None, and reuse the row for the audit log below. let template = services .templates .get_by_id(id) .await? .ok_or_else(|| AdminError::NotFound("Template not found".to_string()))?; + let id_str = id.to_string(); + let scope = template_authz_scope(&template); + authz.require( + "template", + "delete", + Some(&id_str), + scope.org.as_deref(), + scope.team.as_deref(), + scope.project.as_deref(), + )?; // Extract org_id and project_id from owner for audit log let (org_id, project_id) = match template.owner_type { @@ -525,7 +596,10 @@ pub async fn list_by_user( ) -> Result, AdminError> { let services = get_services(&state)?; - authz.require("template", "list", None, None, None, None)?; + // Pass the target user_id through `resource_id` so policies can reject + // listing templates owned by a different user. + let user_id_str = user_id.to_string(); + authz.require("template", "list", Some(&user_id_str), None, None, None)?; let limit = query.limit.unwrap_or(100); let params = query.try_into_with_cursor()?; diff --git a/src/routes/admin/usage.rs b/src/routes/admin/usage.rs index f272ca0..eb9fa38 100644 --- a/src/routes/admin/usage.rs +++ b/src/routes/admin/usage.rs @@ -798,7 +798,8 @@ pub async fn get_summary( Query(query): Query, Extension(authz): Extension, ) -> Result, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; @@ -828,7 +829,8 @@ pub async fn get_by_date( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; @@ -858,7 +860,8 @@ pub async fn get_by_model( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; @@ -888,7 +891,8 @@ pub async fn get_by_referer( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; @@ -921,7 +925,8 @@ pub async fn get_forecast( Query(query): Query, Extension(authz): Extension, ) -> Result, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = 
key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let forecast = services @@ -1463,7 +1468,8 @@ pub async fn get_user_summary( Query(query): Query, Extension(authz): Extension, ) -> Result, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; // Verify user exists @@ -1500,7 +1506,8 @@ pub async fn get_user_by_date( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; // Verify user exists @@ -1537,7 +1544,8 @@ pub async fn get_user_by_model( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; // Verify user exists @@ -1577,7 +1585,8 @@ pub async fn get_user_forecast( Query(query): Query, Extension(authz): Extension, ) -> Result, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; // Verify user exists @@ -1647,7 +1656,8 @@ pub async fn get_provider_summary( Query(query): Query, Extension(authz): Extension, ) -> Result, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let provider_str = provider.to_string(); + authz.require("usage", "read", Some(&provider_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; @@ -1679,7 +1689,8 @@ pub async fn get_provider_by_date( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let provider_str = provider.to_string(); + authz.require("usage", "read", Some(&provider_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; @@ -1711,7 +1722,8 @@ pub async fn get_provider_by_model( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let provider_str = provider.to_string(); + authz.require("usage", "read", Some(&provider_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; @@ -1746,7 +1758,8 @@ pub async fn get_provider_forecast( Query(query): Query, Extension(authz): Extension, ) -> Result, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let provider_str = provider.to_string(); + authz.require("usage", "read", Some(&provider_str), None, None, None)?; let services = get_services(&state)?; let forecast = services @@ -2001,7 +2014,8 @@ pub async fn get_me_summary( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let range = 
query.parse_date_range()?; let summary = services.usage.get_summary_by_user(user_id, range).await?; @@ -2029,7 +2043,8 @@ pub async fn get_me_by_date( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let daily_spend = services.usage.get_by_date_by_user(user_id, range).await?; @@ -2057,7 +2072,8 @@ pub async fn get_me_by_model( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let model_spend = services.usage.get_by_model_by_user(user_id, range).await?; @@ -2085,7 +2101,8 @@ pub async fn get_by_provider( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let provider_spend = services.usage.get_by_provider(key_id, range).await?; @@ -2162,7 +2179,8 @@ pub async fn get_user_by_provider( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let _ = services .users @@ -2198,7 +2216,8 @@ pub async fn get_me_by_provider( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let provider_spend = services @@ -2227,7 +2246,8 @@ pub async fn get_by_date_model( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let data = services.usage.get_by_date_model(key_id, range).await?; @@ -2331,7 +2351,8 @@ pub async fn get_user_by_date_model( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let _ = services .users @@ -2398,7 +2419,8 @@ pub async fn get_me_by_date_model( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = 
get_services(&state)?; let range = query.parse_date_range()?; let data = services @@ -2427,7 +2449,8 @@ pub async fn get_by_date_provider( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let data = services.usage.get_by_date_provider(key_id, range).await?; @@ -2531,7 +2554,8 @@ pub async fn get_user_by_date_provider( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let _ = services .users @@ -2598,7 +2622,8 @@ pub async fn get_me_by_date_provider( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let data = services @@ -2627,7 +2652,8 @@ pub async fn get_by_pricing_source( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let data = services.usage.get_by_pricing_source(key_id, range).await?; @@ -2731,7 +2757,8 @@ pub async fn get_user_by_pricing_source( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let _ = services .users @@ -2798,7 +2825,8 @@ pub async fn get_me_by_pricing_source( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let data = services @@ -2827,7 +2855,8 @@ pub async fn get_by_date_pricing_source( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let key_id_str = key_id.to_string(); + authz.require("usage", "read", Some(&key_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let data = services @@ -2934,7 +2963,8 @@ pub async fn get_user_by_date_pricing_source( Query(query): Query, Extension(authz): Extension, ) -> Result>, AdminError> { - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let _ = services .users @@ -3001,7 +3031,8 @@ pub async fn get_me_by_date_pricing_source( let user_id = 
admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let range = query.parse_date_range()?; let data = services @@ -4104,7 +4135,8 @@ pub async fn list_me_logs( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let limit = params.limit.unwrap_or(100).min(1000); @@ -4298,7 +4330,8 @@ pub async fn export_me_logs( let user_id = admin_auth.identity.user_id.ok_or(AdminError::NotFound( "User not found in database".to_string(), ))?; - authz.require("usage", "read", None, None, None, None)?; + let user_id_str = user_id.to_string(); + authz.require("usage", "read", Some(&user_id_str), None, None, None)?; let services = get_services(&state)?; let (params, format) = export_query.into_params(); From 2c47aa1b422554af9dd025fc09eb71b23ebd20cf Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:22:09 +1000 Subject: [PATCH 112/172] Delegate SPA OIDC logout to backend instead of fragile URL rewrite --- ui/src/auth/AuthProvider.tsx | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ui/src/auth/AuthProvider.tsx b/ui/src/auth/AuthProvider.tsx index 05ceaf7..9ee7444 100644 --- a/ui/src/auth/AuthProvider.tsx +++ b/ui/src/auth/AuthProvider.tsx @@ -224,13 +224,17 @@ export function AuthProvider({ children }: { children: React.ReactNode }) { token: null, }); - // For OIDC, we might want to redirect to the logout endpoint - if (state.method === "oidc" && config?.auth.oidc) { - // Most OIDC providers have a logout endpoint - const logoutUrl = config.auth.oidc.authorization_url.replace("/auth", "/logout"); - window.location.href = `${logoutUrl}?redirect_uri=${encodeURIComponent(window.location.origin)}`; + // For OIDC, hand off to the backend logout endpoint. The previous + // `authorization_url.replace("/auth", "/logout")` trick produced a wrong + // URL for any provider whose authorization endpoint isn't of the form + // `https://idp/.../auth` (Keycloak, dex, generic providers, etc). The + // backend already deletes the session, redirects to + // `end_session_endpoint` from OIDC discovery when configured, and falls + // back to "/", so we just navigate there. 
+ if (state.method === "oidc") { + window.location.href = "/auth/logout"; } - }, [config?.auth.oidc, setStoredAuth, state.method]); + }, [setStoredAuth, state.method]); const setApiKey = useCallback( (apiKey: string) => { From f4c9ef2f0bfc0a539ba99bc2dd7385a9e5194d81 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:26:27 +1000 Subject: [PATCH 113/172] Add integration tests covering the OAuth PKCE redeem path --- src/services/oauth_pkce.rs | 195 +++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) diff --git a/src/services/oauth_pkce.rs b/src/services/oauth_pkce.rs index d9e22e2..f2da158 100644 --- a/src/services/oauth_pkce.rs +++ b/src/services/oauth_pkce.rs @@ -240,4 +240,199 @@ mod tests { assert!(!a.contains('+') && !a.contains('/') && !a.contains('=')); assert!(a.len() >= 40); } + + // ==================================================================== + // Integration tests against an in-memory SQLite DbPool. These cover the + // full PKCE redeem path: code reuse, expiry, verifier mismatch, the + // 3-strikes burn rule, and the plain-method client/server gate. + // ==================================================================== + + #[cfg(feature = "database-sqlite")] + mod integration { + use super::*; + use crate::{ + cache::MemoryCache, + config::MemoryCacheConfig, + db::{DbPool, tests::harness::create_sqlite_pool}, + models::CreateUser, + }; + + async fn setup() -> (Arc, Uuid) { + let pool = create_sqlite_pool().await; + sqlx::migrate!("./migrations_sqlx/sqlite") + .run(&pool) + .await + .expect("Failed to run SQLite migrations"); + let db = Arc::new(DbPool::from_sqlite(pool)); + // Insert a real user via the repo so the auth-code FK is + // satisfied without us reaching into raw SQL. + let user = db + .users() + .create(CreateUser { + external_id: format!("test-{}", Uuid::new_v4()), + email: Some(format!("user-{}@example.test", Uuid::new_v4())), + name: Some("Test User".to_string()), + }) + .await + .expect("create test user"); + (db, user.id) + } + + fn issue_input(user_id: Uuid, challenge: &str, ttl_seconds: u64) -> IssueCodeInput { + IssueCodeInput { + user_id, + callback_url: "https://example.test/cb".to_string(), + code_challenge: challenge.to_string(), + code_challenge_method: PkceCodeChallengeMethod::S256, + app_name: Some("test app".to_string()), + key_options: OAuthKeyOptions::default(), + ttl_seconds, + } + } + + fn s256(verifier: &str) -> String { + derive_challenge(verifier, PkceCodeChallengeMethod::S256) + } + + #[tokio::test] + async fn redeem_succeeds_then_reuse_fails() { + let (db, user_id) = setup().await; + let svc = OAuthPkceService::new(db.clone()); + let verifier = "verifier-12345678901234567890123456789012345678901234"; + let issued = svc + .issue_code(issue_input(user_id, &s256(verifier), 600)) + .await + .expect("issue code"); + + // First redeem succeeds. + svc.redeem_code(&issued.code, verifier, None) + .await + .expect("first redeem"); + + // Second redeem fails — code was consumed. + let err = svc + .redeem_code(&issued.code, verifier, None) + .await + .expect_err("second redeem must fail"); + assert!(matches!(err, OAuthPkceError::InvalidCode)); + } + + #[tokio::test] + async fn expired_code_rejected_as_invalid() { + let (db, user_id) = setup().await; + let svc = OAuthPkceService::new(db.clone()); + let verifier = "verifier-abcdefghijklmnopqrstuvwxyz0123456789ABCDEF01"; + // TTL of zero means the row is immediately past expires_at. 
+ let issued = svc + .issue_code(issue_input(user_id, &s256(verifier), 0)) + .await + .expect("issue code"); + + // Sleep a hair so `expires_at < now` deterministically. + tokio::time::sleep(StdDuration::from_millis(50)).await; + + let err = svc + .redeem_code(&issued.code, verifier, None) + .await + .expect_err("expired code must not redeem"); + assert!(matches!(err, OAuthPkceError::InvalidCode)); + } + + #[tokio::test] + async fn verifier_mismatch_keeps_code_alive_without_cache() { + let (db, user_id) = setup().await; + let svc = OAuthPkceService::new(db.clone()); + let verifier = "verifier-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + let issued = svc + .issue_code(issue_input(user_id, &s256(verifier), 600)) + .await + .expect("issue code"); + + // Without a cache, repeated wrong verifiers must NOT burn the code + // (legitimate clients still need to be able to retry). + for _ in 0..5 { + let err = svc + .redeem_code(&issued.code, "wrong-verifier", None) + .await + .expect_err("wrong verifier must fail"); + assert!(matches!(err, OAuthPkceError::PkceMismatch)); + } + + // The original verifier still works. + svc.redeem_code(&issued.code, verifier, None) + .await + .expect("legitimate redeem after retries"); + } + + #[tokio::test] + async fn three_verifier_failures_burn_code_with_cache() { + let (db, user_id) = setup().await; + let cache: Arc = Arc::new(MemoryCache::new(&MemoryCacheConfig::default())); + let svc = OAuthPkceService::new(db.clone()).with_cache(Some(cache)); + let verifier = "verifier-bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + let issued = svc + .issue_code(issue_input(user_id, &s256(verifier), 600)) + .await + .expect("issue code"); + + // First two failures: PkceMismatch, code stays usable. + for _ in 0..2 { + let err = svc + .redeem_code(&issued.code, "wrong", None) + .await + .expect_err("wrong verifier #1/#2 must fail with mismatch"); + assert!(matches!(err, OAuthPkceError::PkceMismatch)); + } + + // Third failure: still PkceMismatch *to the caller* (so an + // attacker can't probe for the burn boundary), but the code is + // burned server-side. + let err = svc + .redeem_code(&issued.code, "wrong", None) + .await + .expect_err("wrong verifier #3 must fail with mismatch"); + assert!(matches!(err, OAuthPkceError::PkceMismatch)); + + // After burn, the legitimate verifier no longer succeeds. + let err = svc + .redeem_code(&issued.code, verifier, None) + .await + .expect_err("legitimate redeem after burn must fail"); + assert!(matches!(err, OAuthPkceError::InvalidCode)); + } + + #[tokio::test] + async fn client_method_must_match_stored() { + let (db, user_id) = setup().await; + let svc = OAuthPkceService::new(db.clone()); + let verifier = "verifier-ccccccccccccccccccccccccccccccccccccccccccc"; + let issued = svc + .issue_code(issue_input(user_id, &s256(verifier), 600)) + .await + .expect("issue code (S256)"); + + // Client claims `plain` but server stored `S256` — reject before + // even running the SHA-256 comparison. + let err = svc + .redeem_code(&issued.code, verifier, Some(PkceCodeChallengeMethod::Plain)) + .await + .expect_err("method mismatch must reject"); + assert!(matches!(err, OAuthPkceError::PkceMismatch)); + } + + #[tokio::test] + async fn plain_method_works_when_explicitly_chosen() { + let (db, user_id) = setup().await; + let svc = OAuthPkceService::new(db.clone()); + // Plain mode: challenge == verifier. 
+ let verifier = "plain-verifier-9999999999999999999999999999999999999"; + let mut input = issue_input(user_id, verifier, 600); + input.code_challenge_method = PkceCodeChallengeMethod::Plain; + let issued = svc.issue_code(input).await.expect("issue plain code"); + + svc.redeem_code(&issued.code, verifier, Some(PkceCodeChallengeMethod::Plain)) + .await + .expect("plain redeem succeeds"); + } + } } From c8861520dd89f10b78c07f7ddf029df574bc5993 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:27:03 +1000 Subject: [PATCH 114/172] Namespace inline-edit keys to avoid chat/multi-model collision --- ui/src/components/ChatMessage/ChatMessage.tsx | 11 +++++++---- .../MultiModelResponse/MultiModelResponse.tsx | 6 ++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/ui/src/components/ChatMessage/ChatMessage.tsx b/ui/src/components/ChatMessage/ChatMessage.tsx index 61f6aa7..0c5ae96 100644 --- a/ui/src/components/ChatMessage/ChatMessage.tsx +++ b/ui/src/components/ChatMessage/ChatMessage.tsx @@ -92,8 +92,11 @@ function ChatMessageComponent({ const isUser = message.role === "user"; const isAnyStreaming = useIsStreaming(); - // Inline editing state - const isEditing = useIsEditing(message.id); + // Inline editing state. Namespace the key so a user-message id can never + // collide with the `:` composite that + // MultiModelResponse writes into the same global slot. + const editingKey = `chat:${message.id}`; + const isEditing = useIsEditing(editingKey); const [editContent, setEditContent] = useState(message.content); const textareaRef = useRef(null); const { startEditing, stopEditing } = useChatUIStore(); @@ -108,8 +111,8 @@ function ChatMessageComponent({ }, [isEditing, message.content]); const handleStartEdit = useCallback(() => { - startEditing(message.id); - }, [startEditing, message.id]); + startEditing(editingKey); + }, [startEditing, editingKey]); const handleRegenerate = useCallback(() => { onRegenerate?.(message.id); diff --git a/ui/src/components/MultiModelResponse/MultiModelResponse.tsx b/ui/src/components/MultiModelResponse/MultiModelResponse.tsx index 6369f80..a0e9f76 100644 --- a/ui/src/components/MultiModelResponse/MultiModelResponse.tsx +++ b/ui/src/components/MultiModelResponse/MultiModelResponse.tsx @@ -636,8 +636,10 @@ const ModelResponseCard = memo(function ModelResponseCard({ setQuotePopover((prev) => ({ ...prev, isOpen: false })); }, []); - // Inline editing state - use composite key for unique identification - const editingKey = `${groupId}:${instanceId}`; + // Inline editing state - use a namespaced composite key so it can never + // collide with the `chat:` keys ChatMessage writes into the + // same global slot. 
+ const editingKey = `multi:${groupId}:${instanceId}`; const isEditing = useIsEditing(editingKey); const [editContent, setEditContent] = useState(response.content); const textareaRef = useRef(null); From fb6b4e234949d797106f4e553b76ef06116716ff Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:28:12 +1000 Subject: [PATCH 115/172] Wrap chat tree in ErrorBoundary for recoverable render-time crashes --- ui/src/pages/chat/ChatPage.tsx | 46 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/ui/src/pages/chat/ChatPage.tsx b/ui/src/pages/chat/ChatPage.tsx index 08834ba..97eac1f 100644 --- a/ui/src/pages/chat/ChatPage.tsx +++ b/ui/src/pages/chat/ChatPage.tsx @@ -4,6 +4,7 @@ import { useQuery } from "@tanstack/react-query"; import { apiV1ModelsOptions } from "@/api/generated/@tanstack/react-query.gen"; import { ChatView, type ChatFile } from "@/components/ChatView/ChatView"; +import { ErrorBoundary } from "@/components/ErrorBoundary/ErrorBoundary"; import { useConversationsContext } from "@/components/ConversationsProvider/ConversationsProvider"; import { ForkConversationModal, @@ -285,24 +286,33 @@ export default function ChatPage() { return ( <> - + {/* + Wrap the chat tree in an ErrorBoundary so a render-time crash inside + any descendant — message list, model card, artifact renderer — falls + back to a recoverable card instead of unmounting the whole shell. The + boundary covers ChatMessageList, MultiModelResponse, ChatMessage, + artifacts, etc. by virtue of sitting at the root of ChatView. + */} + + + {currentConversation && ( Date: Sun, 26 Apr 2026 14:31:51 +1000 Subject: [PATCH 116/172] Replace ad-hoc SSE line split with spec-compliant SseParser --- ui/src/pages/chat/useChat.ts | 618 ++++++++++++----------- ui/src/utils/__tests__/sseParser.test.ts | 72 +++ ui/src/utils/sseParser.ts | 143 ++++++ 3 files changed, 531 insertions(+), 302 deletions(-) create mode 100644 ui/src/utils/__tests__/sseParser.test.ts create mode 100644 ui/src/utils/sseParser.ts diff --git a/ui/src/pages/chat/useChat.ts b/ui/src/pages/chat/useChat.ts index 2b7331f..9c4e2ec 100644 --- a/ui/src/pages/chat/useChat.ts +++ b/ui/src/pages/chat/useChat.ts @@ -1,6 +1,7 @@ import { useCallback, useRef } from "react"; import { useAuth } from "@/auth"; +import { SseParser } from "@/utils/sseParser"; import { useStreamingStore, useAllStreams, @@ -897,7 +898,10 @@ export function useChat({ const decoder = new TextDecoder(); let content = ""; let reasoningContent = ""; - let buffer = ""; + // Spec-compliant SSE parser — handles `\r\n`/`\r`/`\n`, multi-line + // `data:` fields joined with `\n`, and dispatches events on blank + // lines instead of every `data:` line. + const sseParser = new SseParser(); let usage: MessageUsage | undefined; // Fallback: extract tool calls from response.completed if not captured during streaming let completedToolCalls: ParsedToolCall[] = []; @@ -905,320 +909,330 @@ export function useChat({ // Capture response output for debugging let responseOutput: unknown[] | undefined; + // Iterate every event yielded by the parser through the existing + // event-handling logic. We parameterise as a generator so the same + // body runs for both `feed()` (during streaming) and `flush()` (at + // end-of-stream). 
+ const handleEvents = function* (events: Iterable<{ data: string }>) { + for (const sseEvent of events) { + const data = sseEvent.data.trim(); + if (!data || data === "[DONE]") continue; + yield data; + } + }; + while (true) { const { done, value } = await reader.read(); - if (done) break; - - buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split("\n"); - // Keep the last partial line in the buffer - buffer = lines.pop() || ""; - - for (const line of lines) { - if (line.startsWith("data: ")) { - const data = line.slice(6).trim(); - if (!data || data === "[DONE]") continue; - - try { - const event = JSON.parse(data) as ResponsesStreamEvent; - - // Capture SSE event for debugging if callback provided - if (onSSEEvent) { - onSSEEvent({ - type: event.type, - timestamp: Date.now(), - data: event, - }); - } + if (done) { + // End of stream: emit any trailing buffered event the producer + // didn't terminate with a blank line. + for (const data of handleEvents(sseParser.flush())) { + await processEventData(data); + } + break; + } - // Track tool calls if enabled - if (toolTracker) { - // Cast to BaseSSEEvent since parseToolCallFromEvent expects that type - const parseResult = parseToolCallFromEvent( - event as { type: string; [key: string]: unknown }, - toolTracker - ); - if (parseResult.type === "tool_call_added") { - // Update streaming store with new tool call - streamingStore.addToolCall(storeKey, parseResult.toolCall); - } else if (parseResult.type === "tool_call_arguments_delta") { - streamingStore.updateToolCallArguments( - storeKey, - parseResult.id, - parseResult.delta - ); - } else if (parseResult.type === "tool_call_complete") { - streamingStore.completeToolCall( - storeKey, - parseResult.toolCall.id, - parseResult.toolCall.arguments as Record - ); - } - } + const chunk = decoder.decode(value, { stream: true }); + for (const data of handleEvents(sseParser.feed(chunk))) { + await processEventData(data); + } + } - // Handle different Responses API event types - if (event.type === "response.output_text.delta" && event.delta) { - hasOutputText = true; - content += event.delta; - streamingStore.appendContent(storeKey, event.delta); - } else if ( - (event.type === "response.reasoning_text.delta" || - event.type === "response.reasoning_summary_text.delta") && - event.delta - ) { - // Stream reasoning content (extended thinking) - reasoningContent += event.delta; - streamingStore.appendReasoningContent(storeKey, event.delta); - } else if ( - (event.type === "response.reasoning_text.done" || - event.type === "response.reasoning_summary_text.done") && - event.text - ) { - // Final reasoning text - reasoningContent = event.text; - streamingStore.setReasoningContent(storeKey, reasoningContent); - } else if (event.type === "response.output_text.done") { - // Completion signal only — streamed deltas are authoritative. 
- } else if (event.type === "response.output_item.done" && event.item) { - // Handle file_search_call output items (server-side file search) - if (event.item.type === "file_search_call" && event.item.results) { - // Convert file_search results to citations - const citations: Citation[] = event.item.results.map( - ( - result: { - file_id: string; - filename: string; - score: number; - content?: Array<{ type: string; text: string }>; - }, - index: number - ): ChunkCitation => ({ - id: `citation-${result.file_id}-${index}`, - type: "chunk", - fileId: result.file_id, - filename: result.filename, - score: result.score, - chunkIndex: index, - content: result.content?.[0]?.text ?? "", - }) - ); - if (citations.length > 0) { - streamingStore.addCitations(storeKey, citations); - } - } else if (event.item.type === "image_generation_call" && event.item.result) { - // Image generation completed - create image artifact from data URL - const artifact: Artifact = { - id: event.item.id ?? `img_${Date.now()}`, - type: "image", - title: "Generated Image", - data: event.item.result, - mimeType: "image/png", - role: "output", - }; - streamingStore.addArtifacts(storeKey, [artifact]); - } - } else if (event.type === "response.file_search_call.in_progress") { - // Server-side file search starting - add tool call to streaming store - const itemId = event.item_id ?? `fs_${Date.now()}`; - streamingStore.addToolCall(storeKey, { - id: itemId, - callId: itemId, - name: "file_search", - outputIndex: event.output_index ?? 0, - argumentsBuffer: "", - status: "pending", - }); - } else if (event.type === "response.file_search_call.searching") { - // Server-side file search actively searching - update status - if (event.item_id) { - streamingStore.updateToolCallArguments(storeKey, event.item_id, ""); - } - } else if (event.type === "response.file_search_call.completed") { - // Server-side file search completed - remove the tool call indicator - if (event.item_id) { - streamingStore.completeToolCall(storeKey, event.item_id, {}); - } - } else if (event.type === "response.image_generation_call.in_progress") { - // Image generation starting - show tool call indicator - const itemId = event.item_id ?? `img_${Date.now()}`; - streamingStore.addToolCall(storeKey, { - id: itemId, - callId: itemId, - name: "image_generation", - outputIndex: event.output_index ?? 0, - argumentsBuffer: "", - status: "pending", - }); - } else if (event.type === "response.image_generation_call.generating") { - // Image generation in progress - update status - if (event.item_id) { - streamingStore.updateToolCallArguments(storeKey, event.item_id, ""); - } - } else if (event.type === "response.image_generation_call.partial_image") { - // Progressive image preview - if (event.partial_image_b64) { - const dataUrl = `data:image/png;base64,${event.partial_image_b64}`; - const artifact: Artifact = { - id: event.item_id ?? 
`img_partial_${Date.now()}`, - type: "image", - title: "Generated Image", - data: dataUrl, - mimeType: "image/png", - role: "output", - }; - streamingStore.setArtifacts(storeKey, [artifact]); - } - } else if (event.type === "response.image_generation_call.completed") { - // Image generation completed - remove tool call indicator - if (event.item_id) { - streamingStore.completeToolCall(storeKey, event.item_id, {}); - } - } else if (event.type === "response.completed" && event.response) { - // Extract final text from completed response - // First try output_text, then message content, then reasoning content as fallback - const outputText = - event.response.output_text || - event.response.output - ?.flatMap( - (item) => - item.content - ?.filter((c) => c.type === "output_text") - .map((c) => c.text || "") ?? [] - ) - .join("\n\n---\n\n"); - - // If no output_text, try to extract from reasoning content (for reasoning models) - // This is useful for modes like "elected" where we need to parse a vote number - // from reasoning-only responses. - const reasoningText = - event.response.output - ?.filter((item) => item.type === "reasoning") - .flatMap((item) => { - // Extract from content (reasoning_text items) - const fromContent = - item.content - ?.filter((c) => c.type === "reasoning_text") - .map((c) => c.text || "") || []; - // Extract from summary (summary_text items) - const fromSummary = - item.summary - ?.filter((s) => s.type === "summary_text") - .map((s) => s.text || "") || []; - return [...fromContent, ...fromSummary]; - }) - .join("") || ""; - - // Store reasoning content if present - if (reasoningText && !reasoningContent) { - reasoningContent = reasoningText; - streamingStore.setReasoningContent(storeKey, reasoningContent); - } + async function processEventData(data: string) { + try { + const event = JSON.parse(data) as ResponsesStreamEvent; - // Only use response object text as fallback when no streamed deltas were received - if (!hasOutputText) { - content = outputText || reasoningText || content; - } + // Capture SSE event for debugging if callback provided + if (onSSEEvent) { + onSSEEvent({ + type: event.type, + timestamp: Date.now(), + data: event, + }); + } - // Extract usage data if present - if (event.response.usage) { - const u = event.response.usage; - const completedTime = Date.now(); - - // Get timing data from streaming store (use hook.getState() for imperative access) - const streamState = useStreamingStore.getState().streams.get(storeKey); - const startTime = streamState?.startTime; - const firstTokenTime = streamState?.firstTokenTime; - - // Calculate timing stats - const firstTokenMs = - startTime && firstTokenTime ? firstTokenTime - startTime : undefined; - const totalDurationMs = startTime ? completedTime - startTime : undefined; - const tokensPerSecond = - totalDurationMs && totalDurationMs > 0 && u.output_tokens > 0 - ? (u.output_tokens / totalDurationMs) * 1000 - : undefined; - - // Extract provider from model string (format: "provider/model-name") - const responseModel = event.response.model; - const provider = responseModel?.includes("/") - ? 
responseModel.split("/")[0] - : undefined; - - usage = { - inputTokens: u.input_tokens, - outputTokens: u.output_tokens, - totalTokens: u.total_tokens, - cost: u.cost, - cachedTokens: u.input_tokens_details?.cached_tokens, - reasoningTokens: u.output_tokens_details?.reasoning_tokens, - reasoningContent: reasoningContent || undefined, - // Timing stats - firstTokenMs, - totalDurationMs, - tokensPerSecond, - // Response metadata - finishReason: event.response.status, - modelId: responseModel, - provider, - }; - } + // Track tool calls if enabled + if (toolTracker) { + // Cast to BaseSSEEvent since parseToolCallFromEvent expects that type + const parseResult = parseToolCallFromEvent( + event as { type: string; [key: string]: unknown }, + toolTracker + ); + if (parseResult.type === "tool_call_added") { + // Update streaming store with new tool call + streamingStore.addToolCall(storeKey, parseResult.toolCall); + } else if (parseResult.type === "tool_call_arguments_delta") { + streamingStore.updateToolCallArguments(storeKey, parseResult.id, parseResult.delta); + } else if (parseResult.type === "tool_call_complete") { + streamingStore.completeToolCall( + storeKey, + parseResult.toolCall.id, + parseResult.toolCall.arguments as Record + ); + } + } - // Capture full response output for debugging - if (event.response.output) { - responseOutput = event.response.output; - } + // Handle different Responses API event types + if (event.type === "response.output_text.delta" && event.delta) { + hasOutputText = true; + content += event.delta; + streamingStore.appendContent(storeKey, event.delta); + } else if ( + (event.type === "response.reasoning_text.delta" || + event.type === "response.reasoning_summary_text.delta") && + event.delta + ) { + // Stream reasoning content (extended thinking) + reasoningContent += event.delta; + streamingStore.appendReasoningContent(storeKey, event.delta); + } else if ( + (event.type === "response.reasoning_text.done" || + event.type === "response.reasoning_summary_text.done") && + event.text + ) { + // Final reasoning text + reasoningContent = event.text; + streamingStore.setReasoningContent(storeKey, reasoningContent); + } else if (event.type === "response.output_text.done") { + // Completion signal only — streamed deltas are authoritative. + } else if (event.type === "response.output_item.done" && event.item) { + // Handle file_search_call output items (server-side file search) + if (event.item.type === "file_search_call" && event.item.results) { + // Convert file_search results to citations + const citations: Citation[] = event.item.results.map( + ( + result: { + file_id: string; + filename: string; + score: number; + content?: Array<{ type: string; text: string }>; + }, + index: number + ): ChunkCitation => ({ + id: `citation-${result.file_id}-${index}`, + type: "chunk", + fileId: result.file_id, + filename: result.filename, + score: result.score, + chunkIndex: index, + content: result.content?.[0]?.text ?? "", + }) + ); + if (citations.length > 0) { + streamingStore.addCitations(storeKey, citations); + } + } else if (event.item.type === "image_generation_call" && event.item.result) { + // Image generation completed - create image artifact from data URL + const artifact: Artifact = { + id: event.item.id ?? 
`img_${Date.now()}`, + type: "image", + title: "Generated Image", + data: event.item.result, + mimeType: "image/png", + role: "output", + }; + streamingStore.addArtifacts(storeKey, [artifact]); + } + } else if (event.type === "response.file_search_call.in_progress") { + // Server-side file search starting - add tool call to streaming store + const itemId = event.item_id ?? `fs_${Date.now()}`; + streamingStore.addToolCall(storeKey, { + id: itemId, + callId: itemId, + name: "file_search", + outputIndex: event.output_index ?? 0, + argumentsBuffer: "", + status: "pending", + }); + } else if (event.type === "response.file_search_call.searching") { + // Server-side file search actively searching - update status + if (event.item_id) { + streamingStore.updateToolCallArguments(storeKey, event.item_id, ""); + } + } else if (event.type === "response.file_search_call.completed") { + // Server-side file search completed - remove the tool call indicator + if (event.item_id) { + streamingStore.completeToolCall(storeKey, event.item_id, {}); + } + } else if (event.type === "response.image_generation_call.in_progress") { + // Image generation starting - show tool call indicator + const itemId = event.item_id ?? `img_${Date.now()}`; + streamingStore.addToolCall(storeKey, { + id: itemId, + callId: itemId, + name: "image_generation", + outputIndex: event.output_index ?? 0, + argumentsBuffer: "", + status: "pending", + }); + } else if (event.type === "response.image_generation_call.generating") { + // Image generation in progress - update status + if (event.item_id) { + streamingStore.updateToolCallArguments(storeKey, event.item_id, ""); + } + } else if (event.type === "response.image_generation_call.partial_image") { + // Progressive image preview + if (event.partial_image_b64) { + const dataUrl = `data:image/png;base64,${event.partial_image_b64}`; + const artifact: Artifact = { + id: event.item_id ?? `img_partial_${Date.now()}`, + type: "image", + title: "Generated Image", + data: dataUrl, + mimeType: "image/png", + role: "output", + }; + streamingStore.setArtifacts(storeKey, [artifact]); + } + } else if (event.type === "response.image_generation_call.completed") { + // Image generation completed - remove tool call indicator + if (event.item_id) { + streamingStore.completeToolCall(storeKey, event.item_id, {}); + } + } else if (event.type === "response.completed" && event.response) { + // Extract final text from completed response + // First try output_text, then message content, then reasoning content as fallback + const outputText = + event.response.output_text || + event.response.output + ?.flatMap( + (item) => + item.content + ?.filter((c) => c.type === "output_text") + .map((c) => c.text || "") ?? [] + ) + .join("\n\n---\n\n"); + + // If no output_text, try to extract from reasoning content (for reasoning models) + // This is useful for modes like "elected" where we need to parse a vote number + // from reasoning-only responses. 
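+          // Shape being flattened (hypothetical output entry; the field
+          // names mirror the filters below):
+          //   { type: "reasoning",
+          //     content: [{ type: "reasoning_text", text: "..." }],
+          //     summary: [{ type: "summary_text", text: "..." }] }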
+ const reasoningText = + event.response.output + ?.filter((item) => item.type === "reasoning") + .flatMap((item) => { + // Extract from content (reasoning_text items) + const fromContent = + item.content + ?.filter((c) => c.type === "reasoning_text") + .map((c) => c.text || "") || []; + // Extract from summary (summary_text items) + const fromSummary = + item.summary + ?.filter((s) => s.type === "summary_text") + .map((s) => s.text || "") || []; + return [...fromContent, ...fromSummary]; + }) + .join("") || ""; + + // Store reasoning content if present + if (reasoningText && !reasoningContent) { + reasoningContent = reasoningText; + streamingStore.setReasoningContent(storeKey, reasoningContent); + } - // Extract function calls from output (fallback for when streaming events don't include them) - if (trackToolCalls && event.response.output) { - const functionCalls = event.response.output.filter( - (item: { type: string }) => item.type === "function_call" - ) as Array<{ type: string; call_id: string; name: string; arguments: string }>; - if (functionCalls.length > 0) { - completedToolCalls = functionCalls.map((fc) => ({ - id: fc.call_id, // Use call_id as id since that's what we have - callId: fc.call_id, - name: fc.name, - status: "completed" as const, - arguments: JSON.parse(fc.arguments || "{}"), - })); - } - } + // Only use response object text as fallback when no streamed deltas were received + if (!hasOutputText) { + content = outputText || reasoningText || content; + } + + // Extract usage data if present + if (event.response.usage) { + const u = event.response.usage; + const completedTime = Date.now(); + + // Get timing data from streaming store (use hook.getState() for imperative access) + const streamState = useStreamingStore.getState().streams.get(storeKey); + const startTime = streamState?.startTime; + const firstTokenTime = streamState?.firstTokenTime; + + // Calculate timing stats + const firstTokenMs = + startTime && firstTokenTime ? firstTokenTime - startTime : undefined; + const totalDurationMs = startTime ? completedTime - startTime : undefined; + const tokensPerSecond = + totalDurationMs && totalDurationMs > 0 && u.output_tokens > 0 + ? (u.output_tokens / totalDurationMs) * 1000 + : undefined; + + // Extract provider from model string (format: "provider/model-name") + const responseModel = event.response.model; + const provider = responseModel?.includes("/") + ? 
responseModel.split("/")[0] + : undefined; + + usage = { + inputTokens: u.input_tokens, + outputTokens: u.output_tokens, + totalTokens: u.total_tokens, + cost: u.cost, + cachedTokens: u.input_tokens_details?.cached_tokens, + reasoningTokens: u.output_tokens_details?.reasoning_tokens, + reasoningContent: reasoningContent || undefined, + // Timing stats + firstTokenMs, + totalDurationMs, + tokensPerSecond, + // Response metadata + finishReason: event.response.status, + modelId: responseModel, + provider, + }; + } + + // Capture full response output for debugging + if (event.response.output) { + responseOutput = event.response.output; + } + + // Extract function calls from output (fallback for when streaming events don't include them) + if (trackToolCalls && event.response.output) { + const functionCalls = event.response.output.filter( + (item: { type: string }) => item.type === "function_call" + ) as Array<{ type: string; call_id: string; name: string; arguments: string }>; + if (functionCalls.length > 0) { + completedToolCalls = functionCalls.map((fc) => ({ + id: fc.call_id, // Use call_id as id since that's what we have + callId: fc.call_id, + name: fc.name, + status: "completed" as const, + arguments: JSON.parse(fc.arguments || "{}"), + })); + } + } - // Extract image_generation_call items as fallback - // (for providers that don't emit output_item.done per item) - if (event.response.output) { - const imageItems = event.response.output.filter( - (item) => item.type === "image_generation_call" && item.result - ); - if (imageItems.length > 0) { - // Get existing artifact IDs to avoid duplicates - const existingArtifacts = - useStreamingStore.getState().streams.get(storeKey)?.artifacts ?? []; - const existingIds = new Set(existingArtifacts.map((a) => a.id)); - const newArtifacts: Artifact[] = imageItems - .filter((item) => !existingIds.has(item.id ?? "")) - .map((item) => ({ - id: item.id ?? `img_${Date.now()}`, - type: "image" as const, - title: "Generated Image", - data: item.result!, - mimeType: "image/png", - role: "output" as const, - })); - if (newArtifacts.length > 0) { - streamingStore.addArtifacts(storeKey, newArtifacts); - } - } + // Extract image_generation_call items as fallback + // (for providers that don't emit output_item.done per item) + if (event.response.output) { + const imageItems = event.response.output.filter( + (item) => item.type === "image_generation_call" && item.result + ); + if (imageItems.length > 0) { + // Get existing artifact IDs to avoid duplicates + const existingArtifacts = + useStreamingStore.getState().streams.get(storeKey)?.artifacts ?? []; + const existingIds = new Set(existingArtifacts.map((a) => a.id)); + const newArtifacts: Artifact[] = imageItems + .filter((item) => !existingIds.has(item.id ?? "")) + .map((item) => ({ + id: item.id ?? `img_${Date.now()}`, + type: "image" as const, + title: "Generated Image", + data: item.result!, + mimeType: "image/png", + role: "output" as const, + })); + if (newArtifacts.length > 0) { + streamingStore.addArtifacts(storeKey, newArtifacts); } } - } catch (err) { - // Per-line `data:` payloads should always be complete JSON - // (we already split on `\n` and the last partial line stays - // in `buffer`). Surface the error at debug so producer/spec - // drift doesn't silently drop tool calls or citations. 
- console.debug("Failed to parse SSE event payload", { data, err }); } } + } catch (err) { + // The SSE parser now joins multi-line `data:` fields and only + // dispatches on blank lines, so a partial JSON shouldn't reach + // here. Surface failures at debug so producer/spec drift doesn't + // silently drop tool calls or citations. + console.debug("Failed to parse SSE event payload", { data, err }); } } diff --git a/ui/src/utils/__tests__/sseParser.test.ts b/ui/src/utils/__tests__/sseParser.test.ts new file mode 100644 index 0000000..e41d759 --- /dev/null +++ b/ui/src/utils/__tests__/sseParser.test.ts @@ -0,0 +1,72 @@ +import { describe, it, expect } from "vitest"; +import { SseParser } from "../sseParser"; + +describe("SseParser", () => { + it("parses single-line data events with \\n terminator", () => { + const parser = new SseParser(); + const events = [...parser.feed('data: {"hello": "world"}\n\n'), ...parser.flush()]; + expect(events).toEqual([ + { data: '{"hello": "world"}', event: "message", id: undefined, retry: undefined }, + ]); + }); + + it("handles \\r\\n line terminators", () => { + const parser = new SseParser(); + const events = [...parser.feed("data: alpha\r\n\r\n"), ...parser.flush()]; + expect(events).toEqual([{ data: "alpha", event: "message", id: undefined, retry: undefined }]); + }); + + it("handles bare \\r line terminators", () => { + const parser = new SseParser(); + const events = [...parser.feed("data: line\r\r"), ...parser.flush()]; + expect(events.map((e) => e.data)).toEqual(["line"]); + }); + + it("joins multi-line data fields with \\n", () => { + const parser = new SseParser(); + const events = [...parser.feed("data: line1\ndata: line2\ndata: line3\n\n"), ...parser.flush()]; + expect(events[0].data).toBe("line1\nline2\nline3"); + }); + + it("dispatches only on blank line", () => { + const parser = new SseParser(); + // First chunk has no blank line — nothing should emit yet. + const partial = [...parser.feed('data: {"a":1}\n')]; + expect(partial).toEqual([]); + // Second chunk completes the event. 
+    const completed = [...parser.feed("data: more\n\n")];
+    expect(completed.map((e) => e.data)).toEqual(['{"a":1}\nmore']);
+  });
+
+  it("handles chunked input with split mid-line", () => {
+    const parser = new SseParser();
+    const out = [...parser.feed('data: {"par'), ...parser.feed('tial": true}\n\n')];
+    expect(out.map((e) => e.data)).toEqual(['{"partial": true}']);
+  });
+
+  it("does not misread a \\r\\n terminator split across chunks", () => {
+    const parser = new SseParser();
+    // The lone trailing `\r` must be deferred, not consumed as a full
+    // terminator followed by a spurious blank line.
+    const out = [...parser.feed("data: a\r"), ...parser.feed("\ndata: b\n\n")];
+    expect(out.map((e) => e.data)).toEqual(["a\nb"]);
+  });
+
+  it("ignores comment lines", () => {
+    const parser = new SseParser();
+    const events = [...parser.feed(": keep-alive\ndata: payload\n\n")];
+    expect(events.map((e) => e.data)).toEqual(["payload"]);
+  });
+
+  it("captures event name and id", () => {
+    const parser = new SseParser();
+    const events = [...parser.feed("event: ping\nid: 42\ndata: hi\n\n")];
+    expect(events).toEqual([{ data: "hi", event: "ping", id: "42", retry: undefined }]);
+  });
+
+  it("flush emits unterminated trailing event", () => {
+    const parser = new SseParser();
+    const buffered = [...parser.feed("data: trailing")];
+    expect(buffered).toEqual([]);
+    const flushed = [...parser.flush()];
+    expect(flushed.map((e) => e.data)).toEqual(["trailing"]);
+  });
+
+  it("treats blank-only input as keep-alive (no events)", () => {
+    const parser = new SseParser();
+    const events = [...parser.feed("\n\n\n")];
+    expect(events).toEqual([]);
+  });
+});
diff --git a/ui/src/utils/sseParser.ts b/ui/src/utils/sseParser.ts
new file mode 100644
index 0000000..bd0d08f
--- /dev/null
+++ b/ui/src/utils/sseParser.ts
@@ -0,0 +1,143 @@
+/**
+ * Minimal SSE parser following the WHATWG EventSource spec, used by the
+ * streaming chat client.
+ *
+ * The previous parser called `buffer.split("\n")` and treated every
+ * `data: ...` line as a complete event. That breaks on:
+ * - servers that emit `\r\n` (or `\r`) line terminators,
+ * - events that span multiple `data:` lines (the spec says concatenate
+ *   them with `\n`),
+ * - producers that rely on the spec's "events end on a blank line"
+ *   semantics (we'd emit half-events early).
+ *
+ * Usage:
+ *   const parser = new SseParser();
+ *   for (const chunk of stream) {
+ *     for (const ev of parser.feed(chunk)) {
+ *       handle(ev);
+ *     }
+ *   }
+ *   for (const ev of parser.flush()) handle(ev); // flush trailing event
+ */

+export interface SseEvent {
+  /** Concatenated `data:` fields, joined with `\n`. Empty string if none. */
+  data: string;
+  /** `event:` field, or `"message"` if absent (per spec). */
+  event: string;
+  /** `id:` field, if present. */
+  id?: string;
+  /** `retry:` reconnect time in ms, if present. */
+  retry?: number;
+}
+
+export class SseParser {
+  private buffer = "";
+  private dataLines: string[] = [];
+  private eventName = "";
+  private lastEventId: string | undefined;
+  private retry: number | undefined;
+
+  /**
+   * Append `chunk` to the buffer and yield any complete events that
+   * become available. Trailing partial lines are kept buffered until the
+   * next call.
+   */
+  *feed(chunk: string): Generator<SseEvent> {
+    this.buffer += chunk;
+    // Spec: events are separated by `\r\n`, `\r`, or `\n`. Use a regex
+    // that matches any of them.
+    let newlineIdx: number;
+    while ((newlineIdx = this.buffer.search(/\r\n|\r|\n/)) !== -1) {
+      // A lone `\r` at the very end of the buffer may be the first half
+      // of a `\r\n` pair split across reads. Defer until the next chunk
+      // arrives so we don't consume it as a full terminator and then
+      // misread the following `\n` as a spurious blank line.
+      if (
+        this.buffer.charAt(newlineIdx) === "\r" &&
+        newlineIdx === this.buffer.length - 1
+      ) {
+        break;
+      }
+      const line = this.buffer.slice(0, newlineIdx);
+      const sepLen =
+        this.buffer.charAt(newlineIdx) === "\r" && this.buffer.charAt(newlineIdx + 1) === "\n"
+          ? 2
+          : 1;
+      this.buffer = this.buffer.slice(newlineIdx + sepLen);
+
+      if (line === "") {
+        // Blank line: dispatch the accumulated event, if any.
+        const ev = this.dispatch();
+        if (ev) yield ev;
+        continue;
+      }
+
+      this.processField(line);
+    }
+  }
+
+  /**
+   * Emit any pending event that hasn't been terminated by a blank line.
+   * Use at end-of-stream so a producer that closes without a trailing
+   * blank line still surfaces its last event.
+   */
+  *flush(): Generator<SseEvent> {
+    if (this.buffer.length > 0) {
+      // Treat the trailing partial line as a final field. A deferred
+      // lone `\r` terminator (see `feed`) is stripped first so it can't
+      // leak into the field value.
+      const line = this.buffer.endsWith("\r") ? this.buffer.slice(0, -1) : this.buffer;
+      if (line !== "") {
+        this.processField(line);
+      }
+      this.buffer = "";
+    }
+    const ev = this.dispatch();
+    if (ev) yield ev;
+  }
+
+  private processField(line: string) {
+    // Comment lines start with ":" per spec — ignore.
+    if (line.startsWith(":")) return;
+
+    const colon = line.indexOf(":");
+    let field: string;
+    let value: string;
+    if (colon === -1) {
+      field = line;
+      value = "";
+    } else {
+      field = line.slice(0, colon);
+      value = line.slice(colon + 1);
+      // Per spec: a single leading space in the value is removed.
+      if (value.startsWith(" ")) value = value.slice(1);
+    }
+
+    switch (field) {
+      case "data":
+        this.dataLines.push(value);
+        break;
+      case "event":
+        this.eventName = value;
+        break;
+      case "id":
+        // Per spec: ignore IDs containing NUL.
+        if (!value.includes("\0")) this.lastEventId = value;
+        break;
+      case "retry": {
+        const n = Number(value);
+        if (Number.isFinite(n) && n >= 0) this.retry = n;
+        break;
+      }
+      // Unknown fields are silently ignored.
+    }
+  }
+
+  private dispatch(): SseEvent | null {
+    if (this.dataLines.length === 0 && this.eventName === "") {
+      // Nothing buffered — happens for keep-alive blank lines.
+      this.resetEventState();
+      return null;
+    }
+    const ev: SseEvent = {
+      data: this.dataLines.join("\n"),
+      event: this.eventName || "message",
+      id: this.lastEventId,
+      retry: this.retry,
+    };
+    this.resetEventState();
+    return ev;
+  }
+
+  private resetEventState() {
+    this.dataLines = [];
+    this.eventName = "";
+    // Per spec, `id` and `retry` persist across events; only data/event reset.
+  }
+}

From 072bb79bd319a37c6346e9c0ab929f057734f4f8 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sun, 26 Apr 2026 14:33:33 +1000
Subject: [PATCH 117/172] Tighten service-worker bootstrap to avoid races and stranger SWs

---
 ui/src/main.tsx | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/ui/src/main.tsx b/ui/src/main.tsx
index 1c8e3e7..bebc175 100644
--- a/ui/src/main.tsx
+++ b/ui/src/main.tsx
@@ -16,21 +16,41 @@ if (handleMCPOAuthCallback()) {
 
 async function bootstrap() {
   // In WASM mode, register the service worker and wait for it to control the
-  // page before rendering. This prevents API calls from firing before the SW
-  // is active (race condition on hard refresh).
+  // page before rendering. `serviceWorker.ready` resolves once a SW with a
+  // scope covering this page is *active*, which closes the hard-refresh race
+  // where API calls fired before the WASM gateway was reachable.
   if (import.meta.env.VITE_WASM_MODE === "true") {
     const { registerWasmServiceWorker } = await import("./service-worker/register");
     await registerWasmServiceWorker();
+    if ("serviceWorker" in navigator) {
+      await navigator.serviceWorker.ready;
+    }
   } else if ("serviceWorker" in navigator) {
-    // Unregister any lingering WASM service workers so they don't intercept
-    // requests when running the normal dev server.
+    // Only unregister service workers we recognise as ours. The previous
+    // implementation called `unregister()` on every registration, which
+    // tore down legitimate third-party service workers if the gateway was
+    // installed on a shared origin. The Hadrian WASM SW always lives at
+    // `/sw.js` (see `service-worker/register.ts`); leave anything else
+    // alone.
     const registrations = await navigator.serviceWorker.getRegistrations();
-    await Promise.all(registrations.map((r) => r.unregister()));
+    await Promise.all(
+      registrations
+        .filter((r) => {
+          const sw = r.active ?? r.waiting ?? r.installing;
+          if (!sw) return false;
+          try {
+            return new URL(sw.scriptURL).pathname === "/sw.js";
+          } catch {
+            return false;
+          }
+        })
+        .map((r) => r.unregister()),
+    );
   }
 
   createRoot(document.getElementById("root")!).render(
     <StrictMode>
       <App />
     </StrictMode>,
   );
 }

From 73b040d65377baddddf306b1c3fc21c6a1c48881 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sun, 26 Apr 2026 14:34:43 +1000
Subject: [PATCH 118/172] Cache matchSkills result so slash popover does one scan per keystroke

---
 .../pages/chat/utils/slashCommandMatcher.ts   | 41 ++++++++++++-----
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/ui/src/pages/chat/utils/slashCommandMatcher.ts b/ui/src/pages/chat/utils/slashCommandMatcher.ts
index 9e8ec70..7132d58 100644
--- a/ui/src/pages/chat/utils/slashCommandMatcher.ts
+++ b/ui/src/pages/chat/utils/slashCommandMatcher.ts
@@ -42,20 +42,39 @@ export function detectSlashQuery(text: string, caret: number): SlashQuery | null
  * substring (fallback). Skills marked `user_invocable: false` are excluded
  * since the slash-command UI is a user-facing surface. Results are sorted
  * with prefix matches first, then alphabetical.
+ *
+ * The result is cached on `(skills array identity, query)` so the keystroke
+ * paths in `ChatInput` (input-change handler, key-down Enter/Tab handlers,
+ * the popover's own `useMemo`) share work — without this, each keystroke
+ * fanned out into 2–3 redundant linear scans of every user skill.
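+ *
+ * Cache behaviour, sketched (hypothetical call pattern):
+ *
+ *   matchSkills(skills, "dep");  // keydown handler: performs the scan
+ *   matchSkills(skills, "dep");  // popover useMemo: served from cache
+ *   matchSkills(skills, "depl"); // new query: cache miss, one fresh scan
+ *
+ * Passing a different `skills` array (e.g. after a refetch) also misses,
+ * since the cached key compares the array by reference.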
*/ +let lastSkillsRef: Skill[] | null = null; +let lastQuery: string | null = null; +let lastResult: Skill[] = []; + export function matchSkills(skills: Skill[], query: string): Skill[] { + if (skills === lastSkillsRef && query === lastQuery) return lastResult; + const q = query.toLowerCase(); const invocable = skills.filter((s) => s.user_invocable !== false); - if (!q) return invocable.slice(0, 20); - - const prefix: Skill[] = []; - const contains: Skill[] = []; - for (const s of invocable) { - const name = s.name.toLowerCase(); - if (name.startsWith(q)) prefix.push(s); - else if (name.includes(q)) contains.push(s); + let result: Skill[]; + if (!q) { + result = invocable.slice(0, 20); + } else { + const prefix: Skill[] = []; + const contains: Skill[] = []; + for (const s of invocable) { + const name = s.name.toLowerCase(); + if (name.startsWith(q)) prefix.push(s); + else if (name.includes(q)) contains.push(s); + } + prefix.sort((a, b) => a.name.localeCompare(b.name)); + contains.sort((a, b) => a.name.localeCompare(b.name)); + result = [...prefix, ...contains].slice(0, 20); } - prefix.sort((a, b) => a.name.localeCompare(b.name)); - contains.sort((a, b) => a.name.localeCompare(b.name)); - return [...prefix, ...contains].slice(0, 20); + + lastSkillsRef = skills; + lastQuery = query; + lastResult = result; + return result; } From 42a25ed62d4dc71325a7effb0045744bd9d534cb Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:36:18 +1000 Subject: [PATCH 119/172] Stop dropdown mouseenter from stealing focus during keyboard nav --- ui/src/components/Dropdown/Dropdown.tsx | 53 ++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/ui/src/components/Dropdown/Dropdown.tsx b/ui/src/components/Dropdown/Dropdown.tsx index 67c30e0..e39a5e0 100644 --- a/ui/src/components/Dropdown/Dropdown.tsx +++ b/ui/src/components/Dropdown/Dropdown.tsx @@ -28,6 +28,11 @@ interface DropdownContextValue { menuId: string; registerItem: () => number; itemCount: number; + /** Most recent input modality. `mouseenter` only steals focus when the + * user was already using the mouse — otherwise arrow keys would lose + * the highlight as soon as the cursor drifted across an item. */ + inputModalityRef: React.RefObject<"keyboard" | "mouse">; + setInputModality: (modality: "keyboard" | "mouse") => void; } const DropdownContext = createContext(null); @@ -52,6 +57,7 @@ export function Dropdown({ children }: DropdownProps) { const contentRef = useRef(null); const menuId = useId(); const itemCounterRef = useRef(0); + const inputModalityRef = useRef<"keyboard" | "mouse">("mouse"); // Wrapper to reset state when opening const setOpen = useCallback((value: boolean) => { @@ -69,6 +75,10 @@ export function Dropdown({ children }: DropdownProps) { return index; }, []); + const setInputModality = useCallback((modality: "keyboard" | "mouse") => { + inputModalityRef.current = modality; + }, []); + return (
{children}
@@ -247,8 +259,16 @@ export function DropdownContent({ sideOffset = 4, ...props }: DropdownContentProps) { - const { open, setOpen, triggerRef, menuId, highlightedIndex, setHighlightedIndex, itemCount } = - useDropdownContext(); + const { + open, + setOpen, + triggerRef, + menuId, + highlightedIndex, + setHighlightedIndex, + itemCount, + setInputModality, + } = useDropdownContext(); const localContentRef = useRef(null); const [position, setPosition] = useState<{ top: number; left: number } | null>(null); @@ -308,18 +328,22 @@ export function DropdownContent({ break; case "ArrowDown": e.preventDefault(); + setInputModality("keyboard"); setHighlightedIndex(highlightedIndex < itemCount - 1 ? highlightedIndex + 1 : 0); break; case "ArrowUp": e.preventDefault(); + setInputModality("keyboard"); setHighlightedIndex(highlightedIndex > 0 ? highlightedIndex - 1 : itemCount - 1); break; case "Home": e.preventDefault(); + setInputModality("keyboard"); setHighlightedIndex(0); break; case "End": e.preventDefault(); + setInputModality("keyboard"); setHighlightedIndex(itemCount - 1); break; case "Tab": @@ -338,7 +362,7 @@ export function DropdownContent({ document.removeEventListener("mousedown", handleClickOutside); document.removeEventListener("keydown", handleKeyDown); }; - }, [open, setOpen, triggerRef, highlightedIndex, setHighlightedIndex, itemCount]); + }, [open, setOpen, triggerRef, highlightedIndex, setHighlightedIndex, itemCount, setInputModality]); if (!open) return null; @@ -381,8 +405,15 @@ export function DropdownItem({ onClick, ...props }: DropdownItemProps) { - const { setOpen, triggerRef, highlightedIndex, registerItem, setHighlightedIndex } = - useDropdownContext(); + const { + setOpen, + triggerRef, + highlightedIndex, + registerItem, + setHighlightedIndex, + inputModalityRef, + setInputModality, + } = useDropdownContext(); const itemRef = useRef(null); const [itemIndex, setItemIndex] = useState(-1); @@ -430,7 +461,17 @@ export function DropdownItem({ setOpen(false); }} onKeyDown={handleKeyDown} - onMouseEnter={() => setHighlightedIndex(itemIndex)} + onMouseMove={() => setInputModality("mouse")} + onMouseEnter={() => { + // Only steal focus on hover when the user is actually using the + // mouse. Without this, an arrow-key navigator would lose their + // selection any time the cursor happened to be sitting on a + // different item — a common trigger when the dropdown opens + // beneath the cursor. + if (inputModalityRef.current === "mouse") { + setHighlightedIndex(itemIndex); + } + }} {...props} > {selected && } From d7b88545c75725f2ca6d4d30131f70b33ed926a0 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:41:07 +1000 Subject: [PATCH 120/172] Stop useChat from subscribing to entire streaming/debug stores --- ui/src/pages/chat/useChat.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ui/src/pages/chat/useChat.ts b/ui/src/pages/chat/useChat.ts index 9c4e2ec..3f1b52e 100644 --- a/ui/src/pages/chat/useChat.ts +++ b/ui/src/pages/chat/useChat.ts @@ -284,8 +284,10 @@ export function useChat({ projectIdRef.current = projectId; const conversationIdRef = useRef(conversationId); conversationIdRef.current = conversationId; - const streamingStore = useStreamingStore(); - const debugStore = useDebugStore(); + // Pull actions through getState() — subscribing to the entire store would + // re-render this hook on every streaming/debug update. 
+ const streamingStore = useStreamingStore.getState(); + const debugStore = useDebugStore.getState(); const modelResponses = useAllStreams(); const isStreaming = useIsStreaming(); From 149f5f1a3ca282e9cf68eedac38fcc67eba62958 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:42:58 +1000 Subject: [PATCH 121/172] Abort streams and epoch-tag commits on conversation switch --- ui/src/pages/chat/useChat.ts | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/ui/src/pages/chat/useChat.ts b/ui/src/pages/chat/useChat.ts index 3f1b52e..55c443b 100644 --- a/ui/src/pages/chat/useChat.ts +++ b/ui/src/pages/chat/useChat.ts @@ -1,4 +1,4 @@ -import { useCallback, useRef } from "react"; +import { useCallback, useEffect, useRef } from "react"; import { useAuth } from "@/auth"; import { SseParser } from "@/utils/sseParser"; @@ -297,6 +297,20 @@ export function useChat({ streamingStore.stopStreaming(); }, [streamingStore]); + // Abort any in-flight streams when the user switches conversations. + // Without this, an in-progress stream from conversation A would commit its + // assistant message into conversation B's store after the switch. + // Per-send epoch checks below also drop any results that race the abort. + const previousConversationIdRef = useRef(conversationId); + useEffect(() => { + if (previousConversationIdRef.current === conversationId) return; + previousConversationIdRef.current = conversationId; + abortControllersRef.current.forEach((controller) => controller.abort()); + abortControllersRef.current = []; + streamingStore.stopStreaming(); + streamingStore.clearStreams(); + }, [conversationId, streamingStore]); + /** * Stream a response from a model using the Responses API * @@ -1820,6 +1834,12 @@ export function useChat({ async (content: string, files: ChatFile[]) => { if (models.length === 0) return; + // Snapshot the conversation we're sending into. If the user switches + // conversations before the stream completes, the stored ref will diverge + // and we drop the results below instead of writing them into the new + // conversation's message list. + const sendEpoch = conversationIdRef.current; + // Add user message to conversation store (with the current historyMode) addUserMessage(content, files.length > 0 ? files : undefined, historyMode); @@ -1958,7 +1978,9 @@ export function useChat({ } } - if (allResponses.length > 0) { + // Drop results if the user switched conversations during the stream — + // committing them now would attach them to the wrong conversation. + if (sendEpoch === conversationIdRef.current && allResponses.length > 0) { addAssistantMessages(allResponses); } @@ -1996,6 +2018,8 @@ export function useChat({ const userMessage = messages[userMessageIndex]; if (userMessage.role !== "user") return; + const sendEpoch = conversationIdRef.current; + // Get all messages up to and including the user message, filtered by the history mode // that was stored on that user message (use current historyMode as fallback for old messages) const messageHistoryMode = userMessage.historyMode ?? 
historyMode; @@ -2024,7 +2048,7 @@ export function useChat({ debugMessageId ); - if (result !== null) { + if (result !== null && sendEpoch === conversationIdRef.current) { const stream = useStreamingStore.getState().streams.get(model); replaceAssistantMessage(userMessageId, model, { content: result.content, @@ -2067,6 +2091,8 @@ export function useChat({ // If it's a user message, delete subsequent messages and re-run to get new responses // For assistant messages, we only update the content (no deletion of sibling responses) if (message.role === "user") { + const sendEpoch = conversationIdRef.current; + // Delete all messages after the edited user message deleteMessagesAfter(messageId); @@ -2157,7 +2183,7 @@ export function useChat({ } } - if (allResponses.length > 0) { + if (sendEpoch === conversationIdRef.current && allResponses.length > 0) { addAssistantMessages(allResponses); } From 2c60e8d504e7caf34425f9b92e6d64dca921bc4c Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 14:44:53 +1000 Subject: [PATCH 122/172] Stop retrying body errors so we don't double-bill on partial uploads --- src/providers/retry.rs | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/providers/retry.rs b/src/providers/retry.rs index bf461bb..a2d53a2 100644 --- a/src/providers/retry.rs +++ b/src/providers/retry.rs @@ -18,10 +18,34 @@ use crate::{ /// Determines if a reqwest error is retryable. /// -/// Connection errors, timeouts, and other transient issues are retryable. +/// Only errors where we are reasonably confident the request did *not* reach +/// (or was not processed by) the upstream server are retried. In particular, +/// `is_body()` errors mean the request body failed mid-transmission after the +/// server already accepted the connection — retrying would risk re-charging +/// the user for an upstream that already started inference / token-counting. +/// +/// Retryable: +/// - Connection errors (`is_connect`): TCP handshake / DNS / TLS setup failed. +/// - Timeouts (`is_timeout`): the call did not complete in the configured time. +/// Note this is still ambiguous — the server may have processed the request +/// but failed to deliver the response in time. We keep it retryable because +/// the dominant case in practice is hung connects / hung first byte; users +/// that want stricter no-double-bill semantics should narrow `max_retries`. +/// +/// Not retryable: +/// - `is_body()` — body stream errored after the server accepted bytes. +/// - `is_decode()` / `is_redirect()` / `is_builder()` / `is_status()` — either +/// we already got a response or the failure is a programming/config bug that +/// retrying won't fix. +/// - The catch-all `is_request()`, which conflates the above. pub fn is_retryable_error(error: &reqwest::Error) -> bool { - // Connection errors, timeouts, and other transient issues - let mut retryable = error.is_timeout() || error.is_request(); + // Body errors mean bytes were already in flight to the server. Surface + // those to the caller without retrying so we don't double-bill. 
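+    // Concrete (hypothetical) case: a request whose streamed multipart body
+    // fails halfway through. The upstream has already read the request head
+    // and part of the body, so a blind retry could get the same prompt
+    // processed (and billed) twice.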
+    if error.is_body() {
+        return false;
+    }
+
+    let mut retryable = error.is_timeout();
     #[cfg(not(target_arch = "wasm32"))]
     {
         retryable = retryable || error.is_connect();

From 9ff6a28d869d0ac12b68cf45030d019e25c86c20 Mon Sep 17 00:00:00 2001
From: ScriptSmith
Date: Sun, 26 Apr 2026 14:50:30 +1000
Subject: [PATCH 123/172] Dedupe fallback chain, cap length, and re-check CB per hop

---
 src/providers/fallback.rs | 126 +++++++++++++++++++++++++++++++++++---
 src/routes/execution.rs   |  17 +++++
 2 files changed, 135 insertions(+), 8 deletions(-)

diff --git a/src/providers/fallback.rs b/src/providers/fallback.rs
index a288dd1..5fc83b9 100644
--- a/src/providers/fallback.rs
+++ b/src/providers/fallback.rs
@@ -161,12 +161,24 @@ pub struct FallbackTarget {
     pub model_name: String,
 }
 
+/// Hard cap on the number of fallback targets we'll try for a single request.
+///
+/// Without a cap, a misconfiguration where every provider lists every other
+/// provider as a fallback can produce a very long chain (latency budget eaten
+/// + amplified upstream pressure if many of them fail). 8 is generous in
+/// practice — Hadrian's documented examples top out at 3-4.
+pub const MAX_FALLBACK_CHAIN_LENGTH: usize = 8;
+
 /// Builds the fallback chain for a request.
 ///
 /// The chain is built in this order:
 /// 1. Model-specific fallbacks (if any) - tried first
 /// 2. Provider-level fallbacks - tried after model fallbacks are exhausted
 ///
+/// `(provider, model)` pairs are deduplicated against the primary and against
+/// each other so we never call the same target twice in a row, and the chain
+/// is capped at `MAX_FALLBACK_CHAIN_LENGTH` entries.
+///
 /// # Arguments
 ///
 /// * `primary_provider_name` - Name of the primary provider
@@ -182,12 +194,47 @@ pub fn build_fallback_chain(
     primary_model_name: &str,
     providers_config: &crate::config::ProvidersConfig,
 ) -> Vec<FallbackTarget> {
     let mut chain = Vec::new();
+    let mut seen: std::collections::HashSet<(String, String)> =
+        std::collections::HashSet::new();
+    // Seed with the primary so we never retry the same (provider, model)
+    // pair via a redundant model_fallbacks entry.
+    seen.insert((
+        primary_provider_name.to_string(),
+        primary_model_name.to_string(),
+    ));
 
     // Get the primary provider config
     let Some(primary_config) = providers_config.get(primary_provider_name) else {
         return chain;
     };
 
+    let push_target = |chain: &mut Vec<FallbackTarget>,
+                       seen: &mut std::collections::HashSet<(String, String)>,
+                       provider: String,
+                       model: String|
+     -> bool {
+        if chain.len() >= MAX_FALLBACK_CHAIN_LENGTH {
+            tracing::warn!(
+                cap = MAX_FALLBACK_CHAIN_LENGTH,
+                "Fallback chain hit the per-request length cap; dropping further entries"
+            );
+            return false;
+        }
+        if !seen.insert((provider.clone(), model.clone())) {
+            tracing::debug!(
+                provider = %provider,
+                model = %model,
+                "Skipping duplicate fallback target"
+            );
+            return true;
+        }
+        chain.push(FallbackTarget {
+            provider_name: provider,
+            model_name: model,
+        });
+        true
+    };
+
     // 1. Add model-specific fallbacks first
     if let Some(model_fallbacks) = primary_config.get_model_fallbacks(primary_model_name) {
         for fallback in model_fallbacks {
@@ -206,10 +253,14 @@
                 continue;
             }
 
-            chain.push(FallbackTarget {
-                provider_name: target_provider.to_string(),
-                model_name: fallback.model.clone(),
-            });
+            if !push_target(
+                &mut chain,
+                &mut seen,
+                target_provider.to_string(),
+                fallback.model.clone(),
+            ) {
+                return chain;
+            }
         }
     }
 
@@ -224,11 +275,15 @@
             continue;
         }
 
-        chain.push(FallbackTarget {
-            provider_name: fallback_provider_name.clone(),
+        if !push_target(
+            &mut chain,
+            &mut seen,
+            fallback_provider_name.clone(),
             // Use the original model name for provider fallbacks
-            model_name: primary_model_name.to_string(),
-        });
+            primary_model_name.to_string(),
+        ) {
+            return chain;
+        }
     }
 
     chain
@@ -481,6 +536,61 @@ mod tests {
         assert!(chain.is_empty());
     }
 
+    #[test]
+    fn test_build_fallback_chain_dedupes_pairs() {
+        let config: crate::config::ProvidersConfig = toml::from_str(
+            r#"
+            [primary]
+            type = "test"
+            fallback_providers = ["backup", "backup"]
+
+            [primary.model_fallbacks]
+            "gpt-4o" = [
+                { model = "gpt-4o-mini" },
+                { model = "gpt-4o-mini" },
+                { provider = "backup", model = "gpt-4o" },
+            ]
+
+            [backup]
+            type = "test"
+            "#,
+        )
+        .unwrap();
+
+        let chain = build_fallback_chain("primary", "gpt-4o", &config);
+        // Expected (post-dedup): primary/gpt-4o-mini then backup/gpt-4o, both
+        // from model_fallbacks. The duplicate gpt-4o-mini entry is dropped,
+        // and both `backup` provider-level entries collide with the
+        // model_fallbacks (backup, gpt-4o) pair that is already in `seen`.
+        assert_eq!(chain.len(), 2);
+        assert_eq!(chain[0].provider_name, "primary");
+        assert_eq!(chain[0].model_name, "gpt-4o-mini");
+        assert_eq!(chain[1].provider_name, "backup");
+        assert_eq!(chain[1].model_name, "gpt-4o");
+    }
+
+    #[test]
+    fn test_build_fallback_chain_caps_length() {
+        // Construct a primary with more model fallbacks than the cap allows.
+        let mut toml = String::from(
+            r#"
+            [primary]
+            type = "test"
+
+            [primary.model_fallbacks]
+            "gpt-4o" = [
+            "#,
+        );
+        for i in 0..(MAX_FALLBACK_CHAIN_LENGTH + 5) {
+            toml.push_str(&format!("    {{ model = \"m{}\" }},\n", i));
+        }
+        toml.push_str("    ]\n");
+
+        let config: crate::config::ProvidersConfig = toml::from_str(&toml).unwrap();
+        let chain = build_fallback_chain("primary", "gpt-4o", &config);
+        assert_eq!(chain.len(), MAX_FALLBACK_CHAIN_LENGTH);
+    }
+
     #[test]
     fn test_build_fallback_chain_no_model_match() {
         let config: crate::config::ProvidersConfig = toml::from_str(
diff --git a/src/routes/execution.rs b/src/routes/execution.rs
index 10230a1..c617013 100644
--- a/src/routes/execution.rs
+++ b/src/routes/execution.rs
@@ -632,6 +632,23 @@ pub async fn execute_with_fallback(
             continue;
         };
 
+        // Re-check the circuit breaker right before we call this fallback.
+        // The chain was built once up front, but a provider may have tripped
+        // its breaker since then (often *because of* the failures that drove
+        // us into the fallback path). Skip provider+model combos whose breaker
+        // is open so we don't waste a hop poking a known-down upstream.
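+        // Note that breakers are keyed by provider name, so one open breaker
+        // prunes every remaining (provider, model) hop for that provider in
+        // a single pass through the chain.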
+ if let Some(breaker) = state.circuit_breakers.get(&fallback.provider_name) { + if let Err(cb_err) = breaker.check() { + tracing::info!( + provider = %fallback.provider_name, + model = %fallback.model_name, + error = %cb_err, + "Skipping fallback: circuit breaker is open" + ); + continue; + } + } + // Check sovereignty requirements for fallback provider/model if let Some(reqs) = sovereignty_requirements { let model_config = fallback_config.get_model_config(&fallback.model_name); From f66f642d542c4694f252434c4f2db6ed2826ff91 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 15:02:51 +1000 Subject: [PATCH 124/172] Drain usage logs through bounded channel instead of spawning from Drop --- src/app.rs | 14 ++++ src/middleware/layers/admin.rs | 8 +++ src/middleware/layers/api.rs | 8 +++ src/providers/mod.rs | 11 ++- src/routes/api/chat.rs | 6 ++ src/routes/api/embeddings.rs | 2 + src/routes/execution.rs | 4 ++ src/streaming/mod.rs | 124 +++++++++++++++++++++++---------- 8 files changed, 138 insertions(+), 39 deletions(-) diff --git a/src/app.rs b/src/app.rs index 0184455..23eca11 100644 --- a/src/app.rs +++ b/src/app.rs @@ -38,6 +38,8 @@ use crate::{ usage_buffer, }; #[cfg(feature = "server")] +use crate::streaming; +#[cfg(feature = "server")] use crate::{middleware, routes}; /// Embedded UI assets from ui/dist directory. @@ -320,6 +322,11 @@ pub struct AppState { /// Ensures all spawned tasks complete during graceful shutdown. #[cfg(feature = "server")] pub task_tracker: TaskTracker, + /// Bounded channel + drainer for partial-usage logging from + /// `UsageTrackingStream::Drop`, which can fire outside a runtime context + /// (so it cannot safely spawn tasks of its own). + #[cfg(feature = "server")] + pub usage_drain: streaming::UsageDrainHandle, /// Registry of per-organization OIDC authenticators. /// Loaded from org_sso_configs table at startup for multi-tenant SSO. #[cfg(feature = "sso")] @@ -953,6 +960,11 @@ impl AppState { // Create the task tracker for background tasks #[cfg(feature = "server")] let task_tracker = TaskTracker::new(); + // Bounded usage-drain channel + drainer task. Owned by the same + // tracker so graceful shutdown waits for it to finish flushing. 
+ #[cfg(feature = "server")] + let usage_drain = + streaming::UsageDrainHandle::spawn(&task_tracker, streaming::USAGE_DRAIN_CAPACITY); // Initialize semantic cache if configured #[cfg(feature = "server")] @@ -1129,6 +1141,8 @@ impl AppState { provider_health: jobs::ProviderHealthStateRegistry::new(), #[cfg(feature = "server")] task_tracker, + #[cfg(feature = "server")] + usage_drain, #[cfg(feature = "sso")] oidc_registry, #[cfg(feature = "saml")] diff --git a/src/middleware/layers/admin.rs b/src/middleware/layers/admin.rs index 95445b3..3ec9858 100644 --- a/src/middleware/layers/admin.rs +++ b/src/middleware/layers/admin.rs @@ -2430,6 +2430,10 @@ mod tests { circuit_breakers: crate::providers::CircuitBreakerRegistry::new(), provider_health: crate::jobs::ProviderHealthStateRegistry::new(), task_tracker: TaskTracker::new(), + usage_drain: { + let tracker = TaskTracker::new(); + crate::streaming::UsageDrainHandle::spawn(&tracker, 16) + }, #[cfg(feature = "sso")] oidc_registry: None, #[cfg(feature = "saml")] @@ -2735,6 +2739,10 @@ mod tests { circuit_breakers: crate::providers::CircuitBreakerRegistry::new(), provider_health: crate::jobs::ProviderHealthStateRegistry::new(), task_tracker: TaskTracker::new(), + usage_drain: { + let tracker = TaskTracker::new(); + crate::streaming::UsageDrainHandle::spawn(&tracker, 16) + }, #[cfg(feature = "sso")] oidc_registry: None, #[cfg(feature = "saml")] diff --git a/src/middleware/layers/api.rs b/src/middleware/layers/api.rs index 9fa8431..166b0dc 100644 --- a/src/middleware/layers/api.rs +++ b/src/middleware/layers/api.rs @@ -2264,6 +2264,10 @@ mod tests { circuit_breakers: crate::providers::CircuitBreakerRegistry::new(), provider_health: crate::jobs::ProviderHealthStateRegistry::new(), task_tracker: TaskTracker::new(), + usage_drain: { + let tracker = TaskTracker::new(); + crate::streaming::UsageDrainHandle::spawn(&tracker, 16) + }, #[cfg(feature = "sso")] oidc_registry: None, #[cfg(feature = "saml")] @@ -2318,6 +2322,10 @@ mod tests { circuit_breakers: crate::providers::CircuitBreakerRegistry::new(), provider_health: crate::jobs::ProviderHealthStateRegistry::new(), task_tracker: TaskTracker::new(), + usage_drain: { + let tracker = TaskTracker::new(); + crate::streaming::UsageDrainHandle::spawn(&tracker, 16) + }, #[cfg(feature = "sso")] oidc_registry: None, #[cfg(feature = "saml")] diff --git a/src/providers/mod.rs b/src/providers/mod.rs index c22d681..935ad6e 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -124,6 +124,10 @@ pub struct CostInjectionParams<'a> { pub usage_entry: Option, #[cfg(feature = "server")] pub task_tracker: Option<&'a TaskTracker>, + /// Handle to the usage-drain channel; used by `UsageTrackingStream` to + /// log partial usage from `Drop` without spawning a task there directly. + #[cfg(feature = "server")] + pub usage_drain: Option<&'a crate::streaming::UsageDrainHandle>, pub max_response_body_bytes: usize, /// Idle timeout for streaming responses in seconds. 
/// If a streaming response doesn't receive a chunk within this timeout, @@ -570,6 +574,8 @@ async fn build_response( pub async fn inject_cost_into_response(params: CostInjectionParams<'_>) -> Response { #[cfg(feature = "server")] let task_tracker = params.task_tracker; + #[cfg(feature = "server")] + let usage_drain = params.usage_drain; let CostInjectionParams { response, provider, @@ -617,7 +623,9 @@ pub async fn inject_cost_into_response(params: CostInjectionParams<'_>) -> Respo #[cfg(feature = "server")] { // For streaming responses, wrap the body to track tokens as they arrive - if let (Some(db_pool), Some(entry), Some(tracker)) = (db, usage_entry, task_tracker) { + if let (Some(db_pool), Some(entry), Some(tracker), Some(drain)) = + (db, usage_entry, task_tracker, usage_drain) + { use futures_util::StreamExt; let (parts, body) = response.into_parts(); @@ -669,6 +677,7 @@ pub async fn inject_cost_into_response(params: CostInjectionParams<'_>) -> Respo provider.to_string(), model.to_string(), tracker.clone(), + drain.clone(), ); let new_body = axum::body::Body::from_stream(tracking_stream); diff --git a/src/routes/api/chat.rs b/src/routes/api/chat.rs index d298121..20a8c48 100644 --- a/src/routes/api/chat.rs +++ b/src/routes/api/chat.rs @@ -1023,6 +1023,8 @@ pub async fn api_v1_chat_completions( usage_entry, #[cfg(feature = "server")] task_tracker: Some(&state.task_tracker), + #[cfg(feature = "server")] + usage_drain: Some(&state.usage_drain), max_response_body_bytes: state.config.server.max_response_body_bytes, streaming_idle_timeout_secs: state.config.server.streaming_idle_timeout_secs, validation_config: &state.config.observability.response_validation, @@ -1691,6 +1693,8 @@ pub async fn api_v1_responses( usage_entry, #[cfg(feature = "server")] task_tracker: Some(&state.task_tracker), + #[cfg(feature = "server")] + usage_drain: Some(&state.usage_drain), max_response_body_bytes: state.config.server.max_response_body_bytes, streaming_idle_timeout_secs: state.config.server.streaming_idle_timeout_secs, validation_config: &state.config.observability.response_validation, @@ -2295,6 +2299,8 @@ pub async fn api_v1_completions( usage_entry, #[cfg(feature = "server")] task_tracker: Some(&state.task_tracker), + #[cfg(feature = "server")] + usage_drain: Some(&state.usage_drain), max_response_body_bytes: state.config.server.max_response_body_bytes, streaming_idle_timeout_secs: state.config.server.streaming_idle_timeout_secs, validation_config: &state.config.observability.response_validation, diff --git a/src/routes/api/embeddings.rs b/src/routes/api/embeddings.rs index bd8f846..8adbd25 100644 --- a/src/routes/api/embeddings.rs +++ b/src/routes/api/embeddings.rs @@ -294,6 +294,8 @@ pub async fn api_v1_embeddings( usage_entry: None, #[cfg(feature = "server")] task_tracker: Some(&state.task_tracker), + #[cfg(feature = "server")] + usage_drain: Some(&state.usage_drain), max_response_body_bytes: state.config.server.max_response_body_bytes, streaming_idle_timeout_secs: 0, // Embeddings don't stream validation_config: &state.config.observability.response_validation, diff --git a/src/routes/execution.rs b/src/routes/execution.rs index c617013..d381051 100644 --- a/src/routes/execution.rs +++ b/src/routes/execution.rs @@ -896,6 +896,10 @@ mod tests { circuit_breakers: CircuitBreakerRegistry::new(), provider_health: crate::jobs::ProviderHealthStateRegistry::new(), task_tracker: tokio_util::task::TaskTracker::new(), + usage_drain: { + let tracker = tokio_util::task::TaskTracker::new(); + 
crate::streaming::UsageDrainHandle::spawn(&tracker, 16) + }, #[cfg(feature = "sso")] oidc_registry: None, #[cfg(feature = "saml")] diff --git a/src/streaming/mod.rs b/src/streaming/mod.rs index 5178461..88026e9 100644 --- a/src/streaming/mod.rs +++ b/src/streaming/mod.rs @@ -12,12 +12,74 @@ use std::{ use bytes::Bytes; use futures_util::stream::Stream; use serde_json::Value; +#[cfg(feature = "server")] +use tokio::sync::mpsc; use tokio::time::Sleep; #[cfg(feature = "server")] use tokio_util::task::TaskTracker; use crate::{db::DbPool, models::UsageLogEntry, observability::metrics, pricing::PricingConfig}; +/// Default capacity for the usage-drain channel. +/// +/// Each pending job holds two `Arc`s, so memory pressure is small. The cap is +/// here to bound the worst case if the drainer falls behind — under normal +/// operation it stays empty. +#[cfg(feature = "server")] +pub const USAGE_DRAIN_CAPACITY: usize = 4096; + +/// A handle to the usage-drain background task. +/// +/// `UsageTrackingStream::Drop` runs synchronously and is not guaranteed to be +/// called from within a Tokio runtime context (clients can disconnect on a +/// thread that's tearing down, or the future can be cancelled in +/// `poll_cancel`). Spawning a task directly from `Drop` therefore risks a +/// `there is no reactor running` panic and also unbounded fan-out under heavy +/// disconnect storms. +/// +/// Instead, drops push a job into a bounded mpsc channel; a single drainer +/// task spawned at startup (owned by the existing `TaskTracker` so graceful +/// shutdown awaits it) pulls jobs and runs `UsageLogger::log_usage` from +/// inside the runtime where spawning is safe. +#[cfg(feature = "server")] +#[derive(Clone)] +pub struct UsageDrainHandle { + tx: mpsc::Sender, +} + +#[cfg(feature = "server")] +struct UsageDrainJob { + logger: Arc, + tokens: Arc, +} + +#[cfg(feature = "server")] +impl UsageDrainHandle { + /// Spawn the drainer task and return a clonable handle for sending jobs. + pub fn spawn(task_tracker: &TaskTracker, capacity: usize) -> Self { + let (tx, mut rx) = mpsc::channel::(capacity); + task_tracker.spawn(async move { + while let Some(job) = rx.recv().await { + job.logger.log_usage(&job.tokens).await; + } + tracing::debug!("Usage drain channel closed; drainer exiting"); + }); + Self { tx } + } + + /// Sync-send a usage log job. Safe to call from any thread/context, + /// including `Drop`. Drops the job (with a warning) if the channel is + /// full or closed — this is preferable to panicking from a destructor. 
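+    ///
+    /// Built on `try_send`, which never blocks or yields: under a disconnect
+    /// storm the worst case is a dropped log line plus this warning, not a
+    /// destructor stalled behind a full queue.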
+ fn try_log(&self, logger: Arc, tokens: Arc) { + if let Err(err) = self.tx.try_send(UsageDrainJob { logger, tokens }) { + tracing::warn!( + error = %err, + "Usage drain channel rejected job; partial usage will not be recorded" + ); + } + } +} + /// Sentinel value indicating an optional field is not set const NONE_SENTINEL: i64 = i64::MIN; @@ -458,7 +520,7 @@ pub struct UsageTrackingStream { usage_logger: Arc, stream_ended: bool, #[cfg(feature = "server")] - task_tracker: TaskTracker, + usage_drain: UsageDrainHandle, /// Streaming metrics tracking streaming_metrics: Arc, } @@ -738,6 +800,7 @@ where provider: String, model: String, #[cfg(feature = "server")] task_tracker: TaskTracker, + #[cfg(feature = "server")] usage_drain: UsageDrainHandle, ) -> Self { let logger = Arc::new(UsageLogger::new( db, @@ -755,7 +818,7 @@ where usage_logger: logger, stream_ended: false, #[cfg(feature = "server")] - task_tracker: task_tracker.clone(), + usage_drain, streaming_metrics: Arc::new(StreamingMetrics::new(provider, model)), } } @@ -816,18 +879,12 @@ where // Stream ended normally - log usage and report metrics if !self.stream_ended { self.stream_ended = true; - let logger = self.usage_logger.clone(); - let tokens = self.accumulated_tokens.clone(); - let streaming_metrics = self.streaming_metrics.clone(); - - // Report streaming metrics (completed successfully) - streaming_metrics.report("completed"); - - // Use task_tracker to ensure usage logging completes during graceful shutdown + self.streaming_metrics.report("completed"); #[cfg(feature = "server")] - self.task_tracker.spawn(async move { - logger.log_usage(&tokens).await; - }); + self.usage_drain.try_log( + self.usage_logger.clone(), + self.accumulated_tokens.clone(), + ); } Poll::Ready(None) @@ -836,19 +893,15 @@ where // Error in stream - still try to log what we have if !self.stream_ended { self.stream_ended = true; - let logger = self.usage_logger.clone(); - let tokens = self.accumulated_tokens.clone(); - let streaming_metrics = self.streaming_metrics.clone(); - - // Report streaming metrics (ended with error) - streaming_metrics.report("error"); - - // Use task_tracker to ensure usage logging completes during graceful shutdown + self.streaming_metrics.report("error"); #[cfg(feature = "server")] - self.task_tracker.spawn(async move { + { tracing::warn!("Stream ended with error, logging partial usage"); - logger.log_usage(&tokens).await; - }); + self.usage_drain.try_log( + self.usage_logger.clone(), + self.accumulated_tokens.clone(), + ); + } } Poll::Ready(Some(Err(e))) @@ -868,26 +921,21 @@ impl Drop for UsageTrackingStream { // // This is important for budget enforcement - without this, an attacker // could consume tokens without them being recorded by dropping connections. + // + // Drop runs synchronously and is not guaranteed to be inside a Tokio + // runtime context, so we hand the job to the bounded usage-drain + // channel instead of spawning a task here directly. if !self.stream_ended { self.stream_ended = true; - - let logger = self.usage_logger.clone(); - let tokens = self.accumulated_tokens.clone(); - let streaming_metrics = self.streaming_metrics.clone(); - - // Report streaming metrics (dropped/cancelled) - streaming_metrics.report("dropped"); - - // Spawn async task to log usage - // Note: We can't await here since Drop is sync, so we spawn a task. - // The task_tracker ensures this completes during graceful shutdown. 
+ self.streaming_metrics.report("dropped"); #[cfg(feature = "server")] - self.task_tracker.spawn(async move { + { tracing::warn!( "Stream dropped without completing - logging partial usage for budget accuracy" ); - logger.log_usage(&tokens).await; - }); + self.usage_drain + .try_log(self.usage_logger.clone(), self.accumulated_tokens.clone()); + } } } } From 794436e71c9c24e564840379c8698553856a0916 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 15:08:26 +1000 Subject: [PATCH 125/172] Default CSP to strict preset; opt into permissive for WASM features --- src/config/server.rs | 80 ++++++++++++++++++++++- src/middleware/layers/security_headers.rs | 5 +- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/src/config/server.rs b/src/config/server.rs index 135ca1b..1a26a1b 100644 --- a/src/config/server.rs +++ b/src/config/server.rs @@ -462,9 +462,26 @@ pub struct SecurityHeadersConfig { /// Content-Security-Policy header value. /// Controls resource loading to prevent XSS attacks. - #[serde(default = "default_csp")] + /// + /// When unset, the policy is rendered from `csp_preset`. Setting an explicit + /// string here always wins. + #[serde(default)] pub content_security_policy: Option, + /// Built-in CSP preset to use when `content_security_policy` is not set. + /// + /// - `strict` (default): no `'unsafe-eval'`, `connect-src 'self'`. Suitable + /// for headless gateway deployments and any deployment that does not + /// serve the bundled UI's WASM features (Pyodide / Vega charts / + /// user-configured MCP server URLs). + /// - `permissive`: enables `'unsafe-eval'` (Pyodide bytecode + Vega + /// `Function()` evaluation), `script-src https://cdn.jsdelivr.net` + /// (Pyodide / DuckDB WASM CDN), and `connect-src https: http: wss: ws:` + /// (MCP servers configured at runtime). Required when serving the + /// bundled UI with WASM-mode features enabled. + #[serde(default)] + pub csp_preset: CspPreset, + /// X-XSS-Protection header value. /// Legacy header for older browsers. Disabled by default as CSP provides protection. /// Enable for legacy browser compatibility. @@ -491,7 +508,8 @@ impl Default for SecurityHeadersConfig { content_type_options: default_content_type_options(), frame_options: default_frame_options(), hsts: HstsConfig::default(), - content_security_policy: default_csp(), + content_security_policy: None, + csp_preset: CspPreset::default(), xss_protection: default_xss_protection(), referrer_policy: default_referrer_policy(), permissions_policy: None, @@ -499,6 +517,64 @@ impl Default for SecurityHeadersConfig { } } +impl SecurityHeadersConfig { + /// Resolve the effective CSP header value. + /// + /// An explicit `content_security_policy` string always wins; otherwise the + /// `csp_preset` is rendered. Returns `None` to disable the header entirely. + pub fn resolved_csp(&self) -> Option { + if self.content_security_policy.is_some() { + return self.content_security_policy.clone(); + } + Some(self.csp_preset.render()) + } +} + +/// Built-in CSP presets selectable via `[server.security_headers].csp_preset`. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] +#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] +#[serde(rename_all = "lowercase")] +pub enum CspPreset { + /// Locked-down CSP. No `'unsafe-eval'`, `connect-src 'self'`. Default. + #[default] + Strict, + /// Allows the bundled UI's WASM features (Pyodide, Vega chart eval, + /// CDN-loaded modules) and runtime-configured MCP server URLs. 
+ Permissive, +} + +impl CspPreset { + fn render(self) -> String { + match self { + CspPreset::Strict => default_csp_strict(), + CspPreset::Permissive => default_csp_permissive(), + } + } +} + +/// Strict CSP — safe default for API-only / headless deployments. +fn default_csp_strict() -> String { + "default-src 'self'; \ + script-src 'self'; \ + style-src 'self' 'unsafe-inline'; \ + img-src 'self' data: blob:; \ + font-src 'self' data:; \ + media-src 'self'; \ + connect-src 'self'; \ + worker-src 'self'; \ + frame-src 'self'; \ + object-src 'none'; \ + base-uri 'self'; \ + form-action 'self'; \ + frame-ancestors 'none'" + .to_string() +} + +/// Permissive CSP for deployments serving the bundled UI's WASM features. +fn default_csp_permissive() -> String { + default_csp().expect("permissive CSP is always Some") +} + fn default_security_headers_enabled() -> bool { true } diff --git a/src/middleware/layers/security_headers.rs b/src/middleware/layers/security_headers.rs index ae6b59b..a5afafd 100644 --- a/src/middleware/layers/security_headers.rs +++ b/src/middleware/layers/security_headers.rs @@ -50,9 +50,10 @@ pub async fn security_headers_middleware( } } - // Content-Security-Policy + // Content-Security-Policy. Falls back to the configured `csp_preset` + // (default: strict) when no explicit string is set. if let Some(value) = config - .content_security_policy + .resolved_csp() .as_deref() .and_then(try_header_value) { From c14c8134458b47f4cbed0ed065394473421e0e50 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 15:17:36 +1000 Subject: [PATCH 126/172] Per-route body limits for audio transcription and file uploads --- src/app.rs | 23 ++++++++++++++++---- src/config/server.rs | 28 +++++++++++++++++++++++- src/routes/api/mod.rs | 50 +++++++++++++++++++++++++++++++++++++------ src/wasm.rs | 3 ++- 4 files changed, 91 insertions(+), 13 deletions(-) diff --git a/src/app.rs b/src/app.rs index 23eca11..90f138e 100644 --- a/src/app.rs +++ b/src/app.rs @@ -2173,10 +2173,25 @@ pub fn build_app(config: &config::GatewayConfig, state: AppState) -> Router { app = app.layer(cors_layer); } - app.layer(axum::extract::DefaultBodyLimit::disable()) - .layer(TraceLayer::new_for_http()) - .layer(RequestBodyLimitLayer::new(config.server.body_limit_bytes)) - .with_state(state) + // Body limits are layered: + // * Per-route `DefaultBodyLimit::max(N)` (e.g. audio / files) overrides + // the global axum extractor default for those routes. + // * `DefaultBodyLimit::max(body_limit_bytes)` provides the default cap + // enforced by axum extractors for everything else. + // * `RequestBodyLimitLayer` is the hard tower-level cap, sized to the + // largest configured route limit so the route-level caps are not + // stomped on by an outer layer. + let max_body_limit = config + .server + .body_limit_bytes + .max(config.server.audio_body_limit_bytes) + .max(config.server.files_body_limit_bytes); + app.layer(axum::extract::DefaultBodyLimit::max( + config.server.body_limit_bytes, + )) + .layer(TraceLayer::new_for_http()) + .layer(RequestBodyLimitLayer::new(max_body_limit)) + .with_state(state) } /// Returns the OpenAPI spec as JSON diff --git a/src/config/server.rs b/src/config/server.rs index 1a26a1b..caa34cc 100644 --- a/src/config/server.rs +++ b/src/config/server.rs @@ -18,10 +18,26 @@ pub struct ServerConfig { #[serde(default = "default_port")] pub port: u16, - /// Request body size limit in bytes. 
+ /// Request body size limit in bytes (the *global* cap, applied to every + /// request that doesn't have a more specific override). The audio and file + /// upload routes get a higher per-route limit because their payloads are + /// inherently larger than chat completions. #[serde(default = "default_body_limit")] pub body_limit_bytes: usize, + /// Request body size limit in bytes for audio routes + /// (`/v1/audio/transcriptions`, `/v1/audio/translations`). + /// Whisper-style transcription requests can carry tens of megabytes of + /// audio. Defaults to 100 MB. + #[serde(default = "default_audio_body_limit")] + pub audio_body_limit_bytes: usize, + + /// Request body size limit in bytes for `/v1/files` uploads. + /// Defaults to 512 MB so multi-document RAG ingest works without manual + /// tuning. Operators that don't use file uploads should drop this. + #[serde(default = "default_files_body_limit")] + pub files_body_limit_bytes: usize, + /// Maximum response body size for buffering provider responses (in bytes). /// This prevents OOM from malicious or malformed provider responses. #[serde(default = "default_max_response_body")] @@ -102,6 +118,8 @@ impl Default for ServerConfig { host: default_host(), port: default_port(), body_limit_bytes: default_body_limit(), + audio_body_limit_bytes: default_audio_body_limit(), + files_body_limit_bytes: default_files_body_limit(), max_response_body_bytes: default_max_response_body(), timeout_secs: default_timeout(), streaming_idle_timeout_secs: default_streaming_idle_timeout(), @@ -130,6 +148,14 @@ fn default_body_limit() -> usize { 10 * 1024 * 1024 // 10 MB } +fn default_audio_body_limit() -> usize { + 100 * 1024 * 1024 // 100 MB — enough for ~1h of compressed audio +} + +fn default_files_body_limit() -> usize { + 512 * 1024 * 1024 // 512 MB — multi-document RAG ingest +} + fn default_max_response_body() -> usize { 100 * 1024 * 1024 // 100 MB } diff --git a/src/routes/api/mod.rs b/src/routes/api/mod.rs index 87168e4..12101de 100644 --- a/src/routes/api/mod.rs +++ b/src/routes/api/mod.rs @@ -810,12 +810,36 @@ fn get_services(state: &AppState) -> Result<&Services, ApiError> { }) } +/// Per-route body size limits (audio uploads, file uploads). +/// +/// Pulled from `[server]` config and threaded through router composition so +/// individual routes can opt into a higher cap than the global +/// `RequestBodyLimitLayer` would otherwise impose. +#[cfg(any(feature = "server", feature = "wasm"))] +#[derive(Debug, Clone, Copy)] +pub(crate) struct ApiBodyLimits { + pub audio: usize, + pub files: usize, +} + +#[cfg(any(feature = "server", feature = "wasm"))] +impl Default for ApiBodyLimits { + fn default() -> Self { + // Generous WASM-side defaults; the server overrides from config. + Self { + audio: 100 * 1024 * 1024, + files: 512 * 1024 * 1024, + } + } +} + /// Route definitions for the OpenAI-compatible API. /// /// Shared between server and WASM builds. The server wraps these with auth/rate-limit /// middleware in [`get_api_routes`]; the WASM build uses them directly. 
#[cfg(any(feature = "server", feature = "wasm"))] -pub(crate) fn api_v1_routes() -> Router { +pub(crate) fn api_v1_routes(limits: ApiBodyLimits) -> Router { + use axum::extract::DefaultBodyLimit; let router = Router::new() .route("/v1/chat/completions", post(api_v1_chat_completions)) .route("/v1/responses", post(api_v1_responses)) @@ -832,20 +856,28 @@ pub(crate) fn api_v1_routes() -> Router { .route("/v1/images/edits", post(api_v1_images_edits)) .route("/v1/images/variations", post(api_v1_images_variations)); let router = router - // Audio API (OpenAI-compatible) + // Audio API (OpenAI-compatible). speech is text-only (small payload), so + // it stays on the global limit; transcription/translation receive raw + // audio uploads and get the larger per-route cap below. .route("/v1/audio/speech", post(api_v1_audio_speech)); #[cfg(feature = "server")] let router = router .route( "/v1/audio/transcriptions", - post(api_v1_audio_transcriptions), + post(api_v1_audio_transcriptions).layer(DefaultBodyLimit::max(limits.audio)), ) - .route("/v1/audio/translations", post(api_v1_audio_translations)); - // Files API (OpenAI-compatible) + .route( + "/v1/audio/translations", + post(api_v1_audio_translations).layer(DefaultBodyLimit::max(limits.audio)), + ); + // Files API (OpenAI-compatible). Uploads need the largest cap; list/get + // are unaffected. #[cfg(feature = "server")] let router = router.route( "/v1/files", - post(api_v1_files_upload).merge(get(api_v1_files_list)), + post(api_v1_files_upload) + .layer(DefaultBodyLimit::max(limits.files)) + .merge(get(api_v1_files_list)), ); #[cfg(not(feature = "server"))] let router = router.route("/v1/files", get(api_v1_files_list)); @@ -903,7 +935,11 @@ pub(crate) fn api_v1_routes() -> Router { /// Server-only: wraps [`api_v1_routes`] with auth, rate-limit, and authz middleware. #[cfg(feature = "server")] pub fn get_api_routes(state: AppState) -> Router { - api_v1_routes() + let limits = ApiBodyLimits { + audio: state.config.server.audio_body_limit_bytes, + files: state.config.server.files_body_limit_bytes, + }; + api_v1_routes(limits) // Apply middleware layers in order (ServiceBuilder runs top-to-bottom): // 1. Rate limiting - reject requests early before auth overhead // 2. Auth, budget, usage - authenticates and sets AuthenticatedRequest diff --git a/src/wasm.rs b/src/wasm.rs index 524c591..e86338d 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -218,7 +218,8 @@ fn build_wasm_router( // Merge public admin routes (ui config) into the admin router so we can nest once. 
let admin_routes = crate::routes::admin::admin_v1_routes() .merge(crate::routes::admin::public_admin_v1_routes()); - let api_routes = crate::routes::api::api_v1_routes(); + let api_routes = + crate::routes::api::api_v1_routes(crate::routes::api::ApiBodyLimits::default()); Router::new() // WASM-specific handlers (genuinely different behavior) From 4ed779735d3a9764a631c0cc1dce11b8a66ba313 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Sun, 26 Apr 2026 15:19:10 +1000 Subject: [PATCH 127/172] Lazy-load KaTeX CSS so it stays out of the initial bundle --- ui/src/components/Markdown/Markdown.tsx | 8 ++++++- .../StreamingMarkdown/StreamingMarkdown.tsx | 10 +++++++-- ui/src/utils/katexCss.ts | 22 +++++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 ui/src/utils/katexCss.ts diff --git a/ui/src/components/Markdown/Markdown.tsx b/ui/src/components/Markdown/Markdown.tsx index a7c4ba2..42d2024 100644 --- a/ui/src/components/Markdown/Markdown.tsx +++ b/ui/src/components/Markdown/Markdown.tsx @@ -3,10 +3,10 @@ import { Streamdown, type MermaidOptions } from "streamdown"; import { createCodePlugin } from "@streamdown/code"; import { math } from "@streamdown/math"; import { mermaid } from "@streamdown/mermaid"; -import "katex/dist/katex.min.css"; import { cn } from "@/utils/cn"; import { usePreferences } from "@/preferences/PreferencesProvider"; +import { loadKatexCss } from "@/utils/katexCss"; import { linkSafety } from "./linkSafety"; const lightCode = createCodePlugin({ @@ -23,6 +23,12 @@ export function Markdown({ content, className }: MarkdownProps) { const { resolvedTheme } = usePreferences(); const containerRef = useRef(null); + // Lazy-load the KaTeX stylesheet on first mount so it doesn't bloat the + // initial bundle on pages that never render markdown. + useEffect(() => { + void loadKatexCss(); + }, []); + // Streamdown renders
 <pre> elements that we can't control directly.
   // Post-render fixup: set tabIndex="0" on all <pre> children so keyboard
   // users can scroll them (fixes axe-core scrollable-region-focusable).
diff --git a/ui/src/components/StreamingMarkdown/StreamingMarkdown.tsx b/ui/src/components/StreamingMarkdown/StreamingMarkdown.tsx
index 58caeff..a183c7e 100644
--- a/ui/src/components/StreamingMarkdown/StreamingMarkdown.tsx
+++ b/ui/src/components/StreamingMarkdown/StreamingMarkdown.tsx
@@ -2,12 +2,12 @@ import { Streamdown, type MermaidOptions } from "streamdown";
 import { createCodePlugin } from "@streamdown/code";
 import { math } from "@streamdown/math";
 import { mermaid } from "@streamdown/mermaid";
-import "katex/dist/katex.min.css";
 import "streamdown/styles.css";
-import { memo } from "react";
+import { memo, useEffect } from "react";
 
 import { cn } from "@/utils/cn";
 import { usePreferences } from "@/preferences/PreferencesProvider";
+import { loadKatexCss } from "@/utils/katexCss";
 import { linkSafety } from "@/components/Markdown/linkSafety";
 
 const lightCode = createCodePlugin({
@@ -68,6 +68,12 @@ interface StreamingMarkdownProps {
 function StreamingMarkdownComponent({ content, isStreaming, className }: StreamingMarkdownProps) {
   const { resolvedTheme } = usePreferences();
 
+  // Lazy-load the KaTeX stylesheet on first mount so it doesn't bloat the
+  // initial bundle on pages that never render markdown.
+  useEffect(() => {
+    void loadKatexCss();
+  }, []);
+
   const mermaidOptions: MermaidOptions = {
     config: {
       theme: resolvedTheme === "dark" ? "dark" : "default",
diff --git a/ui/src/utils/katexCss.ts b/ui/src/utils/katexCss.ts
new file mode 100644
index 0000000..7a3b33f
--- /dev/null
+++ b/ui/src/utils/katexCss.ts
@@ -0,0 +1,22 @@
+/**
+ * Lazy-load the KaTeX stylesheet.
+ *
+ * `katex/dist/katex.min.css` is ~24 KB minified and ships with the main
+ * bundle when imported at module level (the original behavior in
+ * `Markdown.tsx` / `StreamingMarkdown.tsx`). Most pages — login, settings,
+ * dashboards, the conversation sidebar — never render math, so we defer
+ * the request until the first markdown component actually mounts.
+ *
+ * Vite code-splits a dynamic `import()` of a stylesheet into its own
+ * chunk, so the CSS is fetched on demand instead of shipping with the
+ * entry bundle. Calling this multiple times reuses the same cached
+ * promise, so the network request happens at most once.
+ */
+let katexCssPromise: Promise<unknown> | null = null;
+
+export function loadKatexCss(): Promise<unknown> {
+  if (katexCssPromise === null) {
+    katexCssPromise = import("katex/dist/katex.min.css");
+  }
+  return katexCssPromise;
+}

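Any other component that can render math should trigger the same one-shot
load on mount. A minimal consumer sketch (`MathPreview` is illustrative and
not part of this patch; only the two components above are wired up):

    import { useEffect } from "react";

    import { loadKatexCss } from "@/utils/katexCss";

    // Hypothetical consumer: start the cached CSS load on first mount,
    // exactly as Markdown.tsx and StreamingMarkdown.tsx do above.
    export function MathPreview({ tex }: { tex: string }) {
      useEffect(() => {
        void loadKatexCss();
      }, []);
      return <code>{tex}</code>;
    }

The `void` marks the promise as intentionally unawaited: math rendered
before the stylesheet arrives is briefly unstyled and corrects itself once
the CSS applies.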
From e1fb6809f79252c3404615c004256ef1e66fa13a Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 15:19:54 +1000
Subject: [PATCH 128/172] Make OTEL trace test actually verify gateway spans
 reach Jaeger

---
 .../infrastructure/observability.test.ts      | 46 +++++++++++++------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/deploy/tests/src/tests/infrastructure/observability.test.ts b/deploy/tests/src/tests/infrastructure/observability.test.ts
index f89a76b..75354fd 100644
--- a/deploy/tests/src/tests/infrastructure/observability.test.ts
+++ b/deploy/tests/src/tests/infrastructure/observability.test.ts
@@ -224,22 +224,42 @@ describe("Observability Stack Deployment", () => {
     });
 
     it("sends traces to OTEL collector", async () => {
-      // Make a request that should generate a trace via the tracked SDK client
+      // Generate a few requests so we're not racing a single trace through the
+      // OTEL collector batch processor.
+      await healthCheck({ client });
+      await healthCheck({ client });
       await healthCheck({ client });
 
-      // Give the trace a moment to be processed
-      await new Promise((resolve) => setTimeout(resolve, 2000));
-
-      // Check Jaeger for traces from hadrian-gateway service
       const jaegerUrl = env.getServiceUrl("jaeger", 16686);
-      const response = await fetch(`${jaegerUrl}/api/services`);
-      const data = await response.json();
-
-      expect(response.status).toBe(200);
-      // The gateway service should appear in Jaeger
-      // Note: Service name depends on OTEL_SERVICE_NAME env var (hadrian-gateway)
-      // This may take time to appear, so we just verify Jaeger is collecting services
-      expect(data.data).toBeDefined();
+      const expectedService = "hadrian-gateway";
+
+      // Poll Jaeger until the gateway service shows up. The collector
+      // batches traces (default 5s), and Jaeger only registers a service
+      // after it ingests its first span — a single 2s sleep was almost
+      // always too short, so the previous assertion only checked that
+      // Jaeger itself was up.
+      const deadline = Date.now() + 30000;
+      let services: string[] = [];
+      while (Date.now() < deadline) {
+        const resp = await fetch(`${jaegerUrl}/api/services`);
+        expect(resp.status).toBe(200);
+        const json = (await resp.json()) as { data?: string[] };
+        services = json.data ?? [];
+        if (services.includes(expectedService)) break;
+        await new Promise((r) => setTimeout(r, 1000));
+      }
+      expect(services).toContain(expectedService);
+
+      // And the gateway should have at least one trace recorded — verify by
+      // pulling traces for the service. Empty `data` here means the service
+      // appeared but no spans landed, which is the failure mode this test
+      // is meant to catch.
+      const tracesResp = await fetch(
+        `${jaegerUrl}/api/traces?service=${encodeURIComponent(expectedService)}&limit=5`
+      );
+      expect(tracesResp.status).toBe(200);
+      const tracesJson = (await tracesResp.json()) as { data?: unknown[] };
+      expect(Array.isArray(tracesJson.data) && tracesJson.data.length > 0).toBe(true);
     });
   });
 });

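The poll-until-deadline loop will likely recur in other infrastructure
tests. A sketch of a shared helper it could be factored into (hypothetical;
this patch keeps the loop inline):

    // Poll `probe` until it yields a value or the deadline passes.
    async function pollUntil<T>(
      probe: () => Promise<T | undefined>,
      timeoutMs = 30000,
      intervalMs = 1000
    ): Promise<T> {
      const deadline = Date.now() + timeoutMs;
      for (;;) {
        const result = await probe();
        if (result !== undefined) return result;
        if (Date.now() >= deadline) {
          throw new Error(`pollUntil: no result within ${timeoutMs}ms`);
        }
        await new Promise((r) => setTimeout(r, intervalMs));
      }
    }

The Jaeger assertions above then collapse into two calls: one probe that
returns the service list once it contains `hadrian-gateway`, and one that
returns the trace array once it is non-empty.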
From a9d9edfda63cfad52af0aad82a512f95a0c5896f Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 15:21:41 +1000
Subject: [PATCH 129/172] Type recharts tooltip payloads instead of disabling
 no-explicit-any

---
 ui/src/components/Charts/LineChart.tsx       | 8 +++++---
 ui/src/components/Charts/MultiLineChart.tsx  | 8 +++++---
 ui/src/components/Charts/PieChart.tsx        | 5 +++--
 ui/src/components/Charts/StackedBarChart.tsx | 8 +++++---
 4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/ui/src/components/Charts/LineChart.tsx b/ui/src/components/Charts/LineChart.tsx
index 67d7808..a604c92 100644
--- a/ui/src/components/Charts/LineChart.tsx
+++ b/ui/src/components/Charts/LineChart.tsx
@@ -1,4 +1,7 @@
-import type { TooltipProps as RechartsTooltipProps } from "recharts";
+import type {
+  TooltipProps as RechartsTooltipProps,
+  TooltipPayloadEntry as RechartsTooltipPayloadEntry,
+} from "recharts";
 import {
   LineChart as RechartsLineChart,
   Line,
@@ -14,8 +17,7 @@ import { CHART_COLORS } from "./constants";
 
 interface ChartTooltipProps {
   active?: boolean;
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  payload?: any[];
+  payload?: ReadonlyArray<RechartsTooltipPayloadEntry>;
   label?: string;
   formatter?: (value: number) => string;
 }
diff --git a/ui/src/components/Charts/MultiLineChart.tsx b/ui/src/components/Charts/MultiLineChart.tsx
index ac886a8..ca31afe 100644
--- a/ui/src/components/Charts/MultiLineChart.tsx
+++ b/ui/src/components/Charts/MultiLineChart.tsx
@@ -1,4 +1,7 @@
-import type { TooltipProps as RechartsTooltipProps } from "recharts";
+import type {
+  TooltipProps as RechartsTooltipProps,
+  TooltipPayloadEntry as RechartsTooltipPayloadEntry,
+} from "recharts";
 import {
   LineChart as RechartsLineChart,
   Line,
@@ -13,8 +16,7 @@ import { CHART_COLORS } from "./constants";
 
 interface ChartTooltipProps {
   active?: boolean;
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  payload?: any[];
+  payload?: ReadonlyArray<RechartsTooltipPayloadEntry>;
   label?: string;
   formatter?: (value: number) => string;
   xFormatter?: (value: string) => string;
diff --git a/ui/src/components/Charts/PieChart.tsx b/ui/src/components/Charts/PieChart.tsx
index bc3178c..171e25e 100644
--- a/ui/src/components/Charts/PieChart.tsx
+++ b/ui/src/components/Charts/PieChart.tsx
@@ -1,4 +1,5 @@
 import { PieChart as RechartsPieChart, Pie, Cell, Tooltip, ResponsiveContainer } from "recharts";
+import type { PieLabelRenderProps } from "recharts";
 import { CHART_COLORS } from "./constants";
 
 export interface PieChartProps {
@@ -35,8 +36,8 @@ export function PieChart({
           dataKey="value"
           label={
             showLabel
-              ? // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                (props: any) => `${props.name ?? ""} (${((props.percent ?? 0) * 100).toFixed(0)}%)`
+              ? (props: PieLabelRenderProps) =>
+                  `${props.name ?? ""} (${((props.percent ?? 0) * 100).toFixed(0)}%)`
               : undefined
           }
           labelLine={showLabel}
diff --git a/ui/src/components/Charts/StackedBarChart.tsx b/ui/src/components/Charts/StackedBarChart.tsx
index 180f355..4742569 100644
--- a/ui/src/components/Charts/StackedBarChart.tsx
+++ b/ui/src/components/Charts/StackedBarChart.tsx
@@ -1,4 +1,7 @@
-import type { TooltipProps as RechartsTooltipProps } from "recharts";
+import type {
+  TooltipProps as RechartsTooltipProps,
+  TooltipPayloadEntry as RechartsTooltipPayloadEntry,
+} from "recharts";
 import {
   BarChart as RechartsBarChart,
   Bar,
@@ -14,8 +17,7 @@ import { CHART_COLORS } from "./constants";
 
 interface ChartTooltipProps {
   active?: boolean;
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  payload?: any[];
+  payload?: ReadonlyArray<RechartsTooltipPayloadEntry>;
   label?: string;
   formatter?: (value: number) => string;
   xFormatter?: (value: string) => string;

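For reference, the typed payload flows through a tooltip body like the
sketch below (modeled on the `ChartTooltipProps` interface above; the
`name` / `value` fields on each entry are the ones these tooltips already
read):

    function ChartTooltipBody({ active, payload, label, formatter }: ChartTooltipProps) {
      if (!active || !payload || payload.length === 0) return null;
      return (
        <div>
          <div>{label}</div>
          {payload.map((entry) => (
            // `entry` is a RechartsTooltipPayloadEntry, so `name` and
            // `value` are typed rather than flowing out of an `any[]`.
            <div key={String(entry.name)}>
              {String(entry.name)}:{" "}
              {formatter ? formatter(Number(entry.value)) : String(entry.value)}
            </div>
          ))}
        </div>
      );
    }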
From b1e8a0e7f4fb6cb7772506ae84510c0d788b6762 Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 15:24:27 +1000
Subject: [PATCH 130/172] Honor standard OTEL env vars so Helm-set OTLP
 endpoint actually exports

---
 src/observability/tracing_init.rs | 60 ++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/src/observability/tracing_init.rs b/src/observability/tracing_init.rs
index 880b9fc..6d35159 100644
--- a/src/observability/tracing_init.rs
+++ b/src/observability/tracing_init.rs
@@ -40,9 +40,15 @@ pub fn init_tracing(config: &ObservabilityConfig) -> Result Result Result {
+    use crate::config::{OtlpConfig, OtlpProtocol};
     use opentelemetry::KeyValue;
     use opentelemetry_sdk::Resource;
 
+    // The Helm chart (and most production deployments) drives OpenTelemetry
+    // through standard OTel env vars rather than a TOML stanza. Honor them so
+    // the chart's `OTEL_EXPORTER_OTLP_ENDPOINT` / `OTEL_SERVICE_NAME` settings
+    // aren't no-ops:
+    //   * `OTEL_SERVICE_NAME` overrides the configured service name when the
+    //     config is still on the default ("hadrian").
+    //   * `OTEL_EXPORTER_OTLP_ENDPOINT` (or `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`,
+    //     which takes precedence) synthesizes an OtlpConfig when the TOML didn't supply one.
+    //   * `OTEL_EXPORTER_OTLP_PROTOCOL` (`grpc` / `http/protobuf`) selects the
+    //     transport.
+    let env_service_name = std::env::var("OTEL_SERVICE_NAME").ok();
+    let env_otlp_endpoint = std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok();
+    let env_otlp_traces_endpoint = std::env::var("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT").ok();
+    let env_otlp_protocol = std::env::var("OTEL_EXPORTER_OTLP_PROTOCOL").ok();
+
+    let service_name = match env_service_name {
+        Some(s) if config.service_name == "hadrian" => s,
+        _ => config.service_name.clone(),
+    };
+
     // Build resource attributes
-    let mut resource_attrs = vec![KeyValue::new("service.name", config.service_name.clone())];
+    let mut resource_attrs = vec![KeyValue::new("service.name", service_name)];
 
     if let Some(version) = &config.service_version {
         resource_attrs.push(KeyValue::new("service.version", version.clone()));
@@ -379,8 +406,33 @@ fn build_otel_provider(
     // Build sampler
     let sampler = build_sampler(&config.sampling);
 
+    // Resolve the OTLP exporter config: prefer TOML; otherwise synthesize one
+    // from the OTEL env vars if any endpoint is set.
+    let otlp_from_env = config.otlp.is_none()
+        && (env_otlp_endpoint.is_some() || env_otlp_traces_endpoint.is_some());
+    let synthesized_otlp = if otlp_from_env {
+        let endpoint = env_otlp_traces_endpoint
+            .or(env_otlp_endpoint)
+            .expect("checked above");
+        let protocol = match env_otlp_protocol.as_deref() {
+            Some("http/protobuf") | Some("http") => OtlpProtocol::Http,
+            // Default and `grpc` both map to gRPC.
+            _ => OtlpProtocol::Grpc,
+        };
+        Some(OtlpConfig {
+            endpoint,
+            protocol,
+            headers: Default::default(),
+            timeout_secs: 10,
+            compression: true,
+        })
+    } else {
+        None
+    };
+    let effective_otlp = config.otlp.as_ref().or(synthesized_otlp.as_ref());
+
     // Build tracer provider
-    let provider = if let Some(otlp) = &config.otlp {
+    let provider = if let Some(otlp) = effective_otlp {
         let exporter = build_otlp_exporter(otlp)?;
         SdkTracerProvider::builder()
             .with_resource(resource)

From a652af2febc4358b3431f27ca400e270c45d031e Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 15:28:28 +1000
Subject: [PATCH 131/172] Add hadrian healthcheck subcommand and drop curl from
 Docker image

---
 Dockerfile             | 10 +++---
 src/cli/healthcheck.rs | 70 ++++++++++++++++++++++++++++++++++++++++++
 src/cli/mod.rs         | 21 +++++++++++++
 3 files changed, 97 insertions(+), 4 deletions(-)
 create mode 100644 src/cli/healthcheck.rs

diff --git a/Dockerfile b/Dockerfile
index 04b62ad..e3028bb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -111,11 +111,12 @@ RUN --mount=type=cache,target=/usr/local/cargo/registry \
 FROM debian:trixie-slim
 
 # Install runtime dependencies
-# Includes SAML libraries for XML signature verification
+# Includes SAML libraries for XML signature verification.
+# `curl` was previously required for the HEALTHCHECK; the binary now ships
+# with a `hadrian healthcheck` subcommand so curl is no longer needed.
 RUN apt-get update && apt-get install -y \
     ca-certificates \
     libssl3 \
-    curl \
     libxml2 \
     libxslt1.1 \
     libxmlsec1 \
@@ -157,8 +158,9 @@ EOF
 # Expose port
 EXPOSE 8080
 
-# Health check
+# Health check (uses the built-in `hadrian healthcheck` subcommand so the
+# runtime image doesn't need to ship `curl`).
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:8080/health/live || exit 1
+    CMD ["/app/hadrian", "--config", "/app/config/hadrian.toml", "healthcheck"]
 
 CMD ["/app/hadrian", "--config", "/app/config/hadrian.toml"]
diff --git a/src/cli/healthcheck.rs b/src/cli/healthcheck.rs
new file mode 100644
index 0000000..38727e7
--- /dev/null
+++ b/src/cli/healthcheck.rs
@@ -0,0 +1,70 @@
+//! `hadrian healthcheck` subcommand.
+//!
+//! Issues a single GET against `/health/live` and exits 0/1. Used by the
+//! Docker image's `HEALTHCHECK` so the runtime image can drop `curl`.
+
+use std::time::Duration;
+
+pub async fn run_healthcheck(
+    config_path: Option<&str>,
+    url_override: Option<String>,
+    timeout_secs: u64,
+) {
+    let url = match url_override {
+        Some(u) => u,
+        None => match resolve_url_from_config(config_path) {
+            Ok(u) => u,
+            Err(err) => {
+                eprintln!("healthcheck: could not resolve URL from config: {err}");
+                std::process::exit(1);
+            }
+        },
+    };
+
+    let client = match reqwest::Client::builder()
+        .timeout(Duration::from_secs(timeout_secs))
+        .build()
+    {
+        Ok(c) => c,
+        Err(err) => {
+            eprintln!("healthcheck: could not build HTTP client: {err}");
+            std::process::exit(1);
+        }
+    };
+
+    match client.get(&url).send().await {
+        Ok(resp) if resp.status().is_success() => {
+            std::process::exit(0);
+        }
+        Ok(resp) => {
+            eprintln!("healthcheck: {url} returned status {}", resp.status());
+            std::process::exit(1);
+        }
+        Err(err) => {
+            eprintln!("healthcheck: request to {url} failed: {err}");
+            std::process::exit(1);
+        }
+    }
+}
+
+fn resolve_url_from_config(config_path: Option<&str>) -> Result<String, String> {
+    let path = config_path.ok_or_else(|| {
+        "no --config supplied and no --url override; pass one of them".to_string()
+    })?;
+    let config =
+        crate::config::GatewayConfig::from_file(path).map_err(|e| e.to_string())?;
+    let host = match config.server.host.to_string().as_str() {
+        // 0.0.0.0 isn't dialable; map back to loopback for the local probe.
+        "0.0.0.0" => "127.0.0.1".to_string(),
+        "::" => "[::1]".to_string(),
+        other => {
+            // Wrap bare IPv6 addresses in brackets for URL syntax.
+            if other.contains(':') && !other.starts_with('[') {
+                format!("[{other}]")
+            } else {
+                other.to_string()
+            }
+        }
+    };
+    Ok(format!("http://{host}:{}/health/live", config.server.port))
+}
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
index 9a13fa3..a7d9c1d 100644
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -1,5 +1,7 @@
 mod bootstrap;
 mod features;
+#[cfg(feature = "server")]
+mod healthcheck;
 mod init;
 mod migrate;
 mod openapi;
@@ -99,6 +101,21 @@ enum Command {
     },
     /// Show enabled compile-time features
     Features,
+    /// Probe the gateway's `/health/live` endpoint and exit with status.
+    ///
+    /// Used by the Docker `HEALTHCHECK` so the runtime image doesn't need to
+    /// install `curl`. Exits 0 on success, 1 on failure. Reads the listen
+    /// host/port from the same config the server uses; `0.0.0.0` and `::`
+    /// listen addresses are probed via the corresponding loopback address.
+    #[cfg(feature = "server")]
+    Healthcheck {
+        /// Override the URL to probe (e.g. `http://localhost:8080/health/live`).
+        #[arg(long)]
+        url: Option<String>,
+        /// Per-request timeout in seconds.
+        #[arg(long, default_value = "3")]
+        timeout_secs: u64,
+    },
 }
 
 /// Dispatch to the appropriate subcommand handler.
@@ -161,6 +178,10 @@ pub async fn dispatch(args: Args) {
         Some(Command::Features) => {
             features::run_features();
         }
+        #[cfg(feature = "server")]
+        Some(Command::Healthcheck { url, timeout_secs }) => {
+            healthcheck::run_healthcheck(args.config.as_deref(), url, timeout_secs).await;
+        }
         Some(Command::Serve) | None => {
             server::run_server(args.config.as_deref(), args.no_browser).await;
         }

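A deploy-level test can exercise the subcommand exactly the way Docker's
HEALTHCHECK does. A sketch (hypothetical; assumes the `hadrian` binary is
on PATH in the test environment, a gateway listening on port 8080, and
vitest globals as in the suites above):

    import { execFile } from "node:child_process";
    import { promisify } from "node:util";

    const run = promisify(execFile);

    it("healthcheck subcommand exits 0 against a live gateway", async () => {
      // execFile rejects on a non-zero exit code, so resolving is the pass.
      await run("hadrian", [
        "healthcheck",
        "--url",
        "http://localhost:8080/health/live",
      ]);
    });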
From a55ecef43a38d431a8e6c668fb85ded799a54d3d Mon Sep 17 00:00:00 2001
From: ScriptSmith 
Date: Sun, 26 Apr 2026 15:29:42 +1000
Subject: [PATCH 132/172] Make CommandPalette announce as a combobox with
 active-descendant listbox

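In outline, the wiring looks like the skeleton below (illustrative only;
the real markup keeps the existing classes, handlers, and category
grouping):

    function PaletteSkeleton({
      ids,
      selectedIndex,
      items,
    }: {
      ids: { listbox: string; option: (i: number) => string };
      selectedIndex: number;
      items: string[];
    }) {
      return (
        <>
          <input
            role="combobox"
            aria-expanded="true"
            aria-controls={ids.listbox}
            aria-activedescendant={ids.option(selectedIndex)}
          />
          <div id={ids.listbox} role="listbox">
            {items.map((label, i) => (
              <div
                key={label}
                id={ids.option(i)}
                role="option"
                aria-selected={i === selectedIndex}
              >
                {label}
              </div>
            ))}
          </div>
        </>
      );
    }

DOM focus stays on the input; `aria-activedescendant` tells assistive tech
which option is current without moving focus into the list.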
---
 .../CommandPalette/CommandPalette.tsx         | 50 +++++++++++++++++--
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/ui/src/components/CommandPalette/CommandPalette.tsx b/ui/src/components/CommandPalette/CommandPalette.tsx
index 2f34689..9d0eda9 100644
--- a/ui/src/components/CommandPalette/CommandPalette.tsx
+++ b/ui/src/components/CommandPalette/CommandPalette.tsx
@@ -88,11 +88,25 @@ interface CommandPaletteDialogProps {
   onClose: () => void;
 }
 
+// Stable IDs for the combobox / listbox / option ARIA wiring. Generated once
+// per dialog instance because `aria-controls` / `aria-activedescendant` need
+// fixed references that screen readers can resolve.
+let commandPaletteCounter = 0;
+
 function CommandPaletteDialog({ commands, onClose }: CommandPaletteDialogProps) {
   const [search, setSearch] = useState("");
   const [selectedIndex, setSelectedIndex] = useState(0);
   const inputRef = useRef<HTMLInputElement>(null);
   const listRef = useRef<HTMLDivElement>(null);
+  const idsRef = useRef<{ listbox: string; option: (i: number) => string } | null>(null);
+  if (!idsRef.current) {
+    const seq = ++commandPaletteCounter;
+    idsRef.current = {
+      listbox: `command-palette-listbox-${seq}`,
+      option: (i: number) => `command-palette-option-${seq}-${i}`,
+    };
+  }
+  const ids = idsRef.current;
 
   // Filter commands based on search
   const filteredCommands = Array.from(commands.values()).filter((cmd) => {
@@ -175,7 +189,8 @@ function CommandPaletteDialog({ commands, onClose }: CommandPaletteDialogProps)
       {/* Dialog */}
       
- {/* Search input */} + {/* Search input — exposed as a combobox per WAI-ARIA APG so AT + users hear that the input drives a listbox below. */}
- {/* Commands list */} -
+ {/* Commands list — listbox owned by the combobox above, with + per-row option semantics so selection reads correctly. */} +
{flatCommands.length === 0 ? (
No commands found
) : ( Array.from(groupedCommands.entries()).map(([category, items]) => ( -
-
+
+ {items.map((cmd) => { @@ -211,7 +243,15 @@ function CommandPaletteDialog({ commands, onClose }: CommandPaletteDialogProps) return (