diff --git a/.env.example b/.env.example index 61cdf4e3c..552e63626 100644 --- a/.env.example +++ b/.env.example @@ -20,3 +20,8 @@ # Host port mapped to the k3s NodePort (30051) where the OpenShell gateway # listens. The CLI connects here. Must be unique per cluster. #GATEWAY_PORT=8080 + +# Optional gateway host override for `openshell gateway start`. +# `mise run cluster` still pins `--backend k3s`, but host-run backends use +# this value to configure SSH advertisements and sandbox callback routing. +#OPENSHELL_GATEWAY_HOST=host.docker.internal diff --git a/Cargo.lock b/Cargo.lock index 4b29a0c7f..4620795cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3009,6 +3009,7 @@ name = "openshell-cli" version = "0.0.0" dependencies = [ "anyhow", + "base64 0.22.1", "bytes", "clap", "clap_complete", diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index 71d223d66..51cad7c90 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -49,7 +49,7 @@ pub use crate::docker::{ DockerPreflight, ExistingGatewayInfo, check_docker_available, create_ssh_docker_client, }; pub use crate::metadata::{ - GatewayMetadata, clear_active_gateway, clear_last_sandbox_if_matches, + GatewayBackend, GatewayMetadata, clear_active_gateway, clear_last_sandbox_if_matches, extract_host_from_ssh_destination, get_gateway_metadata, list_gateways, load_active_gateway, load_gateway_metadata, load_last_sandbox, remove_gateway_metadata, resolve_ssh_hostname, save_active_gateway, save_last_sandbox, store_gateway_metadata, diff --git a/crates/openshell-bootstrap/src/metadata.rs b/crates/openshell-bootstrap/src/metadata.rs index 8e6b8a070..2f46408f2 100644 --- a/crates/openshell-bootstrap/src/metadata.rs +++ b/crates/openshell-bootstrap/src/metadata.rs @@ -6,7 +6,53 @@ use crate::paths::{active_gateway_path, gateways_dir, last_sandbox_path}; use miette::{IntoDiagnostic, Result, WrapErr}; use openshell_core::paths::ensure_parent_dir_restricted; use serde::{Deserialize, Serialize}; +use std::fmt; use std::path::PathBuf; +use std::str::FromStr; + +/// Managed gateway backends supported by the CLI. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum GatewayBackend { + K3s, + Kubernetes, + Vm, + Podman, +} + +impl GatewayBackend { + #[must_use] + pub const fn as_str(self) -> &'static str { + match self { + Self::K3s => "k3s", + Self::Kubernetes => "kubernetes", + Self::Vm => "vm", + Self::Podman => "podman", + } + } +} + +impl fmt::Display for GatewayBackend { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +impl FromStr for GatewayBackend { + type Err = String; + + fn from_str(value: &str) -> std::result::Result { + match value.trim().to_ascii_lowercase().as_str() { + "k3s" => Ok(Self::K3s), + "kubernetes" => Ok(Self::Kubernetes), + "vm" => Ok(Self::Vm), + "podman" => Ok(Self::Podman), + other => Err(format!( + "unsupported gateway backend '{other}'. expected one of: k3s, kubernetes, vm, podman" + )), + } + } +} /// Gateway metadata stored alongside deployment info. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -31,6 +77,19 @@ pub struct GatewayMetadata { #[serde(default, skip_serializing_if = "Option::is_none")] pub auth_mode: Option, + /// Managed gateway backend, when this registration was created by + /// `openshell gateway start`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub backend: Option, + + /// Host override used to configure the running gateway itself. + /// + /// This is distinct from `gateway_endpoint`: local host-run gateways keep + /// their CLI endpoint on loopback but still need a stable configured host + /// for SSH advertisements and backend-specific callback derivation. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub configured_gateway_host: Option, + /// Edge proxy team/org domain (e.g., `brevlab.cloudflareaccess.com`). #[serde( default, @@ -55,6 +114,14 @@ impl GatewayMetadata { /// address (`127.0.0.1`, `localhost`, `::1`) — those are never meaningful /// as a `--gateway-host` override. pub fn gateway_host(&self) -> Option<&str> { + if let Some(host) = self + .configured_gateway_host + .as_deref() + .filter(|host| !is_loopback_host(host)) + { + return Some(host); + } + // Endpoint format: "https://host:port" or "http://host:port" let after_scheme = self .gateway_endpoint @@ -64,16 +131,19 @@ impl GatewayMetadata { let host = after_scheme .rsplit_once(':') .map_or(after_scheme, |(h, _)| h); - if host.is_empty() - || host == "127.0.0.1" - || host == "localhost" - || host == "::1" - || host == "[::1]" - { + if host.is_empty() || is_loopback_host(host) { return None; } Some(host) } + + /// Return the managed backend when known, falling back to `k3s` for legacy + /// Docker-managed gateways that predate the `backend` field. + #[must_use] + pub fn backend(&self) -> Option { + self.backend + .or_else(|| (self.gateway_port > 0).then_some(GatewayBackend::K3s)) + } } pub fn create_gateway_metadata( @@ -134,11 +204,20 @@ pub fn create_gateway_metadata_with_host( remote_host, resolved_host, auth_mode: disable_tls.then(|| "plaintext".to_string()), + backend: Some(GatewayBackend::K3s), + configured_gateway_host: gateway_host.map(ToOwned::to_owned), edge_team_domain: None, edge_auth_url: None, } } +fn is_loopback_host(host: &str) -> bool { + host == "127.0.0.1" + || host.eq_ignore_ascii_case("localhost") + || host == "::1" + || host == "[::1]" +} + pub fn local_gateway_host() -> Option { std::env::var("DOCKER_HOST") .ok() @@ -462,6 +541,8 @@ mod tests { remote_host: Some("user@openshell-dev".to_string()), resolved_host: Some("10.0.0.5".to_string()), auth_mode: None, + backend: Some(GatewayBackend::K3s), + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }; @@ -502,6 +583,11 @@ mod tests { assert!(!meta.is_remote); assert!(meta.remote_host.is_none()); assert!(meta.resolved_host.is_none()); + assert_eq!(meta.backend(), Some(GatewayBackend::K3s)); + assert_eq!( + meta.configured_gateway_host.as_deref(), + Some("host.docker.internal") + ); } #[test] @@ -557,6 +643,8 @@ mod tests { remote_host: None, resolved_host: None, auth_mode: None, + backend: Some(GatewayBackend::K3s), + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }; @@ -573,12 +661,34 @@ mod tests { remote_host: Some("user@10.0.0.5".into()), resolved_host: Some("10.0.0.5".into()), auth_mode: None, + backend: Some(GatewayBackend::K3s), + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }; assert_eq!(meta.gateway_host(), Some("10.0.0.5")); } + #[test] + fn gateway_host_prefers_configured_host_for_loopback_endpoint() { + let meta = GatewayMetadata { + name: "t".into(), + gateway_endpoint: "https://127.0.0.1:8080".into(), + is_remote: false, + gateway_port: 8080, + remote_host: None, + resolved_host: None, + auth_mode: None, + backend: Some(GatewayBackend::Kubernetes), + configured_gateway_host: Some("gateway.internal".into()), + edge_team_domain: None, + edge_auth_url: None, + }; + + assert_eq!(meta.gateway_host(), Some("gateway.internal")); + assert_eq!(meta.backend(), Some(GatewayBackend::Kubernetes)); + } + #[test] fn gateway_host_handles_http_scheme() { let meta = diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index b3a006fdd..90b2172a7 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -21,6 +21,7 @@ openshell-policy = { path = "../openshell-policy" } openshell-providers = { path = "../openshell-providers" } openshell-prover = { path = "../openshell-prover" } openshell-tui = { path = "../openshell-tui" } +base64 = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } prost-types = { workspace = true } diff --git a/crates/openshell-cli/src/completers.rs b/crates/openshell-cli/src/completers.rs index 3c2a8b336..30ededb4b 100644 --- a/crates/openshell-cli/src/completers.rs +++ b/crates/openshell-cli/src/completers.rs @@ -179,6 +179,8 @@ mod tests { remote_host: None, resolved_host: None, auth_mode: Some("cloudflare_jwt".to_string()), + backend: None, + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }, diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 292922411..4f24fc6d8 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -11,8 +11,8 @@ use owo_colors::OwoColorize; use std::io::Write; use openshell_bootstrap::{ - edge_token::load_edge_token, get_gateway_metadata, list_gateways, load_active_gateway, - load_gateway_metadata, load_last_sandbox, save_last_sandbox, + GatewayBackend, edge_token::load_edge_token, get_gateway_metadata, list_gateways, + load_active_gateway, load_gateway_metadata, load_last_sandbox, save_last_sandbox, }; use openshell_cli::completers; use openshell_cli::run; @@ -26,6 +26,25 @@ struct GatewayContext { endpoint: String, } +#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] +enum GatewayBackendArg { + K3s, + Kubernetes, + Vm, + Podman, +} + +impl From for GatewayBackend { + fn from(value: GatewayBackendArg) -> Self { + match value { + GatewayBackendArg::K3s => Self::K3s, + GatewayBackendArg::Kubernetes => Self::Kubernetes, + GatewayBackendArg::Vm => Self::Vm, + GatewayBackendArg::Podman => Self::Podman, + } + } +} + /// Resolve the gateway name to a [`GatewayContext`] with the gateway endpoint. /// /// Resolution priority: @@ -744,6 +763,15 @@ enum GatewayCommands { #[arg(long, default_value = "openshell", env = "OPENSHELL_GATEWAY")] name: String, + /// Gateway deployment backend. + #[arg( + long, + value_enum, + default_value = "k3s", + env = "OPENSHELL_GATEWAY_BACKEND" + )] + backend: GatewayBackendArg, + /// SSH destination for remote deployment (e.g., user@hostname). #[arg(long)] remote: Option, @@ -763,7 +791,7 @@ enum GatewayCommands { /// example in CI containers, WSL, or when Docker runs on a /// remote host. Common values: `host.docker.internal`, a LAN IP, /// or a hostname. - #[arg(long)] + #[arg(long, env = "OPENSHELL_GATEWAY_HOST")] gateway_host: Option, /// Destroy and recreate the gateway from scratch if one already exists. @@ -1644,6 +1672,7 @@ async fn main() -> Result<()> { }) => match command { GatewayCommands::Start { name, + backend, remote, ssh_key, port, @@ -1662,6 +1691,7 @@ async fn main() -> Result<()> { }; run::gateway_admin_deploy( &name, + backend.into(), remote.as_deref(), ssh_key.as_deref(), port, @@ -2699,6 +2729,8 @@ mod tests { remote_host: None, resolved_host: None, auth_mode: Some("cloudflare_jwt".to_string()), + backend: None, + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index c41b53518..70b99c3e2 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -15,12 +15,15 @@ use hyper_rustls::HttpsConnectorBuilder; use hyper_util::{client::legacy::Client, rt::TokioExecutor}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use miette::{IntoDiagnostic, Result, WrapErr, miette}; +use nix::sys::signal::{Signal, kill}; +use nix::unistd::Pid; use openshell_bootstrap::{ - DeployOptions, GatewayMetadata, RemoteOptions, clear_active_gateway, + DeployOptions, GatewayBackend, GatewayMetadata, RemoteOptions, clear_active_gateway, clear_last_sandbox_if_matches, container_name, extract_host_from_ssh_destination, get_gateway_metadata, list_gateways, load_active_gateway, remove_gateway_metadata, resolve_ssh_hostname, save_active_gateway, save_last_sandbox, store_gateway_metadata, }; +use openshell_bootstrap::{constants, mtls, pki}; use openshell_core::proto::{ ApproveAllDraftChunksRequest, ApproveDraftChunkRequest, ClearDraftChunksRequest, CreateProviderRequest, CreateSandboxRequest, DeleteProviderRequest, DeleteSandboxRequest, @@ -38,9 +41,11 @@ use openshell_providers::{ }; use owo_colors::OwoColorize; use std::collections::{HashMap, HashSet, VecDeque}; -use std::io::{IsTerminal, Read, Write}; +use std::ffi::OsString; +use std::fs::{self, File, OpenOptions}; +use std::io::{BufRead, BufReader, IsTerminal, Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; -use std::process::Command; +use std::process::{Command, Stdio}; use std::time::{Duration, Instant}; use tonic::{Code, Status}; @@ -53,6 +58,10 @@ pub use crate::ssh::{ pub use openshell_core::forward::{ find_forward_by_port, list_forwards, stop_forward, stop_forwards_for_sandbox, }; +use openshell_core::paths::{ + create_dir_restricted, ensure_parent_dir_restricted, set_file_owner_only, xdg_config_dir, + xdg_data_dir, +}; /// Convert a sandbox phase integer to a human-readable string. fn phase_name(phase: i32) -> &'static str { @@ -784,34 +793,38 @@ pub fn gateway_select(name: Option<&str>, gateway_flag: &Option) -> Resu } fn format_gateway_select_header(gateways: &[GatewayMetadata]) -> String { - let (name_width, endpoint_width, type_width) = gateway_select_column_widths(gateways); + let (name_width, endpoint_width, type_width, backend_width) = + gateway_select_column_widths(gateways); format!( - " {: Vec { - let (name_width, endpoint_width, type_width) = gateway_select_column_widths(gateways); + let (name_width, endpoint_width, type_width, backend_width) = + gateway_select_column_widths(gateways); gateways .iter() .map(|gateway| { format!( - "{: (usize, usize, usize) { +fn gateway_select_column_widths(gateways: &[GatewayMetadata]) -> (usize, usize, usize, usize) { let name_width = gateways .iter() .map(|gateway| gateway.name.len()) @@ -830,8 +843,14 @@ fn gateway_select_column_widths(gateways: &[GatewayMetadata]) -> (usize, usize, .max() .unwrap_or(4) .max(4); + let backend_width = gateways + .iter() + .map(|gateway| gateway_backend_label(gateway).len()) + .max() + .unwrap_or(7) + .max(7); - (name_width, endpoint_width, type_width) + (name_width, endpoint_width, type_width, backend_width) } fn gateway_type_label(gateway: &GatewayMetadata) -> &'static str { @@ -850,6 +869,10 @@ fn gateway_auth_label(gateway: &GatewayMetadata) -> &str { } } +fn gateway_backend_label(gateway: &GatewayMetadata) -> &str { + gateway.backend().map_or("-", GatewayBackend::as_str) +} + fn is_loopback_gateway_endpoint(endpoint: &str) -> bool { let Ok(parsed) = url::Url::parse(endpoint) else { return false; @@ -895,6 +918,8 @@ fn plaintext_gateway_metadata( remote_host, resolved_host, auth_mode: Some("plaintext".to_string()), + backend: None, + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, } @@ -1058,6 +1083,11 @@ pub async fn gateway_add( ); eprintln!(" {} {}", "Endpoint:".dimmed(), endpoint); eprintln!(" {} {}", "Type:".dimmed(), gateway_type); + eprintln!( + " {} {}", + "Backend:".dimmed(), + gateway_backend_label(&metadata) + ); eprintln!(" {} {}", "Auth:".dimmed(), gateway_auth); return Ok(()); @@ -1098,6 +1128,8 @@ pub async fn gateway_add( remote_host, resolved_host, auth_mode: Some("mtls".to_string()), + backend: None, + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }; @@ -1116,6 +1148,11 @@ pub async fn gateway_add( "Type:".dimmed(), if local { "local" } else { "remote" }, ); + eprintln!( + " {} {}", + "Backend:".dimmed(), + gateway_backend_label(&metadata) + ); eprintln!("{} TLS certificates extracted", "✓".green().bold()); } else { // Cloud (edge-authenticated) gateway. @@ -1127,6 +1164,8 @@ pub async fn gateway_add( remote_host: None, resolved_host: None, auth_mode: Some("cloudflare_jwt".to_string()), + backend: None, + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }; @@ -1141,6 +1180,11 @@ pub async fn gateway_add( ); eprintln!(" {} {}", "Endpoint:".dimmed(), endpoint); eprintln!(" {} cloud", "Type:".dimmed()); + eprintln!( + " {} {}", + "Backend:".dimmed(), + gateway_backend_label(&metadata) + ); eprintln!(); match crate::auth::browser_auth_flow(&endpoint).await { @@ -1221,13 +1265,20 @@ pub fn gateway_list(gateway_flag: &Option) -> Result<()> { .max() .unwrap_or(4) .max(4); + let backend_width = gateways + .iter() + .map(|g| gateway_backend_label(g).len()) + .max() + .unwrap_or(7) + .max(7); // Print header println!( - " {:) -> Result<()> { let is_active = active.as_deref() == Some(&gateway.name); let marker = if is_active { "*" } else { " " }; let gw_type = gateway_type_label(gateway); + let gw_backend = gateway_backend_label(gateway); let gw_auth = gateway_auth_label(gateway); let line = format!( - "{marker} {: Result { + let config_dir = xdg_config_dir()? + .join("openshell") + .join("gateways") + .join(name); + let data_dir = xdg_data_dir()? + .join("openshell") + .join("gateways") + .join(name); + Ok(Self { + server_tls_dir: config_dir.join("server-tls"), + db_path: data_dir.join("gateway.db"), + pid_path: data_dir.join("gateway.pid"), + log_path: data_dir.join("gateway.log"), + ssh_secret_path: config_dir.join("ssh-handshake-secret"), + vm_state_dir: data_dir.join("vm"), + config_dir, + data_dir, + }) + } + + fn server_ca_path(&self) -> PathBuf { + self.server_tls_dir.join("ca.crt") + } + + fn server_cert_path(&self) -> PathBuf { + self.server_tls_dir.join("tls.crt") + } + + fn server_key_path(&self) -> PathBuf { + self.server_tls_dir.join("tls.key") + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ManagedGatewayKind { + K3s, + HostRun(GatewayBackend), + External, +} + +fn gateway_kind(metadata: Option<&GatewayMetadata>) -> ManagedGatewayKind { + match metadata.and_then(GatewayMetadata::backend) { + Some(GatewayBackend::K3s) => ManagedGatewayKind::K3s, + Some( + backend @ (GatewayBackend::Kubernetes | GatewayBackend::Vm | GatewayBackend::Podman), + ) => ManagedGatewayKind::HostRun(backend), + None => ManagedGatewayKind::External, + } +} + +fn lifecycle_gateway_kind(metadata: Option<&GatewayMetadata>) -> ManagedGatewayKind { + metadata.map_or(ManagedGatewayKind::K3s, |metadata| { + gateway_kind(Some(metadata)) + }) +} + +fn is_loopback_host(host: &str) -> bool { + host == "127.0.0.1" + || host.eq_ignore_ascii_case("localhost") + || host == "::1" + || host == "[::1]" +} + +fn localhost_gateway_endpoint(port: u16, disable_tls: bool) -> String { + let scheme = if disable_tls { "http" } else { "https" }; + format!("{scheme}://127.0.0.1:{port}") +} + +fn host_run_auth_mode(disable_tls: bool, disable_gateway_auth: bool) -> Option { + if disable_tls { + Some("plaintext".to_string()) + } else if disable_gateway_auth { + Some("mtls_optional".to_string()) + } else { + None + } +} + +fn is_plaintext_auth_mode(auth_mode: Option<&str>) -> bool { + matches!(auth_mode, Some("plaintext")) +} + +fn is_optional_mtls_auth_mode(auth_mode: Option<&str>) -> bool { + matches!(auth_mode, Some("mtls_optional")) +} + +fn gateway_binary_candidates() -> Result> { + let mut candidates = Vec::new(); + if let Ok(path) = std::env::var("OPENSHELL_GATEWAY_BIN") + && !path.trim().is_empty() + { + candidates.push(PathBuf::from(path)); + } + + let current_exe = std::env::current_exe() + .into_diagnostic() + .wrap_err("failed to resolve current OpenShell executable")?; + if let Some(parent) = current_exe.parent() { + candidates.push(parent.join("openshell-gateway")); + } + candidates.extend(path_lookup_candidates("openshell-gateway")); + Ok(candidates) +} + +fn vm_compute_driver_candidates(gateway_bin: &Path) -> Vec { + let mut candidates = Vec::new(); + if let Ok(path) = std::env::var("OPENSHELL_VM_COMPUTE_DRIVER_BIN") + && !path.trim().is_empty() + { + candidates.push(PathBuf::from(path)); + } + if let Some(parent) = gateway_bin.parent() { + candidates.push(parent.join("openshell-driver-vm")); + } + candidates.extend(path_lookup_candidates("openshell-driver-vm")); + candidates +} + +fn path_lookup_candidates(binary_name: &str) -> Vec { + std::env::var_os("PATH") + .into_iter() + .flat_map(|paths| std::env::split_paths(&paths).collect::>()) + .map(|dir| dir.join(binary_name)) + .collect() +} + +fn resolve_binary(candidates: &[PathBuf], install_hint: &str) -> Result { + candidates + .iter() + .find(|path| path.is_file()) + .cloned() + .ok_or_else(|| miette!("{install_hint}")) +} + +fn configured_gateway_host( + requested_gateway_host: Option<&str>, + stored_metadata: Option<&GatewayMetadata>, +) -> String { + requested_gateway_host + .map(str::trim) + .filter(|host| !host.is_empty()) + .map(str::to_string) + .or_else(|| { + stored_metadata + .and_then(GatewayMetadata::gateway_host) + .map(str::to_string) + }) + .unwrap_or_else(|| "127.0.0.1".to_string()) +} + +fn host_run_gateway_port(port: u16, stored_metadata: Option<&GatewayMetadata>) -> u16 { + stored_metadata + .filter(|metadata| metadata.gateway_port > 0) + .map_or(port, |metadata| metadata.gateway_port) +} + +fn host_run_gateway_metadata( + name: &str, + backend: GatewayBackend, + port: u16, + disable_tls: bool, + disable_gateway_auth: bool, + gateway_host: &str, +) -> GatewayMetadata { + GatewayMetadata { + name: name.to_string(), + gateway_endpoint: localhost_gateway_endpoint(port, disable_tls), + is_remote: false, + gateway_port: port, + remote_host: None, + resolved_host: None, + auth_mode: host_run_auth_mode(disable_tls, disable_gateway_auth), + backend: Some(backend), + configured_gateway_host: Some(gateway_host.to_string()), + edge_team_domain: None, + edge_auth_url: None, + } +} + +fn read_pid_file(path: &Path) -> Result> { + if !path.exists() { + return Ok(None); + } + let contents = fs::read_to_string(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read PID file {}", path.display()))?; + let pid = contents + .trim() + .parse::() + .into_diagnostic() + .wrap_err_with(|| format!("failed to parse PID file {}", path.display()))?; + Ok(Some(pid)) +} + +fn is_process_alive(pid: i32) -> bool { + kill(Pid::from_raw(pid), None).is_ok() +} + +fn cleanup_stale_pid_file(path: &Path) { + if let Err(err) = fs::remove_file(path) + && err.kind() != std::io::ErrorKind::NotFound + { + tracing::debug!("failed to remove stale pid file {}: {err}", path.display()); + } +} + +async fn stop_host_run_process(paths: &HostRunGatewayPaths) -> Result { + let Some(pid) = read_pid_file(&paths.pid_path)? else { + cleanup_stale_pid_file(&paths.pid_path); + return Ok(false); + }; + + let pid = Pid::from_raw(pid); + if !is_process_alive(pid.as_raw()) { + cleanup_stale_pid_file(&paths.pid_path); + return Ok(false); + } + + kill(pid, Signal::SIGTERM) + .into_diagnostic() + .wrap_err_with(|| format!("failed to stop gateway process {}", pid.as_raw()))?; + + let deadline = Instant::now() + HOST_RUN_GATEWAY_STOP_TIMEOUT; + while Instant::now() < deadline { + if !is_process_alive(pid.as_raw()) { + cleanup_stale_pid_file(&paths.pid_path); + return Ok(true); + } + tokio::time::sleep(Duration::from_millis(100)).await; + } + + kill(pid, Signal::SIGKILL) + .into_diagnostic() + .wrap_err_with(|| format!("failed to kill gateway process {}", pid.as_raw()))?; + cleanup_stale_pid_file(&paths.pid_path); + Ok(true) +} + +fn remove_host_run_runtime_state(paths: &HostRunGatewayPaths) -> Result<()> { + for path in [ + &paths.data_dir, + &paths.server_tls_dir, + &paths.config_dir.join("mtls"), + ] { + match fs::remove_dir_all(path) { + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::NotFound => {} + Err(err) => { + return Err(miette!("failed to remove {}: {err}", path.display())); + } + } + } + match fs::remove_file(&paths.ssh_secret_path) { + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::NotFound => {} + Err(err) => { + return Err(miette!( + "failed to remove {}: {err}", + paths.ssh_secret_path.display() + )); + } + } + Ok(()) +} + +fn host_run_tls_sans( + backend: GatewayBackend, + gateway_host: &str, + grpc_endpoint: &str, +) -> Vec { + let mut sans = Vec::new(); + if !is_loopback_host(gateway_host) { + sans.push(gateway_host.to_string()); + } + if let Ok(url) = url::Url::parse(grpc_endpoint) + && let Some(host) = url.host_str() + && !is_loopback_host(host) + { + sans.push(host.to_string()); + } + if backend == GatewayBackend::Vm { + sans.push(HOST_RUN_GATEWAY_VM_CALLBACK_HOST.to_string()); + } + sans.sort(); + sans.dedup(); + sans +} + +fn write_private_file(path: &Path, contents: &str) -> Result<()> { + ensure_parent_dir_restricted(path)?; + fs::write(path, contents) + .into_diagnostic() + .wrap_err_with(|| format!("failed to write {}", path.display()))?; + set_file_owner_only(path)?; + Ok(()) +} + +fn ensure_host_run_tls_materials( + name: &str, + backend: GatewayBackend, + paths: &HostRunGatewayPaths, + gateway_host: &str, + grpc_endpoint: &str, + disable_tls: bool, +) -> Result<()> { + if disable_tls { + return Ok(()); + } + + let client_key_path = paths.config_dir.join("mtls").join("tls.key"); + let server_key_path = paths.server_key_path(); + if client_key_path.is_file() && server_key_path.is_file() { + return Ok(()); + } + + let bundle = pki::generate_pki(&host_run_tls_sans(backend, gateway_host, grpc_endpoint))?; + mtls::store_pki_bundle(name, &bundle)?; + create_dir_restricted(&paths.server_tls_dir)?; + write_private_file(&paths.server_ca_path(), &bundle.ca_cert_pem)?; + write_private_file(&paths.server_cert_path(), &bundle.server_cert_pem)?; + write_private_file(&paths.server_key_path(), &bundle.server_key_pem)?; + Ok(()) +} + +fn load_or_create_ssh_handshake_secret(paths: &HostRunGatewayPaths) -> Result { + if paths.ssh_secret_path.is_file() { + let secret = fs::read_to_string(&paths.ssh_secret_path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read {}", paths.ssh_secret_path.display()))?; + let secret = secret.trim().to_string(); + if !secret.is_empty() { + return Ok(secret); + } + } + + let mut bytes = [0u8; 32]; + File::open("/dev/urandom") + .into_diagnostic() + .wrap_err("failed to open /dev/urandom for SSH handshake secret generation")? + .read_exact(&mut bytes) + .into_diagnostic() + .wrap_err("failed to read randomness for SSH handshake secret")?; + let secret = bytes + .iter() + .map(|byte| format!("{byte:02x}")) + .collect::(); + write_private_file(&paths.ssh_secret_path, &secret)?; + Ok(secret) +} + +fn kubectl_capture(args: &[&str]) -> Result { + Command::new("kubectl") + .args(args) + .output() + .into_diagnostic() + .wrap_err("failed to invoke kubectl") +} + +fn ensure_kubernetes_backend_prereqs() -> Result<()> { + let namespace = + kubectl_capture(&["get", "namespace", HOST_RUN_GATEWAY_NAMESPACE, "-o", "name"])?; + if !namespace.status.success() { + let stderr = String::from_utf8_lossy(&namespace.stderr); + return Err(miette!( + "kubernetes backend requires namespace '{HOST_RUN_GATEWAY_NAMESPACE}'.\n\ + Current kube context is not ready for OpenShell: {}", + stderr.trim() + )); + } + + let crd = kubectl_capture(&["get", "crd", "sandboxes.agents.x-k8s.io", "-o", "name"])?; + if !crd.status.success() { + let stderr = String::from_utf8_lossy(&crd.stderr); + return Err(miette!( + "kubernetes backend requires the Sandbox CRD 'sandboxes.agents.x-k8s.io'.\n\ + Current kube context is missing the OpenShell CRDs: {}", + stderr.trim() + )); + } + + Ok(()) +} + +fn upsert_kubernetes_client_tls_secret(bundle_dir: &Path) -> Result<()> { + use base64::Engine; + use base64::engine::general_purpose::STANDARD; + + let cert = fs::read(bundle_dir.join("tls.crt")) + .into_diagnostic() + .wrap_err("failed to read client TLS cert")?; + let key = fs::read(bundle_dir.join("tls.key")) + .into_diagnostic() + .wrap_err("failed to read client TLS key")?; + let ca = fs::read(bundle_dir.join("ca.crt")) + .into_diagnostic() + .wrap_err("failed to read client TLS CA")?; + + let manifest = serde_json::json!({ + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": constants::CLIENT_TLS_SECRET_NAME, + "namespace": HOST_RUN_GATEWAY_NAMESPACE, + }, + "type": "Opaque", + "data": { + "tls.crt": STANDARD.encode(cert), + "tls.key": STANDARD.encode(key), + "ca.crt": STANDARD.encode(ca), + } + }) + .to_string(); + + let mut child = Command::new("kubectl") + .args(["apply", "-f", "-"]) + .stdin(Stdio::piped()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .spawn() + .into_diagnostic() + .wrap_err("failed to apply openshell-client-tls secret")?; + if let Some(stdin) = child.stdin.as_mut() { + stdin + .write_all(manifest.as_bytes()) + .into_diagnostic() + .wrap_err("failed to write openshell-client-tls manifest to kubectl")?; + } + let output = child + .wait_with_output() + .into_diagnostic() + .wrap_err("failed to wait for kubectl apply")?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(miette!( + "failed to apply openshell-client-tls secret: {}", + stderr.trim() + )); + } + Ok(()) +} + +fn log_file_handle(path: &Path) -> Result { + ensure_parent_dir_restricted(path)?; + OpenOptions::new() + .create(true) + .append(true) + .open(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to open {}", path.display())) +} + +fn resolve_host_run_grpc_endpoint( + backend: GatewayBackend, + port: u16, + disable_tls: bool, + gateway_host: &str, +) -> Result { + let scheme = if disable_tls { "http" } else { "https" }; + match backend { + GatewayBackend::Kubernetes => { + if let Ok(host_gateway_ip) = std::env::var("OPENSHELL_HOST_GATEWAY_IP") + && !host_gateway_ip.trim().is_empty() + { + return Ok(format!("{scheme}://host.openshell.internal:{port}")); + } + if !is_loopback_host(gateway_host) { + return Ok(format!("{scheme}://{gateway_host}:{port}")); + } + Err(miette!( + "kubernetes backend needs a pod-reachable gateway host.\n\ + Set OPENSHELL_GATEWAY_HOST to a host or IP reachable from the cluster, or set OPENSHELL_HOST_GATEWAY_IP to let sandboxes use host.openshell.internal." + )) + } + GatewayBackend::Vm => Ok(format!( + "{scheme}://{HOST_RUN_GATEWAY_VM_CALLBACK_HOST}:{port}" + )), + GatewayBackend::K3s | GatewayBackend::Podman => unreachable!("not a host-run backend"), + } +} + +fn host_run_gateway_args( + backend: GatewayBackend, + gateway_bin: &Path, + paths: &HostRunGatewayPaths, + port: u16, + disable_tls: bool, + disable_gateway_auth: bool, + gateway_host: &str, + grpc_endpoint: &str, +) -> Result> { + let mut args = vec![ + gateway_bin.as_os_str().to_os_string(), + OsString::from("--port"), + OsString::from(port.to_string()), + OsString::from("--db-url"), + OsString::from(format!("sqlite:{}", paths.db_path.display())), + OsString::from("--drivers"), + OsString::from(backend.to_string()), + OsString::from("--ssh-gateway-host"), + OsString::from(gateway_host), + OsString::from("--ssh-gateway-port"), + OsString::from(port.to_string()), + OsString::from("--ssh-handshake-secret"), + OsString::from(load_or_create_ssh_handshake_secret(paths)?), + OsString::from("--grpc-endpoint"), + OsString::from(grpc_endpoint), + ]; + + if backend == GatewayBackend::Kubernetes { + args.push(OsString::from("--sandbox-namespace")); + args.push(OsString::from(HOST_RUN_GATEWAY_NAMESPACE)); + if !disable_tls { + args.push(OsString::from("--client-tls-secret-name")); + args.push(OsString::from(constants::CLIENT_TLS_SECRET_NAME)); + } + if let Ok(ip) = std::env::var("OPENSHELL_HOST_GATEWAY_IP") + && !ip.trim().is_empty() + { + args.push(OsString::from("--host-gateway-ip")); + args.push(OsString::from(ip)); + } + } else if backend == GatewayBackend::Vm { + args.push(OsString::from("--vm-driver-state-dir")); + args.push(paths.vm_state_dir.as_os_str().to_os_string()); + let driver_bin = resolve_binary( + &vm_compute_driver_candidates(gateway_bin), + "vm backend requires the openshell-driver-vm binary to be installed or included alongside openshell-gateway", + )?; + args.push(OsString::from("--vm-compute-driver-bin")); + args.push(driver_bin.as_os_str().to_os_string()); + if !disable_tls { + let mtls_dir = paths.config_dir.join("mtls"); + args.push(OsString::from("--vm-tls-ca")); + args.push(mtls_dir.join("ca.crt").as_os_str().to_os_string()); + args.push(OsString::from("--vm-tls-cert")); + args.push(mtls_dir.join("tls.crt").as_os_str().to_os_string()); + args.push(OsString::from("--vm-tls-key")); + args.push(mtls_dir.join("tls.key").as_os_str().to_os_string()); + } + } + + if disable_tls { + args.push(OsString::from("--disable-tls")); + } else { + args.push(OsString::from("--tls-cert")); + args.push(paths.server_cert_path().as_os_str().to_os_string()); + args.push(OsString::from("--tls-key")); + args.push(paths.server_key_path().as_os_str().to_os_string()); + args.push(OsString::from("--tls-client-ca")); + args.push(paths.server_ca_path().as_os_str().to_os_string()); + if disable_gateway_auth { + args.push(OsString::from("--disable-gateway-auth")); + } + } + + Ok(args) +} + +async fn spawn_host_run_gateway( + name: &str, + backend: GatewayBackend, + port: u16, + requested_gateway_host: Option<&str>, + recreate: bool, + disable_tls: bool, + disable_gateway_auth: bool, +) -> Result<()> { + let paths = HostRunGatewayPaths::for_gateway(name)?; + create_dir_restricted(&paths.config_dir)?; + create_dir_restricted(&paths.data_dir)?; + + let stored_metadata = if !recreate { + get_gateway_metadata(name) + } else { + None + }; + if let Some(existing_backend) = stored_metadata.as_ref().and_then(GatewayMetadata::backend) + && existing_backend != backend + { + return Err(miette!( + "gateway '{name}' is already registered with backend '{}'.\n\ + Destroy it first with: openshell gateway destroy --name {name}", + existing_backend + )); + } + + let effective_port = host_run_gateway_port(port, stored_metadata.as_ref()); + let effective_disable_tls = disable_tls + || stored_metadata + .as_ref() + .is_some_and(|m| is_plaintext_auth_mode(m.auth_mode.as_deref())); + let effective_disable_gateway_auth = disable_gateway_auth + || stored_metadata + .as_ref() + .is_some_and(|m| is_optional_mtls_auth_mode(m.auth_mode.as_deref())); + let gateway_host = configured_gateway_host(requested_gateway_host, stored_metadata.as_ref()); + let grpc_endpoint = resolve_host_run_grpc_endpoint( + backend, + effective_port, + effective_disable_tls, + &gateway_host, + )?; + + if backend == GatewayBackend::Kubernetes { + ensure_kubernetes_backend_prereqs()?; + } + + if recreate { + let _ = stop_host_run_process(&paths).await; + remove_host_run_runtime_state(&paths)?; + create_dir_restricted(&paths.config_dir)?; + create_dir_restricted(&paths.data_dir)?; + } else if read_pid_file(&paths.pid_path)?.is_some_and(is_process_alive) { + eprintln!( + "{} Gateway '{name}' is already running.", + "✓".green().bold() + ); + save_active_gateway(name)?; + return Ok(()); + } else { + cleanup_stale_pid_file(&paths.pid_path); + } + + ensure_host_run_tls_materials( + name, + backend, + &paths, + &gateway_host, + &grpc_endpoint, + effective_disable_tls, + )?; + if backend == GatewayBackend::Kubernetes && !effective_disable_tls { + upsert_kubernetes_client_tls_secret(&paths.config_dir.join("mtls"))?; + } + + let gateway_bin = resolve_binary( + &gateway_binary_candidates()?, + "local gateway backends require the openshell-gateway binary to be installed or included alongside openshell", + )?; + let args = host_run_gateway_args( + backend, + &gateway_bin, + &paths, + effective_port, + effective_disable_tls, + effective_disable_gateway_auth, + &gateway_host, + &grpc_endpoint, + )?; + + let stdout = log_file_handle(&paths.log_path)?; + let stderr = stdout + .try_clone() + .into_diagnostic() + .wrap_err_with(|| format!("failed to clone {}", paths.log_path.display()))?; + let mut command = Command::new(&gateway_bin); + command.args(&args[1..]); + command.stdin(Stdio::null()); + command.stdout(Stdio::from(stdout)); + command.stderr(Stdio::from(stderr)); + let child = command + .spawn() + .into_diagnostic() + .wrap_err_with(|| format!("failed to start {}", gateway_bin.display()))?; + + write_private_file(&paths.pid_path, &child.id().to_string())?; + + let server = localhost_gateway_endpoint(effective_port, effective_disable_tls); + let tls = TlsOptions::default() + .with_gateway_name(name) + .with_default_paths(&server); + if let Err(err) = crate::bootstrap::wait_for_grpc_ready(&server, &tls).await { + let _ = stop_host_run_process(&paths).await; + return Err(err); + } + + let metadata = host_run_gateway_metadata( + name, + backend, + effective_port, + effective_disable_tls, + effective_disable_gateway_auth, + &gateway_host, + ); + store_gateway_metadata(name, &metadata)?; + save_active_gateway(name)?; + + eprintln!("{} Gateway '{name}' is ready.", "✓".green().bold()); + eprintln!(" {} {}", "Endpoint:".dimmed(), metadata.gateway_endpoint); + eprintln!( + " {} {}", + "Backend:".dimmed(), + gateway_backend_label(&metadata) + ); + Ok(()) +} + +fn ensure_host_run_backend_flags( + backend: GatewayBackend, + remote: Option<&str>, + ssh_key: Option<&str>, + registry_username: Option<&str>, + registry_token: Option<&str>, + gpu: &[String], +) -> Result<()> { + if remote.is_some() || ssh_key.is_some() { + return Err(miette!( + "gateway backend '{}' only supports local host-run gateways; --remote and --ssh-key are only valid with --backend k3s", + backend + )); + } + if registry_username.is_some() || registry_token.is_some() || !gpu.is_empty() { + return Err(miette!( + "gateway backend '{}' does not support --registry-username, --registry-token, or --gpu; those flags are only valid with --backend k3s", + backend + )); + } + Ok(()) +} + +fn ensure_localhost_destroy_target( + name: &str, + metadata: Option<&GatewayMetadata>, + remote_override: Option<&str>, +) -> Result<()> { + if remote_override.is_some() { + return Err(miette!( + "gateway destroy is only supported for localhost gateways.\n\ + Gateway '{name}' was targeted with --remote, which is not a localhost backend." + )); + } + if let Some(metadata) = metadata + && !is_loopback_gateway_endpoint(&metadata.gateway_endpoint) + { + return Err(miette!( + "gateway destroy is only supported for localhost gateways.\n\ + Gateway '{name}' resolves to {}.", + metadata.gateway_endpoint + )); + } + Ok(()) +} + +fn print_log_lines(path: &Path, lines: Option) -> Result<()> { + if !path.exists() { + return Err(miette!( + "gateway log file does not exist: {}", + path.display() + )); + } + + let file = File::open(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to open {}", path.display()))?; + let reader = BufReader::new(file); + let mut contents = reader + .lines() + .collect::, _>>() + .into_diagnostic() + .wrap_err_with(|| format!("failed to read {}", path.display()))?; + if let Some(limit) = lines + && contents.len() > limit + { + let start = contents.len() - limit; + contents = contents.split_off(start); + } + for line in contents { + println!("{line}"); + } + Ok(()) +} + +async fn follow_log_file(path: &Path, lines: Option) -> Result<()> { + print_log_lines(path, lines)?; + + let mut file = File::open(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to open {}", path.display()))?; + let mut position = file + .metadata() + .into_diagnostic() + .wrap_err_with(|| format!("failed to stat {}", path.display()))? + .len(); + file.seek(SeekFrom::Start(position)) + .into_diagnostic() + .wrap_err_with(|| format!("failed to seek {}", path.display()))?; + + loop { + tokio::select! { + _ = tokio::signal::ctrl_c() => return Ok(()), + _ = tokio::time::sleep(HOST_RUN_GATEWAY_LOG_POLL_INTERVAL) => { + let metadata = file + .metadata() + .into_diagnostic() + .wrap_err_with(|| format!("failed to stat {}", path.display()))?; + if metadata.len() < position { + position = 0; + file.seek(SeekFrom::Start(0)) + .into_diagnostic() + .wrap_err_with(|| format!("failed to rewind {}", path.display()))?; + } + if metadata.len() == position { + continue; + } + let mut chunk = String::new(); + file.read_to_string(&mut chunk) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read {}", path.display()))?; + position = metadata.len(); + print!("{chunk}"); + std::io::stdout().flush().into_diagnostic()?; + } + } + } +} + +async fn doctor_logs_host_run(name: &str, lines: Option, tail: bool) -> Result<()> { + let paths = HostRunGatewayPaths::for_gateway(name)?; + if tail { + follow_log_file(&paths.log_path, lines).await + } else { + print_log_lines(&paths.log_path, lines) + } +} + /// Provision or start a gateway (local or remote). pub async fn gateway_admin_deploy( name: &str, + backend: GatewayBackend, remote: Option<&str>, ssh_key: Option<&str>, port: u16, @@ -1435,6 +2332,33 @@ pub async fn gateway_admin_deploy( registry_token: Option<&str>, gpu: Vec, ) -> Result<()> { + match backend { + GatewayBackend::K3s => {} + GatewayBackend::Kubernetes | GatewayBackend::Vm => { + ensure_host_run_backend_flags( + backend, + remote, + ssh_key, + registry_username, + registry_token, + &gpu, + )?; + return spawn_host_run_gateway( + name, + backend, + port, + gateway_host, + recreate, + disable_tls, + disable_gateway_auth, + ) + .await; + } + GatewayBackend::Podman => { + return Err(miette!("gateway backend 'podman' is not implemented yet")); + } + } + let location = if remote.is_some() { "remote" } else { "local" }; // Build remote options once so we can reuse them for the existence check @@ -1476,6 +2400,14 @@ pub async fn gateway_admin_deploy( .as_ref() .filter(|m| m.gateway_port > 0) .map_or(port, |m| m.gateway_port); + let effective_disable_tls = disable_tls + || stored_metadata + .as_ref() + .is_some_and(|m| is_plaintext_auth_mode(m.auth_mode.as_deref())); + let effective_disable_gateway_auth = disable_gateway_auth + || stored_metadata + .as_ref() + .is_some_and(|m| is_optional_mtls_auth_mode(m.auth_mode.as_deref())); let effective_gateway_host: Option = gateway_host.map(String::from).or_else(|| { stored_metadata .as_ref() @@ -1484,8 +2416,8 @@ pub async fn gateway_admin_deploy( let mut options = DeployOptions::new(name) .with_port(effective_port) - .with_disable_tls(disable_tls) - .with_disable_gateway_auth(disable_gateway_auth) + .with_disable_tls(effective_disable_tls) + .with_disable_gateway_auth(effective_disable_gateway_auth) .with_gpu(gpu) .with_recreate(recreate); if let Some(opts) = remote_opts { @@ -1547,11 +2479,17 @@ fn resolve_gateway_control_target_from( } match metadata { - Some(metadata) if metadata.is_remote => metadata.remote_host.map_or( - GatewayControlTarget::ExternalRegistration, - GatewayControlTarget::Remote, - ), - _ => GatewayControlTarget::Local, + Some(metadata) => match gateway_kind(Some(&metadata)) { + ManagedGatewayKind::External => GatewayControlTarget::ExternalRegistration, + ManagedGatewayKind::K3s | ManagedGatewayKind::HostRun(_) if metadata.is_remote => { + metadata.remote_host.map_or( + GatewayControlTarget::ExternalRegistration, + GatewayControlTarget::Remote, + ) + } + ManagedGatewayKind::K3s | ManagedGatewayKind::HostRun(_) => GatewayControlTarget::Local, + }, + None => GatewayControlTarget::Local, } } @@ -1570,8 +2508,8 @@ fn gateway_control_target_options( Ok(Some(opts)) } GatewayControlTarget::ExternalRegistration => Err(miette::miette!( - "Gateway '{name}' is an external registration, not a managed Docker gateway.\n\ - `openshell gateway stop` is only supported for local or SSH-managed gateways." + "Gateway '{name}' is an external registration, not a managed gateway.\n\ + `openshell gateway stop` is only supported for managed local gateways." )), } } @@ -1610,13 +2548,37 @@ pub async fn gateway_admin_stop( remote: Option<&str>, ssh_key: Option<&str>, ) -> Result<()> { - let remote_opts = gateway_control_target_options(name, remote, ssh_key)?; + let metadata = get_gateway_metadata(name); + match lifecycle_gateway_kind(metadata.as_ref()) { + ManagedGatewayKind::K3s => { + let remote_opts = gateway_control_target_options(name, remote, ssh_key)?; - eprintln!("• Stopping gateway {name}..."); - let handle = openshell_bootstrap::gateway_handle(name, remote_opts.as_ref()).await?; - handle.stop().await?; - eprintln!("{} Gateway {name} stopped.", "✓".green().bold()); - Ok(()) + eprintln!("• Stopping gateway {name}..."); + let handle = openshell_bootstrap::gateway_handle(name, remote_opts.as_ref()).await?; + handle.stop().await?; + eprintln!("{} Gateway {name} stopped.", "✓".green().bold()); + Ok(()) + } + ManagedGatewayKind::HostRun(_) => { + if remote.is_some() || ssh_key.is_some() { + return Err(miette!( + "gateway stop does not accept --remote or --ssh-key for host-run gateways" + )); + } + eprintln!("• Stopping gateway {name}..."); + let stopped = stop_host_run_process(&HostRunGatewayPaths::for_gateway(name)?).await?; + if stopped { + eprintln!("{} Gateway {name} stopped.", "✓".green().bold()); + } else { + eprintln!("{} Gateway {name} is not running.", "!".yellow()); + } + Ok(()) + } + ManagedGatewayKind::External => Err(miette!( + "Gateway '{name}' is an external registration, not a managed gateway.\n\ + `openshell gateway stop` is only supported for managed local gateways." + )), + } } /// Destroy a gateway and its state. @@ -1625,8 +2587,11 @@ pub async fn gateway_admin_destroy( remote: Option<&str>, ssh_key: Option<&str>, ) -> Result<()> { - match resolve_gateway_control_target(name, remote) { - GatewayControlTarget::ExternalRegistration => { + let metadata = get_gateway_metadata(name); + ensure_localhost_destroy_target(name, metadata.as_ref(), remote)?; + + match lifecycle_gateway_kind(metadata.as_ref()) { + ManagedGatewayKind::External => { eprintln!("• Removing gateway registration {name}..."); remove_gateway_registration(name); eprintln!( @@ -1635,7 +2600,7 @@ pub async fn gateway_admin_destroy( ); Ok(()) } - GatewayControlTarget::Local | GatewayControlTarget::Remote(_) => { + ManagedGatewayKind::K3s => { let remote_opts = gateway_control_target_options(name, remote, ssh_key)?; eprintln!("• Destroying gateway {name}..."); @@ -1647,6 +2612,21 @@ pub async fn gateway_admin_destroy( eprintln!("{} Gateway {name} destroyed.", "✓".green().bold()); Ok(()) } + ManagedGatewayKind::HostRun(_) => { + if ssh_key.is_some() { + return Err(miette!( + "gateway destroy does not accept --ssh-key for host-run gateways" + )); + } + + eprintln!("• Destroying gateway {name}..."); + let paths = HostRunGatewayPaths::for_gateway(name)?; + let _ = stop_host_run_process(&paths).await?; + remove_host_run_runtime_state(&paths)?; + cleanup_gateway_metadata(name); + eprintln!("{} Gateway {name} destroyed.", "✓".green().bold()); + Ok(()) + } } } @@ -1667,6 +2647,11 @@ pub fn gateway_admin_info(name: &str) -> Result<()> { "Gateway endpoint:".dimmed(), metadata.gateway_endpoint ); + println!( + " {} {}", + "Backend:".dimmed(), + gateway_backend_label(&metadata) + ); if metadata.is_remote { if let Some(ref host) = metadata.remote_host { @@ -1693,6 +2678,25 @@ pub async fn doctor_logs( remote: Option<&str>, ssh_key: Option<&str>, ) -> Result<()> { + let metadata = get_gateway_metadata(name); + match lifecycle_gateway_kind(metadata.as_ref()) { + ManagedGatewayKind::HostRun(_) => { + if remote.is_some() || ssh_key.is_some() { + return Err(miette!( + "doctor logs does not accept --remote or --ssh-key for host-run gateways" + )); + } + return doctor_logs_host_run(name, lines, tail).await; + } + ManagedGatewayKind::External => { + return Err(miette!( + "Gateway '{name}' is an external registration.\n\ + `openshell doctor logs` is only supported for managed gateways." + )); + } + ManagedGatewayKind::K3s => {} + } + // Build remote options: explicit --remote flag, or auto-resolve from metadata let remote_opts = if let Some(dest) = remote { let mut opts = RemoteOptions::new(dest); @@ -1729,6 +2733,22 @@ pub fn doctor_exec( ssh_key: Option<&str>, command: &[String], ) -> Result<()> { + let metadata = get_gateway_metadata(name); + match lifecycle_gateway_kind(metadata.as_ref()) { + ManagedGatewayKind::HostRun(_) => { + return Err(miette!( + "`openshell doctor exec` is not supported for process-managed gateways" + )); + } + ManagedGatewayKind::External => { + return Err(miette!( + "Gateway '{name}' is an external registration.\n\ + `openshell doctor exec` is only supported for managed gateways." + )); + } + ManagedGatewayKind::K3s => {} + } + validate_gateway_name(name)?; let container = container_name(name); let is_tty = std::io::stdin().is_terminal(); @@ -5196,13 +6216,15 @@ fn format_timestamp_ms(ms: i64) -> String { #[cfg(test)] mod tests { use super::{ - GatewayControlTarget, TlsOptions, format_gateway_select_header, - format_gateway_select_items, gateway_add, gateway_auth_label, gateway_select_with, - gateway_type_label, git_sync_files, http_health_check, image_requests_gpu, - inferred_provider_type, parse_cli_setting_value, parse_credential_pairs, - plaintext_gateway_is_remote, provisioning_timeout_message, ready_false_condition_message, - resolve_gateway_control_target_from, sandbox_should_persist, shell_escape, - source_requests_gpu, validate_gateway_name, validate_ssh_host, + GatewayControlTarget, ManagedGatewayKind, TlsOptions, ensure_localhost_destroy_target, + format_gateway_select_header, format_gateway_select_items, gateway_add, gateway_auth_label, + gateway_backend_label, gateway_kind, gateway_select_with, gateway_type_label, + git_sync_files, http_health_check, image_requests_gpu, inferred_provider_type, + parse_cli_setting_value, parse_credential_pairs, plaintext_gateway_is_remote, + provisioning_timeout_message, ready_false_condition_message, + resolve_gateway_control_target_from, resolve_host_run_grpc_endpoint, + sandbox_should_persist, shell_escape, source_requests_gpu, validate_gateway_name, + validate_ssh_host, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -5214,7 +6236,7 @@ mod tests { use std::process::Command; use std::thread; - use openshell_bootstrap::GatewayMetadata; + use openshell_bootstrap::{GatewayBackend, GatewayMetadata}; use openshell_core::proto::{SandboxCondition, SandboxStatus}; struct EnvVarGuard { @@ -5277,6 +6299,8 @@ mod tests { remote_host: None, resolved_host: None, auth_mode: Some("cloudflare_jwt".to_string()), + backend: None, + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, } @@ -5581,6 +6605,98 @@ mod tests { } } + #[test] + fn resolve_gateway_control_target_treats_local_external_registration_as_unmanaged() { + let metadata = GatewayMetadata { + name: "external-local".to_string(), + gateway_endpoint: "http://127.0.0.1:8080".to_string(), + is_remote: false, + gateway_port: 0, + remote_host: None, + resolved_host: None, + auth_mode: Some("plaintext".to_string()), + backend: None, + configured_gateway_host: None, + edge_team_domain: None, + edge_auth_url: None, + }; + + let target = resolve_gateway_control_target_from(Some(metadata), None); + assert!(matches!(target, GatewayControlTarget::ExternalRegistration)); + } + + #[test] + fn gateway_kind_distinguishes_managed_and_external_backends() { + let managed = GatewayMetadata { + name: "managed".to_string(), + gateway_endpoint: "https://127.0.0.1:8080".to_string(), + is_remote: false, + gateway_port: 8080, + remote_host: None, + resolved_host: None, + auth_mode: None, + backend: Some(GatewayBackend::Kubernetes), + configured_gateway_host: Some("gateway.internal".to_string()), + edge_team_domain: None, + edge_auth_url: None, + }; + let external = edge_registration("edge", "https://edge.example.com"); + + assert!(matches!( + gateway_kind(Some(&managed)), + ManagedGatewayKind::HostRun(GatewayBackend::Kubernetes) + )); + assert!(matches!( + gateway_kind(Some(&external)), + ManagedGatewayKind::External + )); + assert_eq!(gateway_backend_label(&managed), "kubernetes"); + assert_eq!(gateway_backend_label(&external), "-"); + } + + #[test] + fn localhost_destroy_guard_rejects_non_loopback_endpoints() { + let metadata = GatewayMetadata { + name: "remote".to_string(), + gateway_endpoint: "https://gateway.example.com:8443".to_string(), + is_remote: true, + gateway_port: 8443, + remote_host: Some("user@gateway.example.com".to_string()), + resolved_host: Some("gateway.example.com".to_string()), + auth_mode: None, + backend: Some(GatewayBackend::K3s), + configured_gateway_host: Some("gateway.example.com".to_string()), + edge_team_domain: None, + edge_auth_url: None, + }; + + let err = ensure_localhost_destroy_target("remote", Some(&metadata), None) + .expect_err("non-loopback destroy should fail"); + assert!(err.to_string().contains("localhost gateways")); + } + + #[test] + fn kubernetes_host_run_endpoint_prefers_host_alias_when_host_gateway_ip_is_set() { + let _guard = EnvVarGuard::set("OPENSHELL_HOST_GATEWAY_IP", "172.17.0.1"); + + let endpoint = + resolve_host_run_grpc_endpoint(GatewayBackend::Kubernetes, 8080, false, "127.0.0.1") + .expect("kubernetes callback endpoint"); + + assert_eq!(endpoint, "https://host.openshell.internal:8080"); + } + + #[test] + fn kubernetes_host_run_endpoint_rejects_loopback_without_host_alias() { + let _guard = EnvVarGuard::unset("OPENSHELL_HOST_GATEWAY_IP"); + + let err = + resolve_host_run_grpc_endpoint(GatewayBackend::Kubernetes, 8080, false, "127.0.0.1") + .expect_err("loopback kubernetes callback endpoint should fail"); + + assert!(err.to_string().contains("pod-reachable gateway host")); + } + #[test] fn gateway_select_uses_explicit_name_without_prompting() { let tmpdir = tempfile::tempdir().expect("create tmpdir"); @@ -5665,6 +6781,8 @@ mod tests { remote_host: None, resolved_host: None, auth_mode: None, + backend: Some(GatewayBackend::K3s), + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }, @@ -5680,12 +6798,15 @@ mod tests { assert!(header.contains("NAME")); assert!(header.contains("ENDPOINT")); assert!(header.contains("TYPE")); + assert!(header.contains("BACKEND")); assert!(header.contains("AUTH")); assert!(items[0].contains("alpha")); assert!(items[0].contains("https://edge.example.com")); assert!(items[0].contains("cloud")); + assert!(items[0].contains("-")); assert!(items[0].contains("cloudflare_jwt")); assert!(items[1].contains("local")); + assert!(items[1].contains("k3s")); assert!(items[1].contains("plaintext")); assert!(items[1].contains("http://127.0.0.1:8080")); } @@ -5700,6 +6821,8 @@ mod tests { remote_host: None, resolved_host: None, auth_mode: None, + backend: Some(GatewayBackend::K3s), + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }; diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index 2b78a7669..aebf70543 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -1738,6 +1738,8 @@ fn bootstrap_gateway(rootfs: &Path, gateway_name: &str, gateway_port: u16) -> Re remote_host: None, resolved_host: None, auth_mode: None, + backend: Some(openshell_bootstrap::GatewayBackend::K3s), + configured_gateway_host: None, edge_team_domain: None, edge_auth_url: None, }; diff --git a/tasks/scripts/cluster-bootstrap.sh b/tasks/scripts/cluster-bootstrap.sh index def2429b8..1c3fe1556 100755 --- a/tasks/scripts/cluster-bootstrap.sh +++ b/tasks/scripts/cluster-bootstrap.sh @@ -241,24 +241,25 @@ if [ -z "${OPENSHELL_CLUSTER_IMAGE:-}" ]; then export OPENSHELL_CLUSTER_IMAGE="openshell/cluster:${IMAGE_TAG}" fi -DEPLOY_CMD=(openshell gateway start --name "${CLUSTER_NAME}" --port "${GATEWAY_PORT}") +DEPLOY_CMD=(openshell gateway start --backend k3s --name "${CLUSTER_NAME}" --port "${GATEWAY_PORT}") if [ "${CLUSTER_GPU:-0}" = "1" ]; then DEPLOY_CMD+=(--gpu) fi -if [ -n "${GATEWAY_HOST:-}" ]; then - DEPLOY_CMD+=(--gateway-host "${GATEWAY_HOST}") +EFFECTIVE_GATEWAY_HOST="${OPENSHELL_GATEWAY_HOST:-${GATEWAY_HOST:-}}" +if [ -n "${EFFECTIVE_GATEWAY_HOST}" ]; then + DEPLOY_CMD+=(--gateway-host "${EFFECTIVE_GATEWAY_HOST}") # Ensure the gateway host resolves from the current environment. # On Linux CI runners host.docker.internal is not set automatically # (it's a Docker Desktop feature). If the hostname doesn't resolve, # add it via the Docker bridge gateway IP. - if ! getent hosts "${GATEWAY_HOST}" >/dev/null 2>&1; then + if ! getent hosts "${EFFECTIVE_GATEWAY_HOST}" >/dev/null 2>&1; then BRIDGE_IP=$(docker network inspect bridge --format '{{(index .IPAM.Config 0).Gateway}}' 2>/dev/null || true) if [ -n "${BRIDGE_IP}" ]; then - echo "Adding /etc/hosts entry: ${BRIDGE_IP} ${GATEWAY_HOST}" - echo "${BRIDGE_IP} ${GATEWAY_HOST}" >> /etc/hosts + echo "Adding /etc/hosts entry: ${BRIDGE_IP} ${EFFECTIVE_GATEWAY_HOST}" + echo "${BRIDGE_IP} ${EFFECTIVE_GATEWAY_HOST}" >> /etc/hosts fi fi fi