Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
431 changes: 431 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

42 changes: 41 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,19 @@ name = "codex"
path = "src/main.rs"

[features]
default = ["rar"]
default = ["rar", "observability"]
rar = ["dep:unrar"]
embed-frontend = []
observability = [
"dep:opentelemetry",
"dep:opentelemetry_sdk",
"dep:opentelemetry-otlp",
"dep:opentelemetry-semantic-conventions",
"dep:tracing-opentelemetry",
"dep:axum-tracing-opentelemetry",
"dep:tonic",
"dep:sysinfo",
]

[workspace]
members = [".", "migration"]
Expand Down Expand Up @@ -108,6 +118,33 @@ tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tracing-appender = "0.2"
log = "0.4" # For sqlx logging level configuration

# OpenTelemetry (optional, gated by `observability` feature)
opentelemetry = { version = "0.32", optional = true }
opentelemetry_sdk = { version = "0.32", features = ["rt-tokio", "trace", "metrics"], optional = true }
opentelemetry-otlp = { version = "0.32", default-features = false, features = [
"grpc-tonic",
"http-proto",
"http-json",
# Blocking HTTP client is intentional: the OTel SDK 0.32 batch processor
# runs export on a dedicated std::thread that has no async runtime
# attached. An async reqwest client would panic on first export. The
# blocking client only blocks the batch thread, not the server runtime.
"reqwest-blocking-client",
"trace",
"metrics",
], optional = true }
opentelemetry-semantic-conventions = { version = "0.32", optional = true }
tracing-opentelemetry = { version = "0.33", optional = true }
axum-tracing-opentelemetry = { version = "0.33", optional = true }
# Re-used via opentelemetry-otlp's grpc-tonic feature; declared here so
# metadata helpers can use MetadataKey/MetadataValue types directly.
tonic = { version = "0.14", default-features = false, optional = true }
# Process-level metrics (CPU, memory). `opentelemetry-system-metrics` would
# do this for us but is pinned to opentelemetry 0.31, one minor behind our
# 0.32. Writing the few callbacks we need against sysinfo directly is ~30
# lines and avoids pulling a second, mismatched opentelemetry version into
# the build.
sysinfo = { version = "0.39", default-features = false, features = ["system"], optional = true }
async-stream = "0.3"
futures = "0.3"
tokio-stream = "0.1"
Expand Down Expand Up @@ -171,6 +208,9 @@ http-body-util = "0.1"
hyper = { version = "1.0", features = ["full"] }
serial_test = "3.2"
tracing-test = "0.2"
# Enable the SDK's `testing` feature for the in-memory metric exporter used
# in observability::metrics tests. Dev-only; no production impact.
opentelemetry_sdk = { version = "0.32", features = ["rt-tokio", "trace", "metrics", "testing"] }

# =============================================================================
# Development Profile - Optimized for fast incremental builds
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ dev-logs-worker: ## View worker logs only
dev-logs-frontend: ## View frontend logs only
docker compose logs -f frontend-dev

dev-logs-jaeger: ## View Jaeger logs only
docker compose logs -f jaeger

dev-restart: ## Restart all development containers
docker compose restart codex-dev codex-dev-worker frontend-dev

Expand Down
28 changes: 28 additions & 0 deletions config/config.docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,31 @@ komga_api:

koreader_api:
enabled: true

# OpenTelemetry observability (disabled by default).
#
# Uncomment to ship traces/metrics to the bundled Jaeger sidecar (started
# automatically by `make dev-up`). Jaeger accepts OTLP on port 4317 inside the
# compose network and serves a UI at http://localhost:16686 on the host.
#
# See docs/docs/observability.md for the full schema, backend matrix, and
# sampling guidance.
# observability:
# enabled: true
# service_name: codex
# otlp:
# endpoint: http://jaeger:4317
# protocol: grpc
# # headers: # auth/tenant headers for hosted backends
# # x-tenant: dev
# timeout_ms: 5000
# traces:
# enabled: true
# sample_ratio: 1.0
# metrics:
# enabled: true
# export_interval_ms: 30000
# browser:
# enabled: true # opt-in browser RUM (proxied through codex)
# proxy_path: /api/v1/observability/otlp
# sample_ratio: 0.1
33 changes: 33 additions & 0 deletions config/config.kubernetes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,36 @@ files:
# - /api/v1/books/*/thumbnail # Exempt book thumbnails
# cleanup_interval_secs: 60
# bucket_ttl_secs: 300

# OpenTelemetry observability (disabled by default).
#
# In Kubernetes you'll typically point this at the cluster's OTel collector
# DaemonSet/Deployment (e.g. opentelemetry-collector.observability.svc:4317)
# or at the OTLP receiver of an agent like the DataDog Agent. See
# docs/docs/observability.md for the schema, backend matrix, and sampling
# guidance.
#
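# Most fields can also be set via env (CODEX_OBSERVABILITY_*) so secrets
# (auth tokens) can come from Kubernetes Secrets:
# CODEX_OBSERVABILITY_ENABLED=true
# CODEX_OBSERVABILITY_OTLP_ENDPOINT=http://otel-collector.observability:4317
# CODEX_OBSERVABILITY_OTLP_HEADERS=signoz-access-token=$(cat /secrets/signoz-token)
# Note: `$(cat ...)` is shell command substitution and is only expanded when
# the variable is set from a shell (e.g. an entrypoint script). In a Pod
# manifest, populate the variable from the Secret with `valueFrom.secretKeyRef`
# instead.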
# observability:
# enabled: true
# service_name: codex
# otlp:
# endpoint: http://otel-collector.observability:4317
# protocol: grpc
# # headers: # auth/tenant headers
# # x-honeycomb-team: ...
# timeout_ms: 5000
# traces:
# enabled: true
# sample_ratio: 0.25 # tune for cluster traffic volume
# metrics:
# enabled: true
# export_interval_ms: 30000
# browser:
# enabled: false
# proxy_path: /api/v1/observability/otlp
# sample_ratio: 0.1
25 changes: 25 additions & 0 deletions config/config.sqlite.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,28 @@ files:
# - /api/v1/books/*/thumbnail # Exempt book thumbnails
# cleanup_interval_secs: 60 # How often to clean up stale buckets
# bucket_ttl_secs: 300 # Time before a bucket is considered stale

# OpenTelemetry observability (disabled by default).
#
# Uncomment and point `otlp.endpoint` at your collector to enable trace and
# metric export. See docs/docs/observability.md for the schema, backend matrix
# (SigNoz, Tempo, Honeycomb, Uptrace, ...), and sampling guidance.
# observability:
# enabled: true
# service_name: codex
# otlp:
# endpoint: http://localhost:4317 # e.g. a local Jaeger or your operator's collector
# protocol: grpc # grpc | http/protobuf | http/json
# # headers: # auth/tenant headers (e.g. signoz-access-token)
# # x-honeycomb-team: ...
# timeout_ms: 5000
# traces:
# enabled: true
# sample_ratio: 1.0 # tune down on busy deployments
# metrics:
# enabled: true
# export_interval_ms: 30000
# browser:
# enabled: false # opt-in browser RUM, proxied through codex
# proxy_path: /api/v1/observability/otlp
# sample_ratio: 0.1
42 changes: 41 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,26 @@ services:
# CODEX_DATABASE_POSTGRES_DATABASE_NAME: codex
CODEX_SCHEDULER_TIMEZONE: America/Los_Angeles
CODEX_LOGGING_LEVEL: debug
# OpenTelemetry observability: ship traces/metrics to the bundled Jaeger
# sidecar so `make dev-up` "just works". The Codex config files keep
# observability disabled by default, so production deployments export
# telemetry only when an operator explicitly opts in; the dev compose
# overrides that default here.
CODEX_OBSERVABILITY_ENABLED: "true"
CODEX_OBSERVABILITY_SERVICE_NAME: codex
CODEX_OBSERVABILITY_OTLP_ENDPOINT: http://jaeger:4317
CODEX_OBSERVABILITY_OTLP_PROTOCOL: grpc
CODEX_OBSERVABILITY_BROWSER_ENABLED: "true"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"]
interval: 10s
timeout: 5s
retries: 30
start_period: 30s
# Generous grace period: first-time `cargo build` inside the container
# can take 10+ minutes on a cold cache. During start_period, failing
# healthchecks do not count toward `retries`, so the container is not
# prematurely marked unhealthy (which would also fail codex-dev-worker
# since it depends on `codex-dev: service_healthy`).
start_period: 900s
networks:
- codex-network
profiles:
Expand Down Expand Up @@ -202,6 +216,12 @@ services:
# CODEX_DATABASE_POSTGRES_PASSWORD: codex
# CODEX_DATABASE_POSTGRES_DATABASE_NAME: codex
CODEX_LOGGING_LEVEL: debug
# OpenTelemetry observability: same overrides as codex-dev so the worker
# emits spans/metrics into the same Jaeger sidecar.
CODEX_OBSERVABILITY_ENABLED: "true"
CODEX_OBSERVABILITY_SERVICE_NAME: codex
CODEX_OBSERVABILITY_OTLP_ENDPOINT: http://jaeger:4317
CODEX_OBSERVABILITY_OTLP_PROTOCOL: grpc
networks:
- codex-network
profiles:
Expand Down Expand Up @@ -317,6 +337,26 @@ services:
- dev
- prod

# Jaeger all-in-one for OTLP trace evaluation (see docs/docs/observability.md).
# Accepts OTLP natively on 4317 (gRPC) / 4318 (HTTP), serves the UI on 16686,
# and stores spans in memory. Available in the dev profile; the codex-dev and
# codex-dev-worker services above are pre-wired to send OTLP here via
# CODEX_OBSERVABILITY_OTLP_ENDPOINT=http://jaeger:4317.
jaeger:
image: jaegertracing/all-in-one:1.62.0
container_name: codex-jaeger
environment:
- COLLECTOR_OTLP_ENABLED=true
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
networks:
- codex-network
restart: unless-stopped
profiles:
- dev

# Documentation server
docs:
build:
Expand Down
Loading
Loading