JinBa1 · JinBa1 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,13 @@
+# Keep internal docs and local build output out of the context/image. Patterns are root-anchored; a **/ prefix matches at any depth.
+.git
+.github
+dev
+**/target
+**/*.iml
+.idea
+.vscode
+**/*.log
+# Engine test-resource output dirs (generated by the test suite; never needed for a build).
+engine/src/test/resources/test_integration_output
+engine/src/test/resources/test_sample_output
+engine/src/test/resources/test_integration_queries
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
@@ -0,0 +1,56 @@
+name: docker-publish
+
+# Every PR builds the server image as a validation step (nothing is pushed); pushes to main and
+# version tags publish it to GHCR. The image is the self-host distribution channel: `docker run`
+# the gateway beside a folder of CSVs, then point an MCP client at it.
+on:
+  workflow_dispatch:
+  pull_request:
+    branches: ["main"]
+  push:
+    tags: ["v*"]
+    branches: ["main"]
+
+permissions:
+  packages: write
+  contents: read
+
+jobs:
+  build-push:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: docker/setup-buildx-action@v3
+        name: Set up Docker Buildx
+
+      # Login is only needed to push, so PR runs skip it (fork PRs get a read-only GITHUB_TOKEN).
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        if: ${{ github.event_name != 'pull_request' }}
+        with:
+          password: ${{ secrets.GITHUB_TOKEN }}
+          username: ${{ github.actor }}
+          registry: ghcr.io
+
+      - id: meta
+        name: Image metadata
+        uses: docker/metadata-action@v5
+        with:
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha
+            type=raw,value=latest,enable={{is_default_branch}}
+          images: ghcr.io/${{ github.repository_owner }}/cuckoodb
+
+      # Pull requests only prove the image builds; pushes to main and tags actually publish it.
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: ${{ github.event_name != 'pull_request' }}
+          labels: ${{ steps.meta.outputs.labels }}
+          tags: ${{ steps.meta.outputs.tags }}
+          cache-to: type=gha,mode=max
+          cache-from: type=gha
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,27 @@
+# syntax=docker/dockerfile:1
+
+# The server fat jar and the engine module it depends on are compiled inside the image, keeping the
+# build self-contained and reproducible. Maven's local repo lives in a BuildKit cache mount (warm
+# across rebuilds, never baked into a layer). Tests are skipped here; running them is CI's job.
+FROM eclipse-temurin:17-jdk AS build
+WORKDIR /src
+COPY . .
+RUN --mount=type=cache,target=/root/.m2 \
+    chmod +x mvnw && ./mvnw --quiet --projects server --also-make -Dmaven.test.skip=true clean package
+
+# Runtime: JRE only, just the executable Spring Boot jar, run as non-root under a fixed UID/GID (10001).
+FROM eclipse-temurin:17-jre AS runtime
+RUN groupadd --system --gid 10001 cuckoo && useradd --system --uid 10001 --gid cuckoo --home /app cuckoo
+WORKDIR /app
+COPY --from=build /src/server/target/cuckoodb-server-*.jar /app/cuckoodb-server.jar
+
+# Self-host layout: mount a folder of CSVs at /cuckoodb/data and they are loaded into the catalog
+# at startup; uploads (when enabled) persist under /cuckoodb/work. data-dir is the PARENT of the
+# data/ folder the engine scans, hence /cuckoodb with CSVs in /cuckoodb/data.
+RUN mkdir -p /cuckoodb/data /cuckoodb/work && chown -R cuckoo:cuckoo /cuckoodb
+ENV CUCKOODB_DATA_DIR=/cuckoodb \
+    CUCKOODB_WORK_DIR=/cuckoodb/work
+# Numeric USER (the cuckoo account) lets orchestrators verify non-root, e.g. Kubernetes runAsNonRoot.
+USER 10001:10001
+EXPOSE 8080
+ENTRYPOINT ["java", "-jar", "/app/cuckoodb-server.jar"]
diff --git a/README.md b/README.md
@@ -97,6 +97,21 @@ java -cp engine/target/cuckoodb-engine-1.0.0-jar-with-dependencies.jar \
 
 Both `--max-tuples` and `--timeout-ms` are optional and independent. Omit either to impose no limit on that dimension.
 
+### Run the server as a container
+
+The Spring Boot gateway (REST + MCP) ships as a container image, so you can run it next to your data with no Java toolchain. Put your CSV files in a folder and mount it as the catalog's data directory:
+
+```bash
+docker run --rm -p 8080:8080 \
+  -v /path/to/your/csvs:/cuckoodb/data \
+  ghcr.io/jinba1/cuckoodb:latest
+```
+
+- **REST:** `POST http://localhost:8080/queries`, `GET /tables`, `GET /tables/{name}` (OpenAPI at `/swagger-ui.html`).
+- **MCP:** Streamable-HTTP endpoint at `http://localhost:8080/mcp` — point an MCP client at it to query your CSVs with `list_tables` / `describe_table` / `sample_rows` / `explain_query` / `query`.
+
+The image is published to GHCR on each merge to `main` and on each `v*` version tag. To build it locally instead: `docker build -t cuckoodb .`
+
 ### Query budgets
 
 The engine enforces **total-work semantics**: every tuple emitted by any operator in the tree counts against the budget, including intermediate tuples that are later filtered or joined. A cross-product explosion that never produces output rows will still hit the tuple limit. The timeout clock starts lazily at the first tuple emission.