From 6c2ef66a6a4815203f5007f447377d6f7795321b Mon Sep 17 00:00:00 2001
From: jocelyneholdbrook
Date: Mon, 27 Apr 2026 16:59:22 +0100
Subject: [PATCH] feat(cogstack-cohorter): Use a smaller model by default

The previous default, gpt-oss:20b, requires ~14 GB of memory/VRAM, which
made the stack hard to run on modest hardware. Switch every default to
llama3.2:3b (~2 GB, runs on CPU), and stop hard-coding the model in the
web clients so that the server-side OLLAMA_MODEL env var is the single
source of truth. Larger models (e.g. llama3.1:8b or gpt-oss:20b) can
still be configured for better NL → DSL accuracy.

---
 cogstack-cohorter/NL2DSL/Dockerfile | 2 +-
 cogstack-cohorter/NL2DSL/README.md | 6 +++---
 cogstack-cohorter/NL2DSL/docker_run.sh | 2 +-
 cogstack-cohorter/NL2DSL/server.js | 2 +-
 cogstack-cohorter/README.md | 2 +-
 .../client-react/public/assets/js/init-alpine.js | 8 +++-----
 .../WebAPP/client-react/src/alpine-data.js | 8 +++-----
 .../WebAPP/client/public/assets/js/init-alpine.js | 8 +++-----
 cogstack-cohorter/WebAPP/server/server.js | 2 +-
 cogstack-cohorter/docker-compose.yml | 4 ++--
 helm-charts/cogstack-cohorter-helm/README.md | 10 ++++++----
 helm-charts/cogstack-cohorter-helm/values.yaml | 10 ++++++++--
 12 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/cogstack-cohorter/NL2DSL/Dockerfile b/cogstack-cohorter/NL2DSL/Dockerfile
index 53c79fe..59b79c5 100644
--- a/cogstack-cohorter/NL2DSL/Dockerfile
+++ b/cogstack-cohorter/NL2DSL/Dockerfile
@@ -3,7 +3,7 @@ FROM node:20-alpine
 ENV NODE_ENV=production \
     PORT=3002 \
     OLLAMA_URL=http://ollama:11434/api/generate \
-    OLLAMA_MODEL=gpt-oss:20b \
+    OLLAMA_MODEL=llama3.2:3b \
     MEDCAT_URL=http://cohorter-medcat:5000 \
     ALLOW_ORIGINS=*
diff --git a/cogstack-cohorter/NL2DSL/README.md b/cogstack-cohorter/NL2DSL/README.md
index 27bf4b6..438ca68 100644
--- a/cogstack-cohorter/NL2DSL/README.md
+++ b/cogstack-cohorter/NL2DSL/README.md
@@ -45,7 +45,7 @@ Run:
 docker run -d --name cohorter-nl2dsl \
   -p 3002:3002 \
   -e OLLAMA_URL="http://ollama:11434/api/generate" \
-  -e OLLAMA_MODEL="gpt-oss:20b" \
+  -e OLLAMA_MODEL="llama3.2:3b" \
   -e MEDCAT_URL="http://cohorter-medcat:3001" \
   -e ALLOW_ORIGINS="*" \
   --restart unless-stopped \
@@ -64,7 +64,7 @@ NL2DSL reads the following environment variables (as used in `docker_run.sh` / y
   Ollama generate endpoint URL, e.g. `http://ollama:11434/api/generate`
 - `OLLAMA_MODEL`
-  Model name to use in Ollama, e.g. `gpt-oss:20b`
+  Model name to use in Ollama, e.g. `llama3.2:3b`
 - `MEDCAT_URL`
   MedCAT API base URL, e.g. `http://cohorter-medcat:5000`
@@ -121,7 +121,7 @@ node server.js
 Pull the model inside the ollama container (example):
 ```bash
-docker exec -it ollama ollama pull gpt-oss:20b
+docker exec -it ollama ollama pull llama3.2:3b
 ```
 (Replace with your actual model name.)
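Since the web clients no longer send a model override, the request body to the compile endpoint is just the query. A minimal smoke test for the container started above, assuming the service exposes the same `/nl2dsl` route that the WebAPP proxy uses (the NL2DSL route path itself is not shown in this patch, so adjust if it differs):

```bash
# Hypothetical smoke test; the /nl2dsl route path is an assumption.
# Note there is no "model" field in the body: after this change the
# server-side OLLAMA_MODEL env var (default llama3.2:3b) picks the model.
curl -s -X POST http://localhost:3002/nl2dsl \
  -H 'Content-Type: application/json' \
  -d '{"query": "female patients over 65 with type 2 diabetes"}'
```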
diff --git a/cogstack-cohorter/NL2DSL/docker_run.sh b/cogstack-cohorter/NL2DSL/docker_run.sh index 1cf9937..b5fe31c 100755 --- a/cogstack-cohorter/NL2DSL/docker_run.sh +++ b/cogstack-cohorter/NL2DSL/docker_run.sh @@ -6,7 +6,7 @@ sudo docker network connect cohorter-net cohorter-medcat sudo docker run -d --name cohorter-nl2dsl --network cohorter-net \ -p 3002:3002 \ -e OLLAMA_URL="http://ollama:11434/api/generate" \ - -e OLLAMA_MODEL="gpt-oss:20b" \ + -e OLLAMA_MODEL="llama3.2:3b" \ -e MEDCAT_URL="http://cohorter-medcat:5000" \ -e ALLOW_ORIGINS="*" \ --restart unless-stopped \ diff --git a/cogstack-cohorter/NL2DSL/server.js b/cogstack-cohorter/NL2DSL/server.js index df11403..6246816 100644 --- a/cogstack-cohorter/NL2DSL/server.js +++ b/cogstack-cohorter/NL2DSL/server.js @@ -7,7 +7,7 @@ import cors from 'cors'; // ---- config ---- const PORT = process.env.PORT || 3002; const OLLAMA_URL = process.env.OLLAMA_URL || 'http://127.0.0.1:8002/api/generate'; -const MODEL = process.env.OLLAMA_MODEL || 'gpt-oss:20b'; // 'phi4:latest'; +const MODEL = process.env.OLLAMA_MODEL || 'llama3.2:3b'; // CORS: allow list via env (comma-separated) or "*" const allowList = (process.env.ALLOW_ORIGINS || '*') diff --git a/cogstack-cohorter/README.md b/cogstack-cohorter/README.md index 32ae66b..7b58aa0 100644 --- a/cogstack-cohorter/README.md +++ b/cogstack-cohorter/README.md @@ -53,7 +53,7 @@ docker compose up --build -d NL2DSL uses: - `OLLAMA_URL` (default in compose: `http://ollama:11434/api/generate`) -- `OLLAMA_MODEL` (default: `gpt-oss:20b`) +- `OLLAMA_MODEL` (default: `llama3.2:3b`) - `MEDCAT_URL` (default: `http://cohorter-medcat:5000`) - `ALLOW_ORIGINS` (default: `*`) diff --git a/cogstack-cohorter/WebAPP/client-react/public/assets/js/init-alpine.js b/cogstack-cohorter/WebAPP/client-react/public/assets/js/init-alpine.js index 26ff0d7..94ee349 100644 --- a/cogstack-cohorter/WebAPP/client-react/public/assets/js/init-alpine.js +++ b/cogstack-cohorter/WebAPP/client-react/public/assets/js/init-alpine.js @@ -1,7 +1,6 @@ function data() { - const NL2DSL_URL = '/nl2dsl'; - const NL2DSL_MODEL = 'gpt-oss:20b'; + const NL2DSL_URL = '/nl2dsl'; // read version from URL const url = new URL(window.location.href); @@ -346,13 +345,12 @@ function data() { this.nl_loading = true; try { - // 1) NL -> DSL (force gpt-oss-20b) + // 1) NL -> DSL — model is configured server-side via OLLAMA_MODEL env var const compileResp = await fetch(NL2DSL_URL, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - query: q, - model: NL2DSL_MODEL // ← use gpt-oss-20b + query: q // If your API also accepts temperature/top_p etc., add them here. // e.g. 
temperature: 0.2 }) diff --git a/cogstack-cohorter/WebAPP/client-react/src/alpine-data.js b/cogstack-cohorter/WebAPP/client-react/src/alpine-data.js index c1eaafb..3fb6596 100644 --- a/cogstack-cohorter/WebAPP/client-react/src/alpine-data.js +++ b/cogstack-cohorter/WebAPP/client-react/src/alpine-data.js @@ -1,7 +1,6 @@ export function createAppState() { - const NL2DSL_URL = '/nl2dsl'; - const NL2DSL_MODEL = 'gpt-oss:20b'; + const NL2DSL_URL = '/nl2dsl'; const deepClone = (value) => { if (value == null) return value; if (typeof structuredClone === 'function') { @@ -358,13 +357,12 @@ export function createAppState() { this.nl_loading = true; try { - // 1) NL -> DSL (force gpt-oss-20b) + // 1) NL -> DSL — model is configured server-side via OLLAMA_MODEL env var const compileResp = await fetch(NL2DSL_URL, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - query: q, - model: NL2DSL_MODEL // ← use gpt-oss-20b + query: q // If your API also accepts temperature/top_p etc., add them here. // e.g. temperature: 0.2 }) diff --git a/cogstack-cohorter/WebAPP/client/public/assets/js/init-alpine.js b/cogstack-cohorter/WebAPP/client/public/assets/js/init-alpine.js index 26ff0d7..94ee349 100644 --- a/cogstack-cohorter/WebAPP/client/public/assets/js/init-alpine.js +++ b/cogstack-cohorter/WebAPP/client/public/assets/js/init-alpine.js @@ -1,7 +1,6 @@ function data() { - const NL2DSL_URL = '/nl2dsl'; - const NL2DSL_MODEL = 'gpt-oss:20b'; + const NL2DSL_URL = '/nl2dsl'; // read version from URL const url = new URL(window.location.href); @@ -346,13 +345,12 @@ function data() { this.nl_loading = true; try { - // 1) NL -> DSL (force gpt-oss-20b) + // 1) NL -> DSL — model is configured server-side via OLLAMA_MODEL env var const compileResp = await fetch(NL2DSL_URL, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - query: q, - model: NL2DSL_MODEL // ← use gpt-oss-20b + query: q // If your API also accepts temperature/top_p etc., add them here. // e.g. temperature: 0.2 }) diff --git a/cogstack-cohorter/WebAPP/server/server.js b/cogstack-cohorter/WebAPP/server/server.js index be93115..39a9f27 100644 --- a/cogstack-cohorter/WebAPP/server/server.js +++ b/cogstack-cohorter/WebAPP/server/server.js @@ -716,7 +716,7 @@ app.post('/nl2dsl', async (req, res) => { const r = await fetch(NL2DSL_SERVER, { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(req.body), // e.g. { query: "...", model: "gpt-oss-20b" } + body: JSON.stringify(req.body), // e.g. 
{ query: "...", model: "llama3.2:3b" } }); const text = await r.text(); console.log(text); diff --git a/cogstack-cohorter/docker-compose.yml b/cogstack-cohorter/docker-compose.yml index 65ca0fe..b05a295 100644 --- a/cogstack-cohorter/docker-compose.yml +++ b/cogstack-cohorter/docker-compose.yml @@ -16,7 +16,7 @@ services: until ollama list > /dev/null 2>&1; do echo '[ollama-pull] Waiting for Ollama...'; sleep 2; done && - ollama pull ${OLLAMA_MODEL:-gpt-oss:20b} + ollama pull ${OLLAMA_MODEL:-llama3.2:3b} " environment: OLLAMA_HOST: "http://ollama:11434" @@ -44,7 +44,7 @@ services: - "3002:3002" environment: OLLAMA_URL: "http://ollama:11434/api/generate" - OLLAMA_MODEL: "gpt-oss:20b" + OLLAMA_MODEL: "llama3.2:3b" MEDCAT_URL: "http://cohorter-medcat:5000" ALLOW_ORIGINS: "*" depends_on: diff --git a/helm-charts/cogstack-cohorter-helm/README.md b/helm-charts/cogstack-cohorter-helm/README.md index dc20e0e..fbbc31f 100644 --- a/helm-charts/cogstack-cohorter-helm/README.md +++ b/helm-charts/cogstack-cohorter-helm/README.md @@ -19,7 +19,7 @@ MedCAT and Ollama are deployed as **subcharts**: - Kubernetes 1.21+ - Helm 3.10+ -- Sufficient node resources for the Ollama model (the default `gpt-oss:20b` requires ~14 GB of memory/VRAM) +- Sufficient node resources for the Ollama model. The default `llama3.2:3b` (~2 GB) runs on CPU. For better NL → DSL accuracy, consider a larger model — see [Ollama model library](https://ollama.com/library) ## Installation @@ -41,7 +41,7 @@ ollama: ollama: models: pull: - - gpt-oss:20b # pulled automatically on first startup + - llama3.2:3b # pulled automatically on first startup persistentVolume: enabled: true size: 10Gi @@ -49,6 +49,8 @@ ollama: Models are pulled automatically by the otwld subchart's built-in init container. Change `ollama.ollama.models.pull` to use a different model — make sure `nl2dsl.env.OLLAMA_MODEL` matches. +`llama3.2:3b` is a lightweight default that runs on CPU but produces lower-quality NL → DSL results. For better accuracy, upgrade to a larger model such as `llama3.1:8b`, `llama3.3:70b`, or `gpt-oss:20b`. Browse all available models at [https://ollama.com/library](https://ollama.com/library). 
+ ### MedCAT ```yaml @@ -161,7 +163,7 @@ For issues and questions, please visit the [CogStack GitHub repository](https:// | nl2dsl.affinity | object | `{}` | | | nl2dsl.enabled | bool | `true` | | | nl2dsl.env.ALLOW_ORIGINS | string | `"*"` | | -| nl2dsl.env.OLLAMA_MODEL | string | `"gpt-oss:20b"` | | +| nl2dsl.env.OLLAMA_MODEL | string | `"llama3.2:3b"` | | | nl2dsl.image.pullPolicy | string | `"IfNotPresent"` | | | nl2dsl.image.repository | string | `"cogstacksystems/cogstack-cohorter-nl2dsl"` | | | nl2dsl.image.tag | string | `"latest"` | | @@ -180,7 +182,7 @@ For issues and questions, please visit the [CogStack GitHub repository](https:// | nl2dsl.service.type | string | `"ClusterIP"` | | | nl2dsl.tolerations | list | `[]` | | | ollama.enabled | bool | `true` | | -| ollama.ollama.models.pull[0] | string | `"gpt-oss:20b"` | | +| ollama.ollama.models.pull[0] | string | `"llama3.2:3b"` | | | ollama.persistentVolume.enabled | bool | `true` | | | ollama.persistentVolume.size | string | `"10Gi"` | | | ollama.persistentVolume.storageClass | string | `""` | | diff --git a/helm-charts/cogstack-cohorter-helm/values.yaml b/helm-charts/cogstack-cohorter-helm/values.yaml index d44e308..e456597 100644 --- a/helm-charts/cogstack-cohorter-helm/values.yaml +++ b/helm-charts/cogstack-cohorter-helm/values.yaml @@ -188,7 +188,11 @@ nl2dsl: # OLLAMA_URL and MEDCAT_URL are set automatically from subchart service names. env: # Ollama model to use for NL → DSL compilation. - OLLAMA_MODEL: "gpt-oss:20b" + # llama3.2:3b is a lightweight default that runs on CPU but produces lower-quality results. + # For better NL → DSL accuracy, upgrade to a larger model — e.g. llama3.1:8b, llama3.3:70b, + # or gpt-oss:20b. Must match ollama.ollama.models.pull below. + # Browse available models at: https://ollama.com/library + OLLAMA_MODEL: "llama3.2:3b" # CORS origins allowed to call the NL2DSL API. ALLOW_ORIGINS: "*" @@ -263,9 +267,11 @@ ollama: ollama: # Models pulled automatically by the built-in init container on first startup. # Ensure nl2dsl.env.OLLAMA_MODEL matches the model listed here. + # For better results, replace llama3.2:3b with a larger model — e.g. llama3.1:8b, + # llama3.3:70b, or gpt-oss:20b. Browse options at: https://ollama.com/library models: pull: - - gpt-oss:20b + - llama3.2:3b persistentVolume: enabled: true
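
To trade resource usage back for accuracy, both model values can be overridden at deploy time rather than by editing `values.yaml`. A sketch, assuming a local chart checkout and an illustrative release name (keep the two values in sync, as noted above):

```bash
# Illustrative release name and chart path; gpt-oss:20b restores the
# previous default and again needs ~14 GB of memory/VRAM.
helm upgrade --install cogstack-cohorter ./helm-charts/cogstack-cohorter-helm \
  --set nl2dsl.env.OLLAMA_MODEL="gpt-oss:20b" \
  --set "ollama.ollama.models.pull[0]=gpt-oss:20b"
```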