From b88b0f7a6fa73a444b1a78a7990c871771319619 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 11 May 2026 12:10:05 -0700 Subject: [PATCH 1/4] Add DeepSeek V4 Flash freebuff model --- agents/__tests__/base2.test.ts | 2 + agents/base2/base2-free-deepseek-flash.ts | 14 +++++ .../reviewer/code-reviewer-deepseek-flash.ts | 13 ++++ agents/types/agent-definition.ts | 2 + common/src/__tests__/free-agents.test.ts | 22 +++++++ common/src/__tests__/freebuff-models.test.ts | 15 +++++ common/src/constants/free-agents.ts | 10 +++ common/src/constants/freebuff-models.ts | 7 +++ common/src/constants/model-config.ts | 2 + .../types/agent-definition.ts | 2 + docs/freebuff-waiting-room.md | 62 +++++++++---------- freebuff/README.md | 2 +- freebuff/SPEC.md | 26 ++++---- freebuff/web/src/app/home-client.tsx | 2 +- .../completions/__tests__/completions.test.ts | 35 ++++++++--- .../deepseek-image-compat.integration.test.ts | 12 ++++ web/src/llm-api/deepseek-request-body.ts | 2 + web/src/llm-api/deepseek.ts | 22 ++++++- web/src/server/free-session/config.ts | 2 + 19 files changed, 199 insertions(+), 55 deletions(-) create mode 100644 agents/base2/base2-free-deepseek-flash.ts create mode 100644 agents/reviewer/code-reviewer-deepseek-flash.ts diff --git a/agents/__tests__/base2.test.ts b/agents/__tests__/base2.test.ts index fe102f032..a6da96c58 100644 --- a/agents/__tests__/base2.test.ts +++ b/agents/__tests__/base2.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from 'bun:test' import { + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MINIMAX_MODEL_ID, @@ -13,6 +14,7 @@ describe('base2 reviewer selection', () => { [FREEBUFF_MINIMAX_MODEL_ID, 'code-reviewer-minimax'], [FREEBUFF_KIMI_MODEL_ID, 'code-reviewer-kimi'], [FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, 'code-reviewer-deepseek'], + [FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, 'code-reviewer-deepseek-flash'], ])('uses matching reviewer for model %p', (model, expectedReviewer) => { const base2 = createBase2('free', { model }) diff --git a/agents/base2/base2-free-deepseek-flash.ts b/agents/base2/base2-free-deepseek-flash.ts new file mode 100644 index 000000000..611d03329 --- /dev/null +++ b/agents/base2/base2-free-deepseek-flash.ts @@ -0,0 +1,14 @@ +import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID } from '@codebuff/common/constants/freebuff-models' + +import { createBase2 } from './base2' + +const definition = { + ...createBase2('free', { + noAskUser: true, + model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, + }), + id: 'base2-free-deepseek-flash', + displayName: 'Buffy the DeepSeek Flash Free Orchestrator', +} + +export default definition diff --git a/agents/reviewer/code-reviewer-deepseek-flash.ts b/agents/reviewer/code-reviewer-deepseek-flash.ts new file mode 100644 index 000000000..23550079f --- /dev/null +++ b/agents/reviewer/code-reviewer-deepseek-flash.ts @@ -0,0 +1,13 @@ +import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID } from '@codebuff/common/constants/freebuff-models' + +import { publisher } from '../constants' +import type { SecretAgentDefinition } from '../types/secret-agent-definition' +import { createReviewer } from './code-reviewer' + +const definition: SecretAgentDefinition = { + id: 'code-reviewer-deepseek-flash', + publisher, + ...createReviewer(FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID), +} + +export default definition diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index 2d05e4e0b..030de3a14 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -417,6 +417,8 @@ export type ModelName = // DeepSeek | 'deepseek/deepseek-v4-pro' | 'deepseek-v4-pro' + | 'deepseek/deepseek-v4-flash' + | 'deepseek-v4-flash' | 'deepseek/deepseek-chat-v3-0324' | 'deepseek/deepseek-chat-v3-0324:nitro' | 'deepseek/deepseek-r1-0528' diff --git a/common/src/__tests__/free-agents.test.ts b/common/src/__tests__/free-agents.test.ts index 003e179b5..2a790b190 100644 --- a/common/src/__tests__/free-agents.test.ts +++ b/common/src/__tests__/free-agents.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from 'bun:test' import { + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, @@ -24,6 +25,9 @@ describe('free mode agent model allowlist', () => { expect( getFreebuffRootAgentIdForModel(FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID), ).toBe('base2-free-deepseek') + expect( + getFreebuffRootAgentIdForModel(FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID), + ).toBe('base2-free-deepseek-flash') }) test('allows each freebuff root agent only with its configured model', () => { @@ -48,6 +52,12 @@ describe('free mode agent model allowlist', () => { FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, ), ).toBe(true) + expect( + isFreeModeAllowedAgentModel( + 'base2-free-deepseek-flash', + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, + ), + ).toBe(true) }) test('allows each freebuff reviewer agent only with its configured model', () => { @@ -72,6 +82,12 @@ describe('free mode agent model allowlist', () => { FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, ), ).toBe(true) + expect( + isFreeModeAllowedAgentModel( + 'code-reviewer-deepseek-flash', + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, + ), + ).toBe(true) }) test('allows legacy code-reviewer-lite with freebuff reviewer models', () => { @@ -90,6 +106,12 @@ describe('free mode agent model allowlist', () => { FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, ), ).toBe(true) + expect( + isFreeModeAllowedAgentModel( + 'code-reviewer-lite', + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, + ), + ).toBe(true) }) test('allows the browser-use subagent with its bundled model', () => { diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts index 87ba03477..efdbc8b43 100644 --- a/common/src/__tests__/freebuff-models.test.ts +++ b/common/src/__tests__/freebuff-models.test.ts @@ -3,6 +3,7 @@ import { describe, expect, test } from 'bun:test' import { canFreebuffModelSpawnGeminiThinker, DEFAULT_FREEBUFF_MODEL_ID, + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, @@ -12,6 +13,7 @@ import { getFreebuffDeploymentAvailabilityLabel, isFreebuffDeploymentHours, isFreebuffModelId, + isFreebuffPremiumModelId, isSupportedFreebuffModelId, } from '../constants/freebuff-models' @@ -27,6 +29,16 @@ describe('freebuff model availability', () => { expect(deepseek?.warning).toBe('Collects data for training') }) + test('DeepSeek V4 Flash is selectable and unlimited', () => { + expect(FREEBUFF_MODELS.map((model) => model.id)).toContain( + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, + ) + expect(isFreebuffModelId(FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID)).toBe(true) + expect(isFreebuffPremiumModelId(FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID)).toBe( + false, + ) + }) + test('only smart freebuff models can spawn the gemini-thinker subagent', () => { expect(canFreebuffModelSpawnGeminiThinker(FREEBUFF_KIMI_MODEL_ID)).toBe( true, @@ -37,6 +49,9 @@ describe('freebuff model availability', () => { expect(canFreebuffModelSpawnGeminiThinker(FREEBUFF_MINIMAX_MODEL_ID)).toBe( false, ) + expect( + canFreebuffModelSpawnGeminiThinker(FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID), + ).toBe(false) }) test('supports GLM 5.1 as a legacy server-side model without selecting it for new clients', () => { diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 0159132d9..a14ca9f87 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -2,6 +2,7 @@ import { parseAgentId } from '../util/agent-id-parsing' import { FREEBUFF_GEMINI_THINKER_AGENT_ID } from './freebuff-gemini-thinker' import { + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, FREEBUFF_GLM_MODEL_ID, @@ -28,6 +29,7 @@ export const FREEBUFF_ROOT_AGENT_IDS = [ 'base2-free', 'base2-free-kimi', 'base2-free-deepseek', + 'base2-free-deepseek-flash', ] as const const FREEBUFF_ROOT_AGENT_ID_SET: ReadonlySet = new Set( FREEBUFF_ROOT_AGENT_IDS, @@ -40,12 +42,14 @@ export const FREEBUFF_ROOT_AGENT_ID_BY_MODEL: Record = { [FREEBUFF_MINIMAX_MODEL_ID]: 'base2-free', [FREEBUFF_KIMI_MODEL_ID]: 'base2-free-kimi', [FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]: 'base2-free-deepseek', + [FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID]: 'base2-free-deepseek-flash', } export const FREEBUFF_REVIEWER_AGENT_ID_BY_MODEL: Record = { [FREEBUFF_MINIMAX_MODEL_ID]: 'code-reviewer-minimax', [FREEBUFF_KIMI_MODEL_ID]: 'code-reviewer-kimi', [FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]: 'code-reviewer-deepseek', + [FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID]: 'code-reviewer-deepseek-flash', } export function getFreebuffRootAgentIdForModel(model: string): string { @@ -66,10 +70,12 @@ export const FREE_MODE_AGENT_MODELS: Record> = { FREEBUFF_MINIMAX_MODEL_ID, FREEBUFF_GLM_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, ]), 'base2-free-kimi': new Set([FREEBUFF_KIMI_MODEL_ID]), 'base2-free-deepseek': new Set([FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]), + 'base2-free-deepseek-flash': new Set([FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID]), // File exploration agents 'file-picker': new Set(['google/gemini-2.5-flash-lite']), @@ -93,12 +99,16 @@ export const FREE_MODE_AGENT_MODELS: Record> = { ]), 'code-reviewer-kimi': new Set([FREEBUFF_KIMI_MODEL_ID]), 'code-reviewer-deepseek': new Set([FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]), + 'code-reviewer-deepseek-flash': new Set([ + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, + ]), // Legacy freebuff clients spawned code-reviewer-lite under provider-specific // free roots before those reviewer IDs existed. 'code-reviewer-lite': new Set([ FREEBUFF_MINIMAX_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, ]), // Legacy: kept for the standalone gemini thinker agent if invoked directly. diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 434ed35f4..ec20e6423 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -34,6 +34,7 @@ export interface FreebuffModelOption { export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day' export const FREEBUFF_GEMINI_PRO_MODEL_ID = 'google/gemini-3.1-pro-preview' export const FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID = 'deepseek/deepseek-v4-pro' +export const FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID = 'deepseek/deepseek-v4-flash' export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6' export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7' @@ -86,6 +87,12 @@ export const FREEBUFF_MODELS = [ tagline: 'Fastest', availability: 'always', }, + { + id: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, + displayName: 'DeepSeek V4 Flash', + tagline: 'Fast', + availability: 'always', + }, ] as const satisfies readonly FreebuffModelOption[] export const LEGACY_FREEBUFF_MODELS = [ diff --git a/common/src/constants/model-config.ts b/common/src/constants/model-config.ts index e86e2adfe..f45d0ed16 100644 --- a/common/src/constants/model-config.ts +++ b/common/src/constants/model-config.ts @@ -65,6 +65,8 @@ export const deepseekModels = { deepseekReasoner: 'deepseek-reasoner', deepseekV4ProDirect: 'deepseek-v4-pro', deepseekV4Pro: 'deepseek/deepseek-v4-pro', + deepseekV4FlashDirect: 'deepseek-v4-flash', + deepseekV4Flash: 'deepseek/deepseek-v4-flash', } as const export type DeepseekModel = (typeof deepseekModels)[keyof typeof deepseekModels] diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index 2d05e4e0b..030de3a14 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -417,6 +417,8 @@ export type ModelName = // DeepSeek | 'deepseek/deepseek-v4-pro' | 'deepseek-v4-pro' + | 'deepseek/deepseek-v4-flash' + | 'deepseek-v4-flash' | 'deepseek/deepseek-chat-v3-0324' | 'deepseek/deepseek-chat-v3-0324:nitro' | 'deepseek/deepseek-r1-0528' diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 971353881..25999fb33 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -153,18 +153,18 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r ### Tunables -| Constant | Location | Default | Purpose | -|---|---|---|---| -| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | -| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `deepseek-v4-pro`, `kimi-k2.6`, `minimax-m2.7` | Selectable models; each gets its own queue and admission slot. | -| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | -| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | -| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | -| `SESSION_GRACE_MS` | `web/src/server/free-session/config.ts` | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | +| Constant | Location | Default | Purpose | +| ---------------------------- | ----------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | +| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `deepseek-v4-pro`, `kimi-k2.6`, `minimax-m2.7`, `deepseek-v4-flash` | Selectable models; each gets its own queue and admission slot. | +| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | +| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | +| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | +| `SESSION_GRACE_MS` | `web/src/server/free-session/config.ts` | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | ### Premium Session Quota -DeepSeek, Kimi, and legacy GLM share a per-user premium quota. The server counts `free_session_admit` rows from the last midnight in `America/Los_Angeles`; when the user reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax remains unlimited. +DeepSeek V4 Pro, Kimi, and legacy GLM share a per-user premium quota. The server counts `free_session_admit` rows from the last midnight in `America/Los_Angeles`; when the user reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax and DeepSeek V4 Flash remain unlimited. ## HTTP API @@ -264,13 +264,13 @@ For free-mode requests (`codebuff_metadata.cost_mode === 'free'`), `_post.ts` ca ### Response codes -| HTTP | `error` | When | -|---|---|---| -| 426 | `freebuff_update_required` | Request did not include a `freebuff_instance_id` — the client is a pre-waiting-room build. The CLI shows the server-supplied message verbatim. | -| 428 | `waiting_room_required` | No session row exists. Client should call POST /session. | -| 429 | `waiting_room_queued` | Row exists with `status='queued'`. Client should keep polling GET. | -| 409 | `session_superseded` | Claimed `instance_id` does not match stored one — another CLI took over. | -| 410 | `session_expired` | `expires_at + grace < now()` (past the hard cutoff). Client should POST /session to re-queue. | +| HTTP | `error` | When | +| ---- | -------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| 426 | `freebuff_update_required` | Request did not include a `freebuff_instance_id` — the client is a pre-waiting-room build. The CLI shows the server-supplied message verbatim. | +| 428 | `waiting_room_required` | No session row exists. Client should call POST /session. | +| 429 | `waiting_room_queued` | Row exists with `status='queued'`. Client should keep polling GET. | +| 409 | `session_superseded` | Claimed `instance_id` does not match stored one — another CLI took over. | +| 410 | `session_expired` | `expires_at + grace < now()` (past the hard cutoff). Client should POST /session to re-queue. | Successful results carry one of three reasons: `disabled` (gate is off), `active` (`expires_at > now()`, `remainingMs` provided), or `draining` (`expires_at <= now() < expires_at + grace`, `gracePeriodRemainingMs` provided). The CLI should treat `draining` as "let any in-flight agent run finish, but block new user prompts" — see [Drain / Grace Window](#drain--grace-window) below. The corresponding wire status from `getSessionState` is `ended`. @@ -320,25 +320,25 @@ The `disabled` response means the server has the waiting room turned off. CLI tr - **`/api/v1/freebuff/session` routes** are stateless per pod; all state lives in Postgres. Any pod can serve any request. - **Chat completions gate** is a single `SELECT` per free-mode request. At high QPS this is the hottest path — the `user_id` PK lookup is O(1). If it ever becomes a problem, the obvious fix is to cache the session row for ~1s per pod. -- **Admission loop** runs on every pod. Per-model advisory locks serialize admission *within* each model while allowing different models to admit on different pods concurrently. At any given tick, exactly one pod actually admits for each model; the rest early-return on that model's lock. +- **Admission loop** runs on every pod. Per-model advisory locks serialize admission _within_ each model while allowing different models to admit on different pods concurrently. At any given tick, exactly one pod actually admits for each model; the rest early-return on that model's lock. - **Fleet health probe** is cached per-pod (`HEALTH_CACHE_TTL_MS`, 25s). Each pod hits the Fireworks metrics endpoint at most ~2.4/min, staying under the 6 req/min account rate limit with a comfortable margin. ## Abuse Resistance Summary -| Attack | Mitigation | -|---|---| -| CLI keeps submitting new prompts past `expires_at` | Trusted client; bounded by 30-min hard cutoff at `expires_at + grace`. After that the gate returns `session_expired` and the user must re-queue. | -| Multiple sessions per account | PK on `user_id` — structurally impossible | -| Multiple CLIs sharing one session | `active_instance_id` rotates on POST; stale id → 409 | -| Client-forged timestamps | All timestamps server-supplied (`DEFAULT now()` or explicit) | -| Queue jumping via timestamp manipulation | `queued_at` is server-supplied; FIFO order is server-determined | -| Repeatedly calling POST to reset queue position | POST preserves `queued_at` for already-queued users | -| Two pods admitting the same user | Per-model `SELECT ... FOR UPDATE SKIP LOCKED` + per-model advisory xact lock | -| Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | -| Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. | -| Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. | -| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. | -| Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | +| Attack | Mitigation | +| ------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CLI keeps submitting new prompts past `expires_at` | Trusted client; bounded by 30-min hard cutoff at `expires_at + grace`. After that the gate returns `session_expired` and the user must re-queue. | +| Multiple sessions per account | PK on `user_id` — structurally impossible | +| Multiple CLIs sharing one session | `active_instance_id` rotates on POST; stale id → 409 | +| Client-forged timestamps | All timestamps server-supplied (`DEFAULT now()` or explicit) | +| Queue jumping via timestamp manipulation | `queued_at` is server-supplied; FIFO order is server-determined | +| Repeatedly calling POST to reset queue position | POST preserves `queued_at` for already-queued users | +| Two pods admitting the same user | Per-model `SELECT ... FOR UPDATE SKIP LOCKED` + per-model advisory xact lock | +| Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | +| Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. | +| Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. | +| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. | +| Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | ## Testing diff --git a/freebuff/README.md b/freebuff/README.md index 27a199a44..7e757ce41 100644 --- a/freebuff/README.md +++ b/freebuff/README.md @@ -54,7 +54,7 @@ freebuff **How can it be free?** Freebuff is supported by ads shown in the CLI. -**What models do you use?** DeepSeek V4 Pro (default, but its API collects data for training) or Kimi K2.6 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription. +**What models do you use?** DeepSeek V4 Pro (smartest, but its API collects data for training), Kimi K2.6, MiniMax M2.7, or DeepSeek V4 Flash as the main coding agent. Gemini 3.1 Flash Lite handles file finding and research, and GPT-5.4 handles deep thinking if you connect your ChatGPT subscription. **Are you training on my data?** No. We only use model providers that do not train on our requests. Your code stays yours. diff --git a/freebuff/SPEC.md b/freebuff/SPEC.md index ea973ba5a..134cd471c 100644 --- a/freebuff/SPEC.md +++ b/freebuff/SPEC.md @@ -72,19 +72,19 @@ Freebuff only supports **FREE mode**. All mode-related features are stripped. ### Commands to REMOVE in Freebuff -| Command | Reason | -| -------------------------------------------------- | --------------------------------------------------------- | -| `/subscribe` (+ `/strong`, `/sub`, `/buy-credits`) | No subscription model | -| `/usage` (+ `/credits`) | No credits display | -| `/ads:enable` | Ads always on, not toggleable | -| `/ads:disable` | Ads always on, not toggleable | -| `/connect:claude` (+ `/claude`) | Claude subscription not available | -| `/refer-friends` (+ `/referral`, `/redeem`) | Referrals earn credits, not applicable | -| `/mode:*` (all mode commands) | Only FREE mode | -| `/agent:gpt-5` | Premium agent, not available in free tier | -| `/review` | Uses thinker-gpt under the hood | -| `/publish` | Agent publishing not available in free tier | -| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free models (Kimi K2.6, DeepSeek V4 Pro) | +| Command | Reason | +| -------------------------------------------------- | ---------------------------------------------------------------------------------------------- | +| `/subscribe` (+ `/strong`, `/sub`, `/buy-credits`) | No subscription model | +| `/usage` (+ `/credits`) | No credits display | +| `/ads:enable` | Ads always on, not toggleable | +| `/ads:disable` | Ads always on, not toggleable | +| `/connect:claude` (+ `/claude`) | Claude subscription not available | +| `/refer-friends` (+ `/referral`, `/redeem`) | Referrals earn credits, not applicable | +| `/mode:*` (all mode commands) | Only FREE mode | +| `/agent:gpt-5` | Premium agent, not available in free tier | +| `/review` | Uses thinker-gpt under the hood | +| `/publish` | Agent publishing not available in free tier | +| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free models (Kimi K2.6, DeepSeek V4 Pro, DeepSeek V4 Flash) | ### Commands to KEEP diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index 4721640f9..c032050ef 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -26,7 +26,7 @@ const faqs = [ { question: 'What models do you use?', answer: - 'You can choose from:\n\n- DeepSeek V4 Pro: smartest. Its API collects data for training.\n- Kimi K2.6: balanced.\n- MiniMax M2.7: fastest.\n\nAlso, Gemini 3.1 Flash Lite handles file finding and research. Connect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.', + 'You can choose from:\n\n- DeepSeek V4 Pro: smartest. Its API collects data for training.\n- Kimi K2.6: balanced.\n- MiniMax M2.7: fastest.\n- DeepSeek V4 Flash: fast.\n\nAlso, Gemini 3.1 Flash Lite handles file finding and research. Connect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.', }, { question: 'Which countries is Freebuff available in?', diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 84c49f4fe..63ba06aa3 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' import { NextRequest } from 'next/server' import { + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, FREEBUFF_GLM_MODEL_ID, @@ -161,6 +162,13 @@ describe('/api/v1/chat/completions POST endpoint', () => { status: 'running', } } + if (runId === 'run-free-deepseek-flash') { + return { + agent_id: 'base2-free-deepseek-flash', + ancestor_run_ids: [], + status: 'running', + } + } if (runId === 'run-reviewer-direct') { return { agent_id: 'code-reviewer-minimax', @@ -795,9 +803,20 @@ describe('/api/v1/chat/completions POST endpoint', () => { FETCH_PATH_TEST_TIMEOUT_MS, ) - it( - 'lets the DeepSeek V4 free agent use the direct DeepSeek provider', - async () => { + it.each([ + { + codebuffModel: FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, + upstreamModel: 'deepseek-v4-pro', + runId: 'run-free-deepseek', + }, + { + codebuffModel: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, + upstreamModel: 'deepseek-v4-flash', + runId: 'run-free-deepseek-flash', + }, + ])( + 'lets $codebuffModel use the direct DeepSeek provider', + async ({ codebuffModel, upstreamModel, runId }) => { const fetchedBodies: Record[] = [] const fetchedUrls: string[] = [] const fetchViaDeepSeek = mock( @@ -811,7 +830,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { return new Response( JSON.stringify({ id: 'test-id', - model: 'deepseek-v4-pro', + model: upstreamModel, choices: [{ message: { content: 'test response' } }], usage: { prompt_tokens: 10, @@ -834,10 +853,10 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: allowedFreeModeHeaders('test-api-key-new-free'), body: JSON.stringify({ - model: FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, + model: codebuffModel, stream: false, codebuff_metadata: { - run_id: 'run-free-deepseek', + run_id: runId, client_id: 'test-client-id-123', cost_mode: 'free', }, @@ -861,8 +880,8 @@ describe('/api/v1/chat/completions POST endpoint', () => { const body = await response.json() expect(response.status).toBe(200) expect(fetchedUrls[0]).toBe('https://api.deepseek.com/chat/completions') - expect(fetchedBodies[0].model).toBe('deepseek-v4-pro') - expect(body.model).toBe(FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID) + expect(fetchedBodies[0].model).toBe(upstreamModel) + expect(body.model).toBe(codebuffModel) expect(body.provider).toBe('DeepSeek') }, FETCH_PATH_TEST_TIMEOUT_MS, diff --git a/web/src/llm-api/__tests__/deepseek-image-compat.integration.test.ts b/web/src/llm-api/__tests__/deepseek-image-compat.integration.test.ts index 35ba1957b..fb9d58e21 100644 --- a/web/src/llm-api/__tests__/deepseek-image-compat.integration.test.ts +++ b/web/src/llm-api/__tests__/deepseek-image-compat.integration.test.ts @@ -51,6 +51,18 @@ describe('normalizeDeepSeekRequestBody', () => { }) }) + it('maps DeepSeek V4 Flash to the direct DeepSeek model id', () => { + const body: ChatCompletionRequestBody = { + model: 'deepseek/deepseek-v4-flash', + messages: [{ role: 'user', content: 'Hello' }], + } + + expect(normalizeDeepSeekRequestBody(body)).toEqual({ + ...body, + model: 'deepseek-v4-flash', + }) + }) + it('does not throw on minimal provider-path bodies without messages', () => { const body = { model: 'deepseek/deepseek-v4-pro', diff --git a/web/src/llm-api/deepseek-request-body.ts b/web/src/llm-api/deepseek-request-body.ts index 582e690ef..33c3ffcb5 100644 --- a/web/src/llm-api/deepseek-request-body.ts +++ b/web/src/llm-api/deepseek-request-body.ts @@ -5,6 +5,8 @@ import type { ChatCompletionRequestBody } from './types' export const DEEPSEEK_MODEL_IDS: Record = { [deepseekModels.deepseekV4ProDirect]: deepseekModels.deepseekV4ProDirect, [deepseekModels.deepseekV4Pro]: deepseekModels.deepseekV4ProDirect, + [deepseekModels.deepseekV4FlashDirect]: deepseekModels.deepseekV4FlashDirect, + [deepseekModels.deepseekV4Flash]: deepseekModels.deepseekV4FlashDirect, } export function getDeepSeekModelId(openrouterModel: string): string { diff --git a/web/src/llm-api/deepseek.ts b/web/src/llm-api/deepseek.ts index 037851410..e2adfdfca 100644 --- a/web/src/llm-api/deepseek.ts +++ b/web/src/llm-api/deepseek.ts @@ -1,6 +1,7 @@ import { Agent } from 'undici' import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' +import { deepseekModels } from '@codebuff/common/constants/model-config' import { getErrorObject } from '@codebuff/common/util/error' import { env } from '@codebuff/internal/env' @@ -43,6 +44,17 @@ const DEEPSEEK_V4_PRO_PRICING: DeepSeekPricing = { outputCostPerToken: 0.87 / 1_000_000, } +const DEEPSEEK_V4_FLASH_PRICING: DeepSeekPricing = { + inputCostPerToken: 0.14 / 1_000_000, + cachedInputCostPerToken: 0.0028 / 1_000_000, + outputCostPerToken: 0.28 / 1_000_000, +} + +const DEEPSEEK_PRICING_BY_DIRECT_MODEL_ID: Record = { + [deepseekModels.deepseekV4ProDirect]: DEEPSEEK_V4_PRO_PRICING, + [deepseekModels.deepseekV4FlashDirect]: DEEPSEEK_V4_FLASH_PRICING, +} + const DEEPSEEK_MODELS: Record< string, { deepseekId: string; pricing: DeepSeekPricing } @@ -51,7 +63,7 @@ const DEEPSEEK_MODELS: Record< model, { deepseekId, - pricing: DEEPSEEK_V4_PRO_PRICING, + pricing: getPricingForDeepSeekId(deepseekId), }, ]), ) @@ -70,6 +82,14 @@ function getDeepSeekPricing(model: string): DeepSeekPricing { return entry.pricing } +function getPricingForDeepSeekId(deepseekId: string): DeepSeekPricing { + const pricing = DEEPSEEK_PRICING_BY_DIRECT_MODEL_ID[deepseekId] + if (!pricing) { + throw new Error(`No DeepSeek pricing found for direct model: ${deepseekId}`) + } + return pricing +} + type StreamState = { responseText: string reasoningText: string diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index b096fd989..da51cee0e 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -1,4 +1,5 @@ import { + FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, @@ -55,6 +56,7 @@ export function getSessionGraceMs(): number { * queue). */ const INSTANT_ADMIT_CAPACITY: Record = { + [FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID]: 1000, [FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]: 1000, [FREEBUFF_GLM_MODEL_ID]: 50, [FREEBUFF_KIMI_MODEL_ID]: 1000, From d7269255a7be09222ed4f96c82728e5ccdb8fbd4 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 11 May 2026 12:20:50 -0700 Subject: [PATCH 2/4] Stabilize chat completions provider tests --- .../app/api/v1/chat/completions/__tests__/completions.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 63ba06aa3..1ec5a37a5 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1,6 +1,7 @@ import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' import { NextRequest } from 'next/server' +import { TEST_USER_ID } from '@codebuff/common/constants/paths' import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, @@ -29,7 +30,7 @@ import type { GetUserPreferencesFn } from '../_post' describe('/api/v1/chat/completions POST endpoint', () => { const mockUserData: Record = { 'test-api-key-123': { - id: 'user-123', + id: TEST_USER_ID, banned: false, }, 'test-api-key-no-credits': { From 2cdd99fa5bb021187d5b8848a3b6028bf88299d1 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 11 May 2026 12:27:38 -0700 Subject: [PATCH 3/4] Allow DeepSeek free agents to ask users --- agents/base2/base2-free-deepseek-flash.ts | 1 - agents/base2/base2-free-deepseek.ts | 1 - 2 files changed, 2 deletions(-) diff --git a/agents/base2/base2-free-deepseek-flash.ts b/agents/base2/base2-free-deepseek-flash.ts index 611d03329..77dd48543 100644 --- a/agents/base2/base2-free-deepseek-flash.ts +++ b/agents/base2/base2-free-deepseek-flash.ts @@ -4,7 +4,6 @@ import { createBase2 } from './base2' const definition = { ...createBase2('free', { - noAskUser: true, model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, }), id: 'base2-free-deepseek-flash', diff --git a/agents/base2/base2-free-deepseek.ts b/agents/base2/base2-free-deepseek.ts index 6b40e3489..b73bb4730 100644 --- a/agents/base2/base2-free-deepseek.ts +++ b/agents/base2/base2-free-deepseek.ts @@ -4,7 +4,6 @@ import { createBase2 } from './base2' const definition = { ...createBase2('free', { - noAskUser: true, model: FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, }), id: 'base2-free-deepseek', From 54ee4530890be9cff833ef57ffce524d579ee08c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 11 May 2026 12:30:10 -0700 Subject: [PATCH 4/4] Update DeepSeek Flash tagline --- common/src/constants/freebuff-models.ts | 2 +- freebuff/web/src/app/home-client.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index ec20e6423..173da1587 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -90,7 +90,7 @@ export const FREEBUFF_MODELS = [ { id: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, displayName: 'DeepSeek V4 Flash', - tagline: 'Fast', + tagline: 'Most efficient', availability: 'always', }, ] as const satisfies readonly FreebuffModelOption[] diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index c032050ef..5e30128cc 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -26,7 +26,7 @@ const faqs = [ { question: 'What models do you use?', answer: - 'You can choose from:\n\n- DeepSeek V4 Pro: smartest. Its API collects data for training.\n- Kimi K2.6: balanced.\n- MiniMax M2.7: fastest.\n- DeepSeek V4 Flash: fast.\n\nAlso, Gemini 3.1 Flash Lite handles file finding and research. Connect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.', + 'You can choose from:\n\n- DeepSeek V4 Pro: smartest. Its API collects data for training.\n- Kimi K2.6: balanced.\n- MiniMax M2.7: fastest.\n- DeepSeek V4 Flash: most efficient.\n\nAlso, Gemini 3.1 Flash Lite handles file finding and research. Connect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.', }, { question: 'Which countries is Freebuff available in?',