Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions packages/agentic-server/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# agentic-server

Standalone Express LLM service — agent threads, chat streaming, billing metering, and inference logging via `@constructive-io/express-context`.

## Overview

`agentic-server` is the Express-only equivalent of what `graphile-llm` does inside PostGraphile. It provides the same capabilities (agent threads, chat, embeddings, billing, inference logging) but as a standalone Express router that uses `@constructive-io/express-context` for tenant-scoped database access.

## Usage

```typescript
import express from 'express';
import { createContextMiddleware } from '@constructive-io/express-context';
import { createAgenticRouter } from 'agentic-server';

const app = express();

// Tenant context middleware (domain resolution, JWT, pgSettings, withPgClient)
app.use(createContextMiddleware());

// Mount the agentic router
app.use(createAgenticRouter());

app.listen(3001, () => {
console.log('agentic-server running on :3001');
});
```

## Endpoints

| Method | Path | Description |
|--------|------|-------------|
| POST | `/v1/threads` | Create a new conversation thread |
| POST | `/v1/threads/:thread_id/messages` | Send messages + get AI response (streaming SSE) |
| POST | `/v1/orgs/:entity_id/threads` | Create thread (entity-scoped) |
| POST | `/v1/orgs/:entity_id/threads/:thread_id/messages` | Send message (entity-scoped) |
| POST | `/v1/embed` | Generate embeddings |

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `CHAT_PROVIDER` | `ollama` | Chat LLM provider |
| `CHAT_MODEL` | `llama3` | Default chat model |
| `CHAT_BASE_URL` | `http://localhost:11434` | Chat provider URL |
| `EMBEDDER_PROVIDER` | `ollama` | Embedding provider |
| `EMBEDDER_MODEL` | `nomic-embed-text` | Default embedding model |
| `EMBEDDER_BASE_URL` | `http://localhost:11434` | Embedding provider URL |

## Features

- **Thread management**: Create and manage conversation threads with system prompts
- **Streaming chat**: SSE streaming responses (OpenAI-compatible format)
- **Batch chat**: Non-streaming JSON responses
- **Embeddings**: Generate vector embeddings via `/v1/embed`
- **Billing integration**: Automatic quota checks and usage recording (when billing module is provisioned)
- **Inference logging**: Automatic token usage logging (when inference_log module is provisioned)
- **RLS enforcement**: All database operations run within tenant-scoped RLS transactions

## Architecture

```
Cloud Function → POST /v1/threads/:id/messages → agentic-server
┌───────────────────┼───────────────────┐
│ │ │
express-context OllamaAdapter Billing
(tenant DB context) (LLM provider) (quota + usage)
```

The cloud function doesn't need to know about databases, billing, or LLM providers. It just POSTs `{ messages, model }` and gets back a streamed response.
52 changes: 52 additions & 0 deletions packages/agentic-server/__tests__/cache.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { TtlCache } from '../src/cache';

describe('TtlCache', () => {
it('stores and retrieves values', () => {
const cache = new TtlCache<string>(60_000);
cache.set('key1', 'value1');
expect(cache.get('key1')).toBe('value1');
});

it('returns undefined for missing keys', () => {
const cache = new TtlCache<string>(60_000);
expect(cache.get('missing')).toBeUndefined();
});

it('expires entries after TTL', () => {
const cache = new TtlCache<string>(1); // 1ms TTL
cache.set('key1', 'value1');

// Wait for TTL to expire
const start = Date.now();
while (Date.now() - start < 5) {
// spin
}

expect(cache.get('key1')).toBeUndefined();
});

it('deletes entries', () => {
const cache = new TtlCache<string>(60_000);
cache.set('key1', 'value1');
cache.delete('key1');
expect(cache.get('key1')).toBeUndefined();
});

it('clears all entries', () => {
const cache = new TtlCache<string>(60_000);
cache.set('a', '1');
cache.set('b', '2');
expect(cache.size).toBe(2);
cache.clear();
expect(cache.size).toBe(0);
expect(cache.get('a')).toBeUndefined();
});

it('tracks size correctly', () => {
const cache = new TtlCache<number>(60_000);
expect(cache.size).toBe(0);
cache.set('x', 1);
cache.set('y', 2);
expect(cache.size).toBe(2);
});
});
47 changes: 47 additions & 0 deletions packages/agentic-server/__tests__/env.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { getEnvOptions } from '../src/env';

describe('getEnvOptions', () => {
const originalEnv = process.env;

beforeEach(() => {
process.env = { ...originalEnv };
});

afterAll(() => {
process.env = originalEnv;
});

it('returns defaults when no env vars set', () => {
delete process.env.CHAT_PROVIDER;
delete process.env.CHAT_MODEL;
delete process.env.CHAT_BASE_URL;
delete process.env.EMBEDDER_PROVIDER;
delete process.env.EMBEDDER_MODEL;
delete process.env.EMBEDDER_BASE_URL;

const opts = getEnvOptions();
expect(opts.chat.provider).toBe('ollama');
expect(opts.chat.model).toBe('llama3');
expect(opts.chat.baseUrl).toBe('http://localhost:11434');
expect(opts.embedding.provider).toBe('ollama');
expect(opts.embedding.model).toBe('nomic-embed-text');
expect(opts.embedding.baseUrl).toBe('http://localhost:11434');
});

it('reads from env vars', () => {
process.env.CHAT_PROVIDER = 'openai';
process.env.CHAT_MODEL = 'gpt-4';
process.env.CHAT_BASE_URL = 'https://api.openai.com';
process.env.EMBEDDER_PROVIDER = 'cohere';
process.env.EMBEDDER_MODEL = 'embed-v3';
process.env.EMBEDDER_BASE_URL = 'https://api.cohere.ai';

const opts = getEnvOptions();
expect(opts.chat.provider).toBe('openai');
expect(opts.chat.model).toBe('gpt-4');
expect(opts.chat.baseUrl).toBe('https://api.openai.com');
expect(opts.embedding.provider).toBe('cohere');
expect(opts.embedding.model).toBe('embed-v3');
expect(opts.embedding.baseUrl).toBe('https://api.cohere.ai');
});
});
18 changes: 18 additions & 0 deletions packages/agentic-server/jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
transform: {
'^.+\\.tsx?$': [
'ts-jest',
{
babelConfig: false,
tsconfig: 'tsconfig.json',
},
],
},
transformIgnorePatterns: [`/node_modules/*`],
testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$',
moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
modulePathIgnorePatterns: ['dist/*']
};
58 changes: 58 additions & 0 deletions packages/agentic-server/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"name": "agentic-server",
"version": "0.1.0",
"description": "Standalone Express LLM service — agent threads, chat streaming, billing metering, and inference logging via express-context",
"author": "Constructive <developers@constructive.io>",
"homepage": "https://github.com/constructive-io/constructive",
"license": "MIT",
"main": "index.js",
"module": "esm/index.js",
"types": "index.d.ts",
"publishConfig": {
"access": "public",
"directory": "dist"
},
"repository": {
"type": "git",
"url": "https://github.com/constructive-io/constructive"
},
"bugs": {
"url": "https://github.com/constructive-io/constructive/issues"
},
"scripts": {
"clean": "makage clean",
"prepack": "npm run build",
"build": "makage build",
"build:dev": "makage build --dev",
"lint": "eslint . --fix",
"test": "jest --forceExit",
"test:watch": "jest --watch"
},
"dependencies": {
"@agentic-kit/ollama": "^2.0.0",
"@constructive-io/express-context": "workspace:^",
"@pgpmjs/logger": "workspace:^",
"pg": "^8.21.0",
"pg-cache": "workspace:^"
},
"devDependencies": {
"@types/express": "^5.0.6",
"@types/node": "^22.19.11",
"@types/pg": "^8.20.0",
"express": "^5.2.1",
"makage": "^0.3.0"
},
"peerDependencies": {
"express": "^5.0.0"
},
"keywords": [
"agentic-kit",
"express",
"llm",
"ollama",
"streaming",
"billing",
"metering",
"constructive"
]
}
97 changes: 97 additions & 0 deletions packages/agentic-server/src/billing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/**
* billing — Quota check and usage recording via tenant database
*
* Calls the billing functions discovered from billing_module.
* Gracefully allows requests if billing is not provisioned or errors.
*/

import { Logger } from '@pgpmjs/logger';
import type { ConstructiveContext } from '@constructive-io/express-context';

import type { BillingConfig, InferenceLogConfig } from './discovery';

const log = new Logger('agentic-server:billing');

// ─── Quota Check ────────────────────────────────────────────────────────────

export async function checkQuota(
ctx: ConstructiveContext,
billing: BillingConfig,
entityId: string,
meterSlug: string,
): Promise<boolean> {
try {
return await ctx.withPgClient(async (client) => {
const sql = `SELECT "${billing.privateSchema}"."${billing.checkBillingQuotaFunction}"($1, $2::uuid, $3) AS allowed`;
const result = await client.query(sql, [meterSlug, entityId, 1]);
return result.rows[0]?.allowed !== false;
});
} catch (e: unknown) {
const message = e instanceof Error ? e.message : String(e);
log.warn(`check_billing_quota failed (allowing): ${message}`);
return true;
}
}

// ─── Usage Recording ────────────────────────────────────────────────────────

export async function recordUsage(
ctx: ConstructiveContext,
billing: BillingConfig,
entityId: string,
meterSlug: string,
amount: number,
metadata: Record<string, unknown>,
): Promise<void> {
try {
await ctx.withPgClient(async (client) => {
const sql = `SELECT "${billing.privateSchema}"."${billing.recordUsageFunction}"($1, $2::uuid, $3, $4::jsonb)`;
await client.query(sql, [meterSlug, entityId, amount, JSON.stringify(metadata)]);
});
} catch (e: unknown) {
const message = e instanceof Error ? e.message : String(e);
log.warn(`record_usage failed (non-fatal): ${message}`);
}
}

// ─── Inference Logging ──────────────────────────────────────────────────────

export interface InferenceLogEntry {
entityId: string;
actorId: string;
model: string;
provider: string;
service: string;
operation: string;
inputTokens: number;
outputTokens: number;
totalTokens: number;
latencyMs: number;
status: string;
}

export async function logInference(
ctx: ConstructiveContext,
logConfig: InferenceLogConfig,
entry: InferenceLogEntry,
): Promise<void> {
try {
await ctx.withPgClient(async (client) => {
await client.query(
`INSERT INTO "${logConfig.schema}"."${logConfig.tableName}"
(entity_id, actor_id, model, provider, service, operation,
input_tokens, output_tokens, total_tokens, latency_ms, status)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)`,
[
entry.entityId, entry.actorId, entry.model,
entry.provider, entry.service, entry.operation,
entry.inputTokens, entry.outputTokens, entry.totalTokens,
entry.latencyMs, entry.status,
],
);
});
} catch (e: unknown) {
const message = e instanceof Error ? e.message : String(e);
log.warn(`inference log INSERT failed (non-fatal): ${message}`);
}
}
46 changes: 46 additions & 0 deletions packages/agentic-server/src/cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/**
* cache — Simple TTL cache for module discovery results
*
* Avoids a dependency on graphile-cache by providing a minimal
* Map + TTL implementation. Entries expire after the configured TTL.
*/

interface CacheEntry<T> {
value: T;
expiresAt: number;
}

export class TtlCache<T> {
private store = new Map<string, CacheEntry<T>>();
private ttlMs: number;

constructor(ttlMs: number) {
this.ttlMs = ttlMs;
}

get(key: string): T | undefined {
const entry = this.store.get(key);
if (!entry) return undefined;
if (Date.now() > entry.expiresAt) {
this.store.delete(key);
return undefined;
}
return entry.value;
}

set(key: string, value: T): void {
this.store.set(key, { value, expiresAt: Date.now() + this.ttlMs });
}

delete(key: string): void {
this.store.delete(key);
}

clear(): void {
this.store.clear();
}

get size(): number {
return this.store.size;
}
}
Loading
Loading