diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 090cd81..5f9abd3 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -69,7 +69,9 @@ jobs: if: vars.SUPABASE_PROJECT_REF != '' env: SUPABASE_ACCESS_TOKEN: ${{ secrets.SUPABASE_ACCESS_TOKEN }} - run: supabase functions deploy venice --project-ref ${{ vars.SUPABASE_PROJECT_REF }} + run: | + supabase functions deploy venice --project-ref ${{ vars.SUPABASE_PROJECT_REF }} + supabase functions deploy expire-attachments --project-ref ${{ vars.SUPABASE_PROJECT_REF }} build: needs: sync-supabase diff --git a/docs/dev/attachments.md b/docs/dev/attachments.md index bed330a..e12cc95 100644 --- a/docs/dev/attachments.md +++ b/docs/dev/attachments.md @@ -4,10 +4,12 @@ > private `attachments` Storage bucket (the `documents` pattern), not as > base64 in `message_attachments.data`. Liveness keys on `storage_path`, > reads go through signed URLs, and the legacy base64 was reclaimed -> one-time (pre-bucket rows are treated as expired). Still pending: -> (1) the **expiry sweep is not yet server-side** - the old browser -> worker / `expire_old_attachments` RPC are inert, so bucket objects do -> not yet expire; (2) the `data` column drop. See +> one-time (pre-bucket rows are treated as expired). Expiry now runs +> **server-side** (the standalone `expire-attachments` edge function + +> hourly cron deletes bucket objects 30 days after a thread goes +> dormant). Still pending: (1) retiring the now-INERT browser +> `attachment_expiry` worker + `expire_old_attachments` RPC (cleanup); +> (2) the `data` column drop. See > [`./in-progress/attachments-storage-migration.md`](./in-progress/attachments-storage-migration.md). > Sections below are mid-update; where they describe base64-in-`data`, > read it as "historical, now storage_path + bucket". diff --git a/docs/dev/in-progress/attachments-storage-migration.md b/docs/dev/in-progress/attachments-storage-migration.md index 750dbf4..460cb86 100644 --- a/docs/dev/in-progress/attachments-storage-migration.md +++ b/docs/dev/in-progress/attachments-storage-migration.md @@ -174,10 +174,23 @@ row with `data` set but no `storage_path` just gets `data` nulled `data` column stays (write-never, read-never after the reclaim). The old browser expiry worker / `expire_old_attachments` RPC are left INERT - so bucket objects do not yet expire. -- **Stage 2 - server-side expiry (TODO).** The pg_cron + edge- - function sweep described under Expiry above; retire the browser - `attachment_expiry` worker + the inert RPC. Until this lands, - uploaded objects accumulate in the bucket. +- **Stage 2 - server-side expiry sweep (LANDED, server side).** The + standalone `expire-attachments` edge function + hourly pg_cron job, + with the `list_expirable_attachments` / `mark_attachments_expired` + service-role RPCs and the I/O-free `runExpiry` drain loop + (`_shared/expire-attachments.ts`, unit-tested). Deployed via its own + line in `deploy.yml`. Cron/Storage round-trip is unverified from the + cloud env - confirm after deploy that an object actually disappears + ~30 days after its thread goes dormant. +- **Stage 2b - retire the browser worker (TODO, cleanup).** The + `attachment_expiry` UNIT inside the supervisor + (`src/lib/agents/supervisor/{loop,manager,worker}.ts` + + `src/lib/agents/attachment_expiry/`), `SupabaseService.expireOldAttachments`, + the `expire_old_attachments` RPC, and `tests/attachment-expiry-loop.test.ts`. + Left in place for now because it is INERT post-Stage-1 (the RPC nulls + `data where data is not null`, which matches no rows after the reclaim, + so it no-ops). Pure deletion; deferred only to avoid rushed supervisor + surgery. - **Collapse PR (follow-up)** drops the `data` column AND removes the reclaim UPDATE together. Splitting is required: a single apply can't both keep an UPDATE that references `data` and drop diff --git a/docs/dev/in-progress/venice-edge-functions/text-parser.md b/docs/dev/in-progress/venice-edge-functions/text-parser.md index 955556b..7f6f489 100644 --- a/docs/dev/in-progress/venice-edge-functions/text-parser.md +++ b/docs/dev/in-progress/venice-edge-functions/text-parser.md @@ -1,17 +1,81 @@ # Text parser milestone -*Skeleton - embeddings lessons folded in (step 8); target state -still to define.* Part of the [Venice edge functions](./README.md) -project. +*Now bug-driven: text extraction is **broken from the browser** +(see [Motivating bug](#motivating-bug-text-extraction-broken-from-the-browser)), +so this milestone is the fix, not just a consolidation. Embeddings +lessons folded in (step 8).* Part of the +[Venice edge functions](./README.md) project. Wraps `POST /augment/text-parser` (`VeniceClient.extractText`) as a `/text-parser` route on the `venice` function. This is the endpoint behind the attachments flow (see -[attachments](../../attachments.md)). +[attachments](../../attachments.md)) AND the Library document +upload (see [library](../../library.md)). The one with a file upload: `extractText` posts a `Blob` + filename as multipart, unlike the JSON-bodied endpoints. +## Motivating bug: text extraction broken from the browser + +Observed 2026-05-30 during attachments-storage QA, on current +`main`. + +**Symptom.** Uploading any non-image file (tried `.pdf`, +`.txt`, `.md`, all <1 KB) fails at the composer: the attachment +pill turns red and sending shows + +> "dishes.txt": Text extraction failed: Network error +> contacting Venice: Failed to fetch + +That string is `extractText`'s catch block in +`src/lib/venice.ts` wrapping a thrown `TypeError: Failed to +fetch` as `VeniceError(..., 'network')`. "Failed to fetch" is a +browser network/CORS-layer rejection - the request never +completed in a CORS-readable way - NOT an HTTP error response +(a 4xx/5xx would surface through `classifyError`, with a status). + +**Why it is CORS, not a bad request.** + +- The endpoint path is correct and current: Venice's docs list + `POST /api/v1/augment/text-parser`, multipart/form-data, + accepting PDF/DOCX/PPTX/XLSX/plain-text up to 25 MB - exactly + the files that failed. So it is not a wrong URL (would 404) + or a rejected format (would 400); both are HTTP errors, not + "Failed to fetch". +- `app.venice` is a `VeniceClient` pointed at the default + `https://api.venice.ai/api/v1` (`new VeniceClient({ apiKey })` + in `src/lib/state.svelte.ts`), calling **directly from the + browser**. +- Image paths work against the **same host and key**: inline + vision (`/chat/completions`) and `analyze_image` were verified + live in the same session, and `generate_image` hits + `/image/generate`. So the browser can reach Venice with CORS + for those endpoints - the key, origin, and general CORS are + fine. +- Therefore the failure is **endpoint-specific**: Venice + CORS-enables its chat/image endpoints but evidently NOT + `/augment/text-parser`. (Their docs make no browser-safety + claim for it.) A 404 whose error response omits CORS headers + would also read as "Failed to fetch", so a quietly-moved or + gated route is a secondary possibility - but the path matches + the live docs. + +**This is pre-existing, not caused by the attachments-storage +migration.** Extraction runs at the composer BEFORE any storage +write; the storage migration is entirely downstream. It was +simply never exercised live with a non-image file until now +(image uploads skip extraction). Unconfirmed whether it ever +worked from the browser or Venice tightened CORS later - the +browser devtools Network entry for the failed request (CORS +error vs a status code) would settle that, but does not change +the fix. + +**The fix = this milestone.** Routing text extraction through +the `venice` edge function makes the call **server-side**, where +browser CORS does not apply and the project-global key already +lives. Fixes both the chat-attachment path and the Library +upload path in one move. + ## Why this one is different - **Multipart upload.** The request carries file bytes, not @@ -26,19 +90,63 @@ filename as multipart, unlike the JSON-bodied endpoints. ## Current state -To document: `extractText` in `src/lib/venice.ts`, the -attachments upload flow that calls it, and the size/expiration -constraints from the attachments feature doc. +`extractText(file: Blob, filename: string): Promise` in +`src/lib/venice.ts`: + +- Builds a `FormData` with `file` (the Blob + filename) and + `response_format: 'json'`. +- `POST`s to `${baseUrl}/augment/text-parser` with ONLY an + `Authorization: Bearer ` header - deliberately not + `this.headers()`, because a JSON Content-Type would clobber + the multipart boundary the browser sets. +- On a thrown fetch error wraps it as `VeniceError('Network + error contacting Venice: ...', 'network')` - the string the + user saw. +- On `!res.ok` calls `classifyError`; on success reads `text` + off the JSON body (with a couple of fallback keys). + +**Two call sites**, both must end up routed server-side: + +1. **Chat attachments** - `src/screens/Chat.svelte` + `addAttachment(file)` calls `app.venice.extractText(file, + file.name)` for non-image files at compose time, before the + message is sent. This is the path the bug report hit. +2. **Library uploads** - `src/lib/documents.ts` `ingestDocument` + calls `venice.extractText(file, file.name)` after the bucket + upload; a failure there marks the document `extraction_status + = 'failed'` (the doc is still stored, just not searchable). + +Size/expiration constraints to mind: attachments cap at +`MAX_ATTACHMENT_BYTES` (10 MB); Library at +`MAX_DOCUMENT_FILE_BYTES` (25 MB) - which is also Venice's +text-parser limit. The edge-function request-size ceiling vs +these caps is the open question below. ## Target state -To define. +A `/text-parser` route on the `venice` edge function that accepts +the multipart upload (user JWT auth, `verify_jwt` on, shared key +from `app_config` via service role - copy `/embed`'s model, not +`/backfill`'s), forwards it to Venice server-side, and returns +the parsed `{ text, ... }`. The browser stops calling Venice +directly: `extractText` becomes a thin call to the function +(e.g. a `SupabaseService.extractText` mirroring how browser +embeds now go through the function), and both call sites above +move onto it. Keep the `VeniceError` shape so the composer pill +and the `ingestDocument` failure branch render unchanged. ## Open questions - Edge-function payload size limit vs the largest attachment we - accept - does anything need chunking or a direct-to-Venice - escape hatch for large files? + accept (10 MB attachments, 25 MB Library docs) - does anything + need chunking or a direct-to-Venice escape hatch for large + files? If a large file can't round-trip through the function, + the direct-from-browser path it would fall back to is the very + one that's CORS-broken - so the escape hatch may have to be a + signed-upload-to-storage-then-server-fetch shape rather than + browser-direct-to-Venice. +- Confirm the CORS diagnosis from devtools (nice-to-have; the + server-side route fixes it regardless). ## Lessons from the embeddings milestone diff --git a/supabase/functions/_shared/expire-attachments.ts b/supabase/functions/_shared/expire-attachments.ts new file mode 100644 index 0000000..166cf47 --- /dev/null +++ b/supabase/functions/_shared/expire-attachments.ts @@ -0,0 +1,77 @@ +// I/O-free orchestration for the attachment-expiry sweep, the server-side +// replacement for the old browser attachment_expiry worker (Stage 2 of the +// attachments-storage migration). All I/O is injected so this runs under +// `deno test` with fakes - the edge function (expire-attachments/index.ts) +// wires the real Supabase service-role client in. +// +// The sweep: pull a bounded batch of live attachments whose owning thread has +// been dormant past the cutoff, delete their bucket objects, then mark the rows +// expired (null storage_path + stamp expired_at). Repeat until a batch comes +// back short (queue drained), the row cap is hit, or the time budget elapses; +// the next cron tick resumes. Deletion and marking are idempotent, so no +// per-row claim is needed - overlapping ticks at worst redo harmless work. + +export interface ExpireBatchRow { + id: string; + storagePath: string; +} + +export interface ExpireDeps { + /** Next batch of expirable rows (live + dormant), at most `batchSize`. */ + listBatch: (batchSize: number) => Promise; + /** Delete these object keys from the attachments bucket. Idempotent. */ + deleteObjects: (paths: string[]) => Promise; + /** Null storage_path + stamp expired_at for these ids. Returns row count. */ + markExpired: (ids: string[]) => Promise; +} + +export interface ExpireOpts { + batchSize: number; + maxRows: number; + timeBudgetMs: number; + now?: () => number; +} + +export interface ExpireSummary { + /** Rows marked expired (objects deleted). */ + expired: number; + /** Batches processed. */ + batches: number; + /** True when stopped on the cap/budget rather than draining the queue. */ + bounded: boolean; + durationMs: number; +} + +export async function runExpiry(deps: ExpireDeps, opts: ExpireOpts): Promise { + const now = opts.now ?? Date.now; + const start = now(); + let expired = 0; + let batches = 0; + let bounded = false; + + for (;;) { + if (expired >= opts.maxRows) { + bounded = true; + break; + } + if (now() - start >= opts.timeBudgetMs) { + bounded = true; + break; + } + + const remaining = opts.maxRows - expired; + const batchSize = Math.min(opts.batchSize, remaining); + const rows = await deps.listBatch(batchSize); + if (rows.length === 0) break; // queue drained + + await deps.deleteObjects(rows.map((r) => r.storagePath)); + const marked = await deps.markExpired(rows.map((r) => r.id)); + expired += marked; + batches += 1; + + // A short batch means the eligible set is exhausted for now. + if (rows.length < batchSize) break; + } + + return { expired, batches, bounded, durationMs: now() - start }; +} diff --git a/supabase/functions/expire-attachments/index.ts b/supabase/functions/expire-attachments/index.ts new file mode 100644 index 0000000..c8afc32 --- /dev/null +++ b/supabase/functions/expire-attachments/index.ts @@ -0,0 +1,113 @@ +// The `expire-attachments` edge function - the server-side replacement for the +// old browser attachment_expiry worker (Stage 2 of the attachments-storage +// migration). Cron-triggered (pg_cron -> pg_net -> here; see +// nak_trigger_attachment_expiry in schema.sql). Service-role only. +// +// It is deliberately NOT a route on the `venice` function: expiry never calls +// Venice, it only deletes Supabase Storage objects and marks rows, so it has no +// business sharing that function. The auth/client helpers below are duplicated +// from venice/index.ts on purpose - keeping the two functions independent is +// worth a few lines over coupling them through a shared module. +// +// The sweep orchestration is pure and unit-tested in +// ../_shared/expire-attachments.ts; this file owns only the glue: auth, the +// service-role client, and wiring the RPC + Storage I/O into the deps. +import { createClient, type SupabaseClient } from '@supabase/supabase-js'; +import { runExpiry, type ExpireDeps } from '../_shared/expire-attachments.ts'; + +const CORS_HEADERS: Record = { + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type', + 'Access-Control-Allow-Methods': 'POST, OPTIONS', +}; + +// Dormancy window (days since the owning thread's updated_at) and per- +// invocation bounds. Hourly cron + a generous cap keeps each tick small; the +// next tick resumes if a backlog ever exceeds the cap. +const EXPIRY_DAYS = 30; +const BATCH_SIZE = 100; +const MAX_ROWS = 2000; +const TIME_BUDGET_MS = 25_000; + +function json(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { ...CORS_HEADERS, 'Content-Type': 'application/json' }, + }); +} + +function adminClient(): SupabaseClient | null { + const url = Deno.env.get('SUPABASE_URL'); + const serviceKey = Deno.env.get('SUPABASE_SERVICE_ROLE_KEY'); + if (!url || !serviceKey) return null; + return createClient(url, serviceKey, { auth: { persistSession: false } }); +} + +// True when the bearer's decoded `role` claim is service_role. Safe to trust +// the decoded payload only because the gateway's verify_jwt has already +// validated the signature (see the longer note in venice/index.ts). Do NOT +// disable verify_jwt for this function without replacing this with signature +// verification. +function isServiceRole(req: Request): boolean { + const auth = req.headers.get('Authorization') ?? ''; + const token = auth.startsWith('Bearer ') ? auth.slice(7).trim() : ''; + const parts = token.split('.'); + if (parts.length !== 3) return false; + try { + let b64 = parts[1].replace(/-/g, '+').replace(/_/g, '/'); + while (b64.length % 4) b64 += '='; + const payload = JSON.parse(atob(b64)) as { role?: unknown }; + return payload.role === 'service_role'; + } catch { + return false; + } +} + +async function handleExpire(req: Request): Promise { + if (!isServiceRole(req)) return json({ error: 'forbidden' }, 403); + + const admin = adminClient(); + if (!admin) return json({ error: 'function env missing SUPABASE_* secrets' }, 503); + + const deps: ExpireDeps = { + listBatch: async (batchSize) => { + const { data, error } = await admin.rpc('list_expirable_attachments', { + p_days: EXPIRY_DAYS, + p_limit: batchSize, + }); + if (error) throw error; + return ((data ?? []) as Array<{ id: string; storage_path: string }>).map((r) => ({ + id: r.id, + storagePath: r.storage_path, + })); + }, + deleteObjects: async (paths) => { + if (paths.length === 0) return; + const { error } = await admin.storage.from('attachments').remove(paths); + if (error) throw error; + }, + markExpired: async (ids) => { + if (ids.length === 0) return 0; + const { data, error } = await admin.rpc('mark_attachments_expired', { p_ids: ids }); + if (error) throw error; + return typeof data === 'number' ? data : 0; + }, + }; + + try { + const summary = await runExpiry(deps, { + batchSize: BATCH_SIZE, + maxRows: MAX_ROWS, + timeBudgetMs: TIME_BUDGET_MS, + }); + return json(summary); + } catch (err) { + return json({ error: (err as Error).message }, 500); + } +} + +Deno.serve(async (req: Request): Promise => { + if (req.method === 'OPTIONS') return new Response('ok', { headers: CORS_HEADERS }); + if (req.method === 'POST') return handleExpire(req); + return json({ error: 'not found' }, 404); +}); diff --git a/supabase/functions/tests/expire-attachments.test.ts b/supabase/functions/tests/expire-attachments.test.ts new file mode 100644 index 0000000..77199af --- /dev/null +++ b/supabase/functions/tests/expire-attachments.test.ts @@ -0,0 +1,110 @@ +// Offline unit tests for the attachment-expiry orchestration. runExpiry takes +// injected listBatch/deleteObjects/markExpired callbacks, so the drain loop, +// batch cap, short-batch stop, and budget exit are exercised with fakes - no +// network, no Supabase, no Storage. +import { assertEquals } from '@std/assert'; +import { runExpiry, type ExpireDeps, type ExpireBatchRow } from '../_shared/expire-attachments.ts'; + +// A fake backed by a queue of pending rows. listBatch shifts up to `batchSize` +// off the front; deleteObjects + markExpired record what they saw. +function fakeDeps( + pending: ExpireBatchRow[] +): { + deps: ExpireDeps; + deleted: string[]; + marked: string[]; +} { + const deleted: string[] = []; + const marked: string[] = []; + const deps: ExpireDeps = { + listBatch: (batchSize) => Promise.resolve(pending.splice(0, batchSize)), + deleteObjects: (paths) => { + deleted.push(...paths); + return Promise.resolve(); + }, + markExpired: (ids) => { + marked.push(...ids); + return Promise.resolve(ids.length); + }, + }; + return { deps, deleted, marked }; +} + +function rows(n: number): ExpireBatchRow[] { + return Array.from({ length: n }, (_, i) => ({ id: `id-${i}`, storagePath: `u/id-${i}/f` })); +} + +Deno.test('drains the queue across multiple full batches', async () => { + const { deps, deleted, marked } = fakeDeps(rows(250)); + const summary = await runExpiry(deps, { + batchSize: 100, + maxRows: 10_000, + timeBudgetMs: 10_000, + }); + assertEquals(summary.expired, 250); + assertEquals(summary.batches, 3); // 100 + 100 + 50 (short -> stop) + assertEquals(summary.bounded, false); + assertEquals(deleted.length, 250); + assertEquals(marked.length, 250); +}); + +Deno.test('stops at a short batch without an extra empty call', async () => { + const { deps } = fakeDeps(rows(40)); + const summary = await runExpiry(deps, { + batchSize: 100, + maxRows: 10_000, + timeBudgetMs: 10_000, + }); + assertEquals(summary.expired, 40); + assertEquals(summary.batches, 1); + assertEquals(summary.bounded, false); +}); + +Deno.test('empty queue is a clean no-op', async () => { + const { deps, deleted } = fakeDeps([]); + const summary = await runExpiry(deps, { + batchSize: 100, + maxRows: 10_000, + timeBudgetMs: 10_000, + }); + assertEquals(summary.expired, 0); + assertEquals(summary.batches, 0); + assertEquals(deleted.length, 0); +}); + +Deno.test('honors the row cap (bounded), leaving the rest for the next tick', async () => { + const { deps, marked } = fakeDeps(rows(500)); + const summary = await runExpiry(deps, { + batchSize: 100, + maxRows: 150, + timeBudgetMs: 10_000, + }); + // 100 then a 50-row batch (capped by remaining) reaches the cap. + assertEquals(summary.expired, 150); + assertEquals(summary.bounded, true); + assertEquals(marked.length, 150); +}); + +Deno.test('deletes objects before marking rows expired', async () => { + const order: string[] = []; + const deps: ExpireDeps = { + listBatch: (() => { + let served = false; + return (n: number) => { + if (served) return Promise.resolve([]); + served = true; + return Promise.resolve(rows(3).slice(0, n)); + }; + })(), + deleteObjects: (paths) => { + order.push(`delete:${paths.length}`); + return Promise.resolve(); + }, + markExpired: (ids) => { + order.push(`mark:${ids.length}`); + return Promise.resolve(ids.length); + }, + }; + await runExpiry(deps, { batchSize: 100, maxRows: 10_000, timeBudgetMs: 10_000 }); + assertEquals(order, ['delete:3', 'mark:3']); +}); diff --git a/supabase/schema.sql b/supabase/schema.sql index 4c509ff..8f6489b 100644 --- a/supabase/schema.sql +++ b/supabase/schema.sql @@ -7970,3 +7970,138 @@ exception when others then raise notice 'embedding backfill cron setup skipped: %', sqlerrm; end $cron$; + +-- --------------------------------------------------------------------------- +-- Scheduled attachment expiry (pg_cron -> pg_net -> expire-attachments) +-- +-- Stage 2 of the attachments-storage migration +-- (docs/dev/in-progress/attachments-storage-migration.md). Replaces the old +-- browser attachment_expiry worker: a cron tick POSTs to the standalone +-- `expire-attachments` edge function, which deletes the bucket objects for +-- attachments whose owning thread has been dormant 30 days, then nulls +-- storage_path + stamps expired_at. SQL can't delete a Storage object, so the +-- deletion has to happen in the function (service-role storage client); these +-- RPCs only select the batch and mark the rows. +-- +-- Reuses the same Vault secrets as the embedding backfill (project_url + +-- service_role_key, seeded by `mise run supabase-init`). The function is NOT +-- the venice function - expiry never calls Venice, it only touches Storage - +-- so it deploys separately (see .github/workflows/deploy.yml). +-- +-- Both RPCs are security definer with no auth.uid() filter (cron has no user +-- session; the sweep spans every member) and EXECUTE-locked to service_role - +-- the same boundary as the embedding claim/save pair. The edge function +-- (service role) is their only caller. + +-- Select a bounded batch of live attachments eligible for expiry: object still +-- present (storage_path not null) and the owning thread dormant for p_days. +-- Returns (id, storage_path) so the function knows which objects to delete and +-- which rows to mark. No claim/TTL: deletion + marking are idempotent (removing +-- an already-gone object is a no-op, re-marking an expired row is a no-op), so +-- two overlapping ticks can't corrupt anything - the FOR UPDATE SKIP LOCKED +-- just keeps them from contending on the same rows within a tick. +drop function if exists public.list_expirable_attachments(int, int); +create or replace function public.list_expirable_attachments( + p_days int, + p_limit int +) returns table (id uuid, storage_path text) +language sql security definer +set search_path = public as $$ + select a.id, a.storage_path + from public.message_attachments a + join public.messages m on m.id = a.message_id + join public.threads t on t.id = m.thread_id + where a.storage_path is not null + and t.updated_at < now() - make_interval(days => p_days) + order by t.updated_at asc + limit p_limit + for update of a skip locked +$$; + +-- Mark the given attachments expired once their objects are deleted: null +-- storage_path (the liveness signal) and stamp expired_at. extracted_text and +-- the other metadata stay, so the row still renders as an expired chip. +drop function if exists public.mark_attachments_expired(uuid[]); +create or replace function public.mark_attachments_expired( + p_ids uuid[] +) returns int +language plpgsql security definer +set search_path = public as $$ +declare + affected int; +begin + update public.message_attachments + set storage_path = null, + expired_at = now() + where id = any(p_ids); + get diagnostics affected = row_count; + return affected; +end $$; + +revoke all on function public.list_expirable_attachments(int, int) from public, anon, authenticated; +revoke all on function public.mark_attachments_expired(uuid[]) from public, anon, authenticated; +grant execute on function public.list_expirable_attachments(int, int) to service_role; +grant execute on function public.mark_attachments_expired(uuid[]) to service_role; + +-- Cron dispatcher, same shape + Vault-secret custody as +-- nak_trigger_embed_backfill above. Dynamic SQL so it compiles where pg_net / +-- vault are absent (local stack); no-ops until the secrets are seeded. +create or replace function public.nak_trigger_attachment_expiry() +returns void +language plpgsql +security definer +set search_path = public +as $fn$ +declare + v_url text; + v_key text; +begin + begin + execute $q$ select decrypted_secret from vault.decrypted_secrets where name = 'project_url' $q$ into v_url; + execute $q$ select decrypted_secret from vault.decrypted_secrets where name = 'service_role_key' $q$ into v_key; + exception when others then + return; -- vault not installed or unreadable; nothing to dispatch + end; + if v_url is null or v_key is null then + return; -- secrets not seeded yet + end if; + begin + execute format( + $q$ select net.http_post( + url := %L, + headers := jsonb_build_object('Content-Type', 'application/json', 'Authorization', %L), + body := '{}'::jsonb + ) $q$, + v_url || '/functions/v1/expire-attachments', + 'Bearer ' || v_key + ); + exception when others then + raise notice 'nak_trigger_attachment_expiry: dispatch failed: %', sqlerrm; + end; +end; +$fn$; + +revoke all on function public.nak_trigger_attachment_expiry() from public, anon, authenticated; + +-- Schedule the sweep hourly (dormancy is measured in days, so hourly is ample +-- and keeps each tick's batch small). Guarded on extension availability + +-- idempotent reschedule, same as the backfill cron. +do $cron$ +begin + if exists (select 1 from pg_available_extensions where name = 'pg_cron') + and exists (select 1 from pg_available_extensions where name = 'pg_net') then + create extension if not exists pg_cron; + create extension if not exists pg_net; + if exists (select 1 from cron.job where jobname = 'nak-attachment-expiry') then + perform cron.unschedule('nak-attachment-expiry'); + end if; + perform cron.schedule( + 'nak-attachment-expiry', + '17 * * * *', + $job$ select public.nak_trigger_attachment_expiry(); $job$ + ); + end if; +exception when others then + raise notice 'attachment expiry cron setup skipped: %', sqlerrm; +end +$cron$;