diff --git a/src/app/sitemap.ts b/src/app/sitemap.ts deleted file mode 100644 index 8ffb9bcc..00000000 --- a/src/app/sitemap.ts +++ /dev/null @@ -1,128 +0,0 @@ -import type { MetadataRoute } from "next"; -import { prisma } from "@/lib/prisma"; -import { localizedUrl } from "@/lib/seo"; -import { routing } from "@/i18n/routing"; -import { SCENARIOS } from "@/lib/scenarios"; -import { getAllPosts } from "@/lib/blog"; - -// Generated at request time against the live catalog (the app renders DB pages -// dynamically and the Docker build has no DB), so the sitemap stays fresh as -// agents/skills are imported. -// -// Why split (generateSitemaps): there are ~6.8k URLs, each carrying 15-locale -// hreflang alternates. As ONE document that serialized to ~12MB and ~13s per -// request, which made Google's sitemap fetcher time out ("Couldn't fetch"). -// Splitting yields a tiny instant index at /sitemap.xml plus small children at -// /sitemap/{id}.xml that Google fetches independently — each child is fast and -// the whole catalog still gets covered. -export const dynamic = "force-dynamic"; - -// URLs per child sitemap. Well under Google's 50k/50MB cap; kept small because -// each entity URL expands to 15 hreflang lines, so ~1k URLs ≈ a couple of MB. -const CHUNK = 1000; - -const APPROVED = { status: "APPROVED" } as const; - -// hreflang alternates for a path: one entry per locale. The canonical `url` is -// the unprefixed English URL; `alternates.languages` carries every locale so a -// single row covers all 15 (the shape Google recommends). 
-function languagesFor(path: string): Record { - return Object.fromEntries(routing.locales.map((l) => [l, localizedUrl(l, path)])); -} - -type SitemapEntry = MetadataRoute.Sitemap[number]; -function entry(path: string, rest: Omit): SitemapEntry { - return { - url: localizedUrl(routing.defaultLocale, path), - alternates: { languages: languagesFor(path) }, - ...rest, - }; -} - -async function agentCount(): Promise { - try { - return await prisma.agent.count({ where: APPROVED }); - } catch { - return 0; - } -} -async function skillCount(): Promise { - try { - return await prisma.skill.count({ where: APPROVED }); - } catch { - return 0; - } -} - -// Layout of the child sitemaps, by `id`: -// 0 -> static + blog + scenario pages -// 1 .. agentChunks -> APPROVED agents, CHUNK per file -// agentChunks+1 .. +skillChunks -> APPROVED skills, CHUNK per file -export async function generateSitemaps(): Promise<{ id: number }[]> { - const [agents, skills] = await Promise.all([agentCount(), skillCount()]); - const count = 1 + Math.ceil(agents / CHUNK) + Math.ceil(skills / CHUNK); - return Array.from({ length: count }, (_, id) => ({ id })); -} - -function pageRoutes(): MetadataRoute.Sitemap { - const now = new Date(); - const staticRoutes: MetadataRoute.Sitemap = [ - entry("", { lastModified: now, changeFrequency: "daily", priority: 1 }), - entry("/agents", { lastModified: now, changeFrequency: "daily", priority: 0.9 }), - entry("/scenarios", { lastModified: now, changeFrequency: "weekly", priority: 0.8 }), - entry("/skills", { lastModified: now, changeFrequency: "daily", priority: 0.8 }), - entry("/install", { lastModified: now, changeFrequency: "monthly", priority: 0.6 }), - entry("/badge", { lastModified: now, changeFrequency: "monthly", priority: 0.5 }), - entry("/trending", { lastModified: now, changeFrequency: "weekly", priority: 0.6 }), - entry("/blog", { lastModified: now, changeFrequency: "weekly", priority: 0.7 }), - ]; - - const blogRoutes: MetadataRoute.Sitemap = 
getAllPosts().map((p) => - entry(`/blog/${p.slug}`, { - lastModified: new Date(p.dateModified), - changeFrequency: "monthly", - priority: 0.6, - }), - ); - - const scenarioRoutes: MetadataRoute.Sitemap = SCENARIOS.map((s) => - entry(`/scenarios/${s.slug}`, { lastModified: now, changeFrequency: "weekly", priority: 0.6 }), - ); - - return [...staticRoutes, ...blogRoutes, ...scenarioRoutes]; -} - -// Next passes `id` as a Promise (the value from generateSitemaps). -export default async function sitemap({ id }: { id: Promise }): Promise { - const n = Number(await id); - if (n === 0) return pageRoutes(); - - const agents = await agentCount(); - const agentChunks = Math.ceil(agents / CHUNK); - const chunkIndex = n - 1; // 0-based among entity chunks - - if (chunkIndex < agentChunks) { - const rows = await prisma.agent.findMany({ - where: APPROVED, - select: { slug: true, updatedAt: true }, - orderBy: { updatedAt: "desc" }, - skip: chunkIndex * CHUNK, - take: CHUNK, - }); - return rows.map((a) => - entry(`/agents/${a.slug}`, { lastModified: a.updatedAt, changeFrequency: "weekly", priority: 0.7 }), - ); - } - - const skillChunkIndex = chunkIndex - agentChunks; - const rows = await prisma.skill.findMany({ - where: APPROVED, - select: { slug: true, updatedAt: true }, - orderBy: { updatedAt: "desc" }, - skip: skillChunkIndex * CHUNK, - take: CHUNK, - }); - return rows.map((s) => - entry(`/skills/${s.slug}`, { lastModified: s.updatedAt, changeFrequency: "weekly", priority: 0.5 }), - ); -} diff --git a/src/app/sitemap.xml/route.ts b/src/app/sitemap.xml/route.ts new file mode 100644 index 00000000..74f2c114 --- /dev/null +++ b/src/app/sitemap.xml/route.ts @@ -0,0 +1,11 @@ +import { childCount, renderIndex, SITEMAP_HEADERS } from "@/lib/sitemap-shape"; + +// Sitemap index at /sitemap.xml — lists the chunked children at /sitemap/{id}.xml. +// Hand-rolled because Next's sitemap.ts metadata convention can't emit a +// (see @/lib/sitemap-shape). 
+export const dynamic = "force-dynamic"; + +export async function GET() { + const xml = renderIndex(await childCount(), new Date().toISOString()); + return new Response(xml, { headers: SITEMAP_HEADERS }); +} diff --git a/src/app/sitemap/[id]/route.ts b/src/app/sitemap/[id]/route.ts new file mode 100644 index 00000000..970a74a4 --- /dev/null +++ b/src/app/sitemap/[id]/route.ts @@ -0,0 +1,15 @@ +import { entriesFor, renderUrlset, SITEMAP_HEADERS } from "@/lib/sitemap-shape"; + +// Child sitemap at /sitemap/{id}.xml. The dynamic segment is "{id}.xml" (e.g. +// "3.xml"); parseInt stops at the dot. Listed by the index at /sitemap.xml. +export const dynamic = "force-dynamic"; + +export async function GET(_req: Request, { params }: { params: Promise<{ id: string }> }) { + const { id } = await params; + const n = Number.parseInt(id, 10); + if (!Number.isInteger(n) || n < 0) { + return new Response("Not found", { status: 404 }); + } + const xml = renderUrlset(await entriesFor(n)); + return new Response(xml, { headers: SITEMAP_HEADERS }); +} diff --git a/src/lib/sitemap-shape.ts b/src/lib/sitemap-shape.ts new file mode 100644 index 00000000..5f79a873 --- /dev/null +++ b/src/lib/sitemap-shape.ts @@ -0,0 +1,163 @@ +import { prisma } from "@/lib/prisma"; +import { localizedUrl, SITE_URL } from "@/lib/seo"; +import { routing } from "@/i18n/routing"; +import { SCENARIOS } from "@/lib/scenarios"; +import { getAllPosts } from "@/lib/blog"; + +// Shared logic for the split sitemap. We hand-roll the XML (instead of Next's +// app/sitemap.ts metadata convention) because that convention reserves +// /sitemap.xml and, when combined with generateSitemaps(), 404s the parent +// index there. Hand-rolling lets us serve a real at /sitemap.xml +// (app/sitemap.xml/route.ts) plus chunked children at /sitemap/{id}.xml +// (app/sitemap/[id]/route.ts) — all force-dynamic, so the live catalog is always +// complete and each child is small enough that Google never times out. 
// URLs per child sitemap. Well under Google's 50k/50MB cap; kept small because
// each entity URL expands to 15 hreflang lines, so ~1k URLs ≈ a couple of MB.
export const CHUNK = 1000;

// Prisma `where` filter shared by every catalog query in this module.
const APPROVED = { status: "APPROVED" } as const;

/** One sitemap row before XML rendering; `path` is the locale-less site path. */
type Entry = {
  path: string;
  lastModified: Date;
  changeFrequency: "daily" | "weekly" | "monthly";
  priority: number;
};

/**
 * Counts APPROVED agents and skills with two parallel queries.
 *
 * @returns Both counts, or zeros when either query rejects, so the sitemap
 *   degrades to the static chunk instead of failing the request.
 */
export async function approvedCounts(): Promise<{ agents: number; skills: number }> {
  try {
    const [agents, skills] = await Promise.all([
      prisma.agent.count({ where: APPROVED }),
      prisma.skill.count({ where: APPROVED }),
    ]);
    return { agents, skills };
  } catch (err: unknown) {
    // DB unreachable (e.g. the DB-less Docker build): degrade to no entity
    // chunks, but leave a trace so an unexpectedly empty index is explainable.
    console.warn("sitemap: could not count approved agents/skills", err);
    return { agents: 0, skills: 0 };
  }
}

/**
 * Number of child sitemaps:
 *   id 0                     -> static + blog + scenario pages
 *   1 .. ceil(agents/CHUNK)  -> APPROVED agents
 *   then ceil(skills/CHUNK)  -> APPROVED skills
 */
export async function childCount(): Promise<number> {
  const { agents, skills } = await approvedCounts();
  return 1 + Math.ceil(agents / CHUNK) + Math.ceil(skills / CHUNK);
}

/** Entries for child 0: fixed top-level pages, blog posts and scenario pages. */
function pageEntries(): Entry[] {
  const now = new Date();
  const staticRoutes: Entry[] = [
    { path: "", lastModified: now, changeFrequency: "daily", priority: 1 },
    { path: "/agents", lastModified: now, changeFrequency: "daily", priority: 0.9 },
    { path: "/scenarios", lastModified: now, changeFrequency: "weekly", priority: 0.8 },
    { path: "/skills", lastModified: now, changeFrequency: "daily", priority: 0.8 },
    { path: "/install", lastModified: now, changeFrequency: "monthly", priority: 0.6 },
    { path: "/badge", lastModified: now, changeFrequency: "monthly", priority: 0.5 },
    { path: "/trending", lastModified: now, changeFrequency: "weekly", priority: 0.6 },
    { path: "/blog", lastModified: now, changeFrequency: "weekly", priority: 0.7 },
  ];
  const blogRoutes: Entry[] = getAllPosts().map((p): Entry => {
    const modified = new Date(p.dateModified);
    return {
      path: `/blog/${p.slug}`,
      // An unparseable dateModified yields an Invalid Date, whose toISOString()
      // throws at render time; fall back to "now" rather than fail the child.
      lastModified: Number.isNaN(modified.getTime()) ? now : modified,
      changeFrequency: "monthly",
      priority: 0.6,
    };
  });
  const scenarioRoutes: Entry[] = SCENARIOS.map(
    (s): Entry => ({
      path: `/scenarios/${s.slug}`,
      lastModified: now,
      changeFrequency: "weekly",
      priority: 0.6,
    }),
  );
  return [...staticRoutes, ...blogRoutes, ...scenarioRoutes];
}

/**
 * Entries for child sitemap `n` (0-based).
 *
 * @param n Child id as laid out by `childCount`.
 * @returns The rows for that child. Negative, fractional and out-of-range ids
 *   yield [] without querying the catalog; a failed query also yields [].
 */
export async function entriesFor(n: number): Promise<Entry[]> {
  if (!Number.isInteger(n) || n < 0) return [];
  if (n === 0) return pageEntries();

  const { agents, skills } = await approvedCounts();
  const agentChunks = Math.ceil(agents / CHUNK);
  const skillChunks = Math.ceil(skills / CHUNK);
  const chunkIndex = n - 1; // 0-based among entity chunks
  if (chunkIndex >= agentChunks + skillChunks) return [];

  // A DB blip degrades a child to an empty <urlset> rather than a 500.
  try {
    if (chunkIndex < agentChunks) {
      const rows = await prisma.agent.findMany({
        where: APPROVED,
        select: { slug: true, updatedAt: true },
        // skip/take paging needs a total order: rows sharing an updatedAt could
        // otherwise land in two chunks or in none. Slug breaks the tie
        // (presumably unique, since it forms the URL — confirm against schema).
        orderBy: [{ updatedAt: "desc" }, { slug: "asc" }],
        skip: chunkIndex * CHUNK,
        take: CHUNK,
      });
      return rows.map(
        (a): Entry => ({
          path: `/agents/${a.slug}`,
          lastModified: a.updatedAt,
          changeFrequency: "weekly",
          priority: 0.7,
        }),
      );
    }

    const rows = await prisma.skill.findMany({
      where: APPROVED,
      select: { slug: true, updatedAt: true },
      orderBy: [{ updatedAt: "desc" }, { slug: "asc" }],
      skip: (chunkIndex - agentChunks) * CHUNK,
      take: CHUNK,
    });
    return rows.map(
      (s): Entry => ({
        path: `/skills/${s.slug}`,
        lastModified: s.updatedAt,
        changeFrequency: "weekly",
        priority: 0.5,
      }),
    );
  } catch (err: unknown) {
    console.warn(`sitemap: could not load entries for child ${n}`, err);
    return [];
  }
}

/**
 * Escapes the characters that are significant in XML text and attribute values.
 * `&` is replaced first so the entities produced by later steps are not
 * escaped a second time.
 */
function xmlEscape(s: string): string {
  return s
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}

// The canonical `loc` is the unprefixed English URL; hreflang alternates list
// every locale (the shape Google recommends — one row covers all 15).
const XML_DECLARATION = `<?xml version="1.0" encoding="UTF-8"?>\n`;
const SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9";
const XHTML_NS = "http://www.w3.org/1999/xhtml";

/**
 * Renders one `<url>` element: the canonical default-locale `<loc>`, one
 * `<xhtml:link rel="alternate">` per configured locale, then the
 * lastmod/changefreq/priority metadata.
 */
function renderUrl(e: Entry): string {
  const loc = xmlEscape(localizedUrl(routing.defaultLocale, e.path));
  const alts = routing.locales
    .map(
      (l) =>
        `<xhtml:link rel="alternate" hreflang="${xmlEscape(l)}" ` +
        `href="${xmlEscape(localizedUrl(l, e.path))}"/>`,
    )
    .join("");
  // toISOString() throws a RangeError on an Invalid Date; omit the optional
  // <lastmod> instead of failing the whole document.
  const lastmod = Number.isNaN(e.lastModified.getTime())
    ? ""
    : `<lastmod>${e.lastModified.toISOString()}</lastmod>`;
  return (
    `<url><loc>${loc}</loc>${alts}` +
    lastmod +
    `<changefreq>${e.changeFrequency}</changefreq>` +
    `<priority>${e.priority}</priority></url>`
  );
}

/**
 * Serializes entries as a `<urlset>` document. The xhtml namespace is declared
 * on the root because every row carries `<xhtml:link>` alternates.
 *
 * @param entries Rows to render; an empty array yields an empty `<urlset>`.
 */
export function renderUrlset(entries: Entry[]): string {
  return (
    XML_DECLARATION +
    `<urlset xmlns="${SITEMAP_NS}" xmlns:xhtml="${XHTML_NS}">` +
    entries.map(renderUrl).join("") +
    `</urlset>`
  );
}

/**
 * Serializes the `<sitemapindex>` listing children `/sitemap/0.xml` through
 * `/sitemap/{n-1}.xml`.
 *
 * @param n Number of child sitemaps.
 * @param lastmod Timestamp written as every child's `<lastmod>`.
 */
export function renderIndex(n: number, lastmod: string): string {
  const children = Array.from({ length: n }, (_, id) => {
    const loc = xmlEscape(`${SITE_URL}/sitemap/${id}.xml`);
    return `<sitemap><loc>${loc}</loc><lastmod>${xmlEscape(lastmod)}</lastmod></sitemap>`;
  }).join("");
  return `${XML_DECLARATION}<sitemapindex xmlns="${SITEMAP_NS}">${children}</sitemapindex>`;
}

/** Response headers shared by the index and child routes: XML, cached one hour. */
export const SITEMAP_HEADERS = {
  "Content-Type": "application/xml",
  "Cache-Control": "public, max-age=3600, s-maxage=3600",
} as const;