Skip to content

Commit f17299b

Browse files
Merge branch 'main' into claude/practical-mclaren-4da7df
2 parents c470d4e + 1cc3b6c commit f17299b

29 files changed

Lines changed: 640 additions & 66 deletions

netlify.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ directory = "netlify/functions"
1616
included_files = [
1717
"public/fonts/Inter-Regular.ttf",
1818
"public/fonts/Inter-ExtraBold.ttf",
19+
"public/fonts/Inter-Black.ttf",
1920
"public/images/logos/splash-dark.png",
2021
"node_modules/.pnpm/@takumi-rs+wasm@*/node_modules/@takumi-rs/wasm/pkg/takumi_wasm_bg.wasm",
2122
]
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import type { Config } from '@netlify/functions'
2+
import { pruneStaleCacheRows } from '~/utils/github-content-cache.server'
3+
4+
/**
5+
* Netlify Scheduled Function - Prune stale GitHub content cache rows
6+
*
7+
* Deletes rows in `github_content_cache` and `docs_artifact_cache` whose
8+
* `updatedAt` is older than the configured TTL. Anything that's still being
9+
* read regularly is touched on refresh and stays put; the rest (old gitRefs,
10+
* deleted paths, abandoned branches) gets reclaimed.
11+
*
12+
* Scheduled: Daily at 09:00 UTC.
13+
*/
14+
// Scheduled-function entry point: runs the cache prune and logs the outcome.
// Prune failures are logged (message, then stack when available) and are not
// rethrown, so the returned promise resolves either way.
const handler = async (req: Request) => {
  const startTime = Date.now()
  console.log('[prune-content-cache] Starting prune...')

  // The request body is only used for the informational `next_run` log line.
  // Parse it defensively: an empty or non-JSON body (for example a manual
  // invocation) must not reject the handler before the prune has even run.
  let nextRun: unknown
  try {
    const payload: unknown = await req.json()
    if (typeof payload === 'object' && payload !== null) {
      nextRun = (payload as { next_run?: unknown }).next_run
    }
  } catch {
    nextRun = undefined
  }

  try {
    const result = await pruneStaleCacheRows()
    const duration = Date.now() - startTime

    console.log(
      `[prune-content-cache] ✓ Completed in ${duration}ms - deleted ${result.githubContentDeleted} content rows (${result.githubContentNegativesDeleted} negatives), ${result.docsArtifactDeleted} artifact rows (cutoff: ${result.cutoff.toISOString()}, negativeCutoff: ${result.negativeCutoff.toISOString()})`,
    )
    if (nextRun !== undefined) {
      console.log('[prune-content-cache] Next invocation at:', nextRun)
    }
  } catch (error) {
    const duration = Date.now() - startTime
    const errorMessage = error instanceof Error ? error.message : String(error)
    const errorStack = error instanceof Error ? error.stack : undefined

    console.error(
      `[prune-content-cache] ✗ Failed after ${duration}ms:`,
      errorMessage,
    )
    if (errorStack) {
      console.error('[prune-content-cache] Stack:', errorStack)
    }
  }
}
42+
43+
export default handler
44+
45+
/** Cron schedule for this function: every day at 09:00 UTC. */
export const config: Config = { schedule: '0 9 * * *' }

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"build": "vite build",
1313
"start": "vite start",
1414
"start:prod": "pnpm run with-env node scripts/run-built-server.mjs",
15+
"content:build": "node scripts/build-content-collections.mjs",
1516
"lint": "oxlint --type-aware --disable-nested-config",
1617
"format": "oxfmt --write",
1718
"db:generate": "drizzle-kit generate",
@@ -21,6 +22,7 @@
2122
"docs:webhooks:plan": "tsx scripts/sync-docs-webhooks.ts --dry-run",
2223
"docs:webhooks:sync": "tsx scripts/sync-docs-webhooks.ts",
2324
"husky": "pnpm run format && pnpm run test && pnpm run test:smoke",
25+
"pretest": "pnpm run content:build",
2426
"test": "run-p test:tsc test:lint",
2527
"test:tsc": "tsc",
2628
"test:lint": "pnpm run lint",
@@ -125,9 +127,9 @@
125127
"@playwright/test": "^1.59.0",
126128
"@shikijs/transformers": "^4.0.2",
127129
"@tanstack/devtools-vite": "^0.6.0",
128-
"@tanstack/redact": "^0.0.3",
129130
"@tanstack/react-devtools": "^0.10.2",
130131
"@tanstack/react-query-devtools": "^5.99.0",
132+
"@tanstack/redact": "^0.0.6",
131133
"@types/hast": "^3.0.4",
132134
"@types/node": "^25.5.0",
133135
"@types/pg": "^8.20.0",

pnpm-lock.yaml

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

public/fonts/Inter-Black.ttf

66.3 KB
Binary file not shown.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import { createBuilder } from '@content-collections/core'
2+
import path from 'node:path'
3+
4+
// Build the content collections from `content-collections.ts`. The relative
// path is resolved against the current working directory.
const configPath = path.resolve('content-collections.ts')
const builder = await createBuilder(configPath)
await builder.build()
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import { and, eq, sql } from 'drizzle-orm'
2+
import { db } from '../src/db/client'
3+
import { githubContentCache } from '../src/db/schema'
4+
5+
/**
 * Prints diagnostic breakdowns of the `github_content_cache` rows belonging to
 * a single repo/gitRef pair: total count, counts per content kind, top-level
 * path segments, file extensions, random file/dir samples and path-length
 * buckets.
 *
 * The target defaults to `tanstack/router` at `main`. Pass `<repo> [gitRef]`
 * as command-line arguments to analyze a different pair.
 */
async function main() {
  // Optional CLI overrides; with no arguments the original target is used.
  const [repo = 'tanstack/router', gitRef = 'main'] = process.argv.slice(2)

  // Shared filter for the query-builder based queries below.
  const scope = and(
    eq(githubContentCache.repo, repo),
    eq(githubContentCache.gitRef, gitRef),
  )

  console.log(`\n=== ${repo}@${gitRef} ===`)

  const [{ total }] = await db
    .select({ total: sql<number>`count(*)::int` })
    .from(githubContentCache)
    .where(scope)
  console.log(`total: ${total}`)

  console.log('\nby contentKind:')
  const byKind = await db
    .select({
      kind: githubContentCache.contentKind,
      rows: sql<number>`count(*)::int`,
      present: sql<number>`count(*) filter (where ${githubContentCache.isPresent})::int`,
      absent: sql<number>`count(*) filter (where not ${githubContentCache.isPresent})::int`,
    })
    .from(githubContentCache)
    .where(scope)
    .groupBy(githubContentCache.contentKind)
  console.table(byKind)

  // The raw queries below interpolate `repo` / `gitRef` through the `sql`
  // template tag rather than concatenating them into the query text.
  console.log('\ntop-level path segment distribution:')
  const byTopSegment = await db.execute(sql`
    select
      content_kind,
      split_part(path, '/', 1) as top,
      count(*)::int as rows
    from github_content_cache
    where repo = ${repo} and git_ref = ${gitRef}
    group by content_kind, top
    order by count(*) desc
    limit 30
  `)
  console.table(byTopSegment)

  console.log('\nfile extension distribution (file kind only):')
  const byExt = await db.execute(sql`
    select
      case
        when path ~ '\\.[a-zA-Z0-9]+$' then regexp_replace(path, '.*\\.([a-zA-Z0-9]+)$', '\\1')
        else '(none)'
      end as ext,
      count(*)::int as rows
    from github_content_cache
    where repo = ${repo} and git_ref = ${gitRef} and content_kind = 'file'
    group by ext
    order by count(*) desc
    limit 30
  `)
  console.table(byExt)

  console.log('\nsample 20 random file paths:')
  const samples = await db.execute(sql`
    select path, is_present, length(coalesce(text_content, '')) as text_len
    from github_content_cache
    where repo = ${repo} and git_ref = ${gitRef} and content_kind = 'file'
    order by random()
    limit 20
  `)
  console.table(samples)

  console.log('\nsample 10 random dir paths:')
  const dirSamples = await db.execute(sql`
    select path, is_present, jsonb_array_length(coalesce(json_content, '[]'::jsonb)) as entries
    from github_content_cache
    where repo = ${repo} and git_ref = ${gitRef} and content_kind = 'dir'
    order by random()
    limit 10
  `)
  console.table(dirSamples)

  console.log('\npath length distribution:')
  const lenBuckets = await db.execute(sql`
    select
      case
        when length(path) < 50 then '< 50'
        when length(path) < 100 then '50-100'
        when length(path) < 200 then '100-200'
        when length(path) < 500 then '200-500'
        else '500+'
      end as bucket,
      count(*)::int as rows
    from github_content_cache
    where repo = ${repo} and git_ref = ${gitRef}
    group by bucket
    order by min(length(path))
  `)
  console.table(lenBuckets)
}
108+
109+
// Run the script: exit 0 once main() resolves, or print the error and exit 1.
void (async () => {
  try {
    await main()
  } catch (err) {
    console.error(err)
    process.exit(1)
  }
  process.exit(0)
})()

scripts/inspect-content-cache.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import { sql } from 'drizzle-orm'
2+
import { db } from '../src/db/client'
3+
import { docsArtifactCache, githubContentCache } from '../src/db/schema'
4+
5+
/**
 * Prints an overview of both cache tables: the total row count of
 * `github_content_cache`, its rows grouped by repo and by (repo, gitRef),
 * a breakdown of row age based on `created_at`, and finally the total row
 * count of `docs_artifact_cache`.
 */
async function main() {
  // Reusable aggregate fragments shared by the queries below.
  const rowCount = sql<number>`count(*)::int`
  const mostRowsFirst = sql`count(*) desc`

  console.log('\n=== github_content_cache ===')

  const contentTotals = await db
    .select({ total: rowCount })
    .from(githubContentCache)
  const [{ total }] = contentTotals
  console.log(`total rows: ${total}`)

  console.log('\nby repo:')
  const repoBreakdown = await db
    .select({
      repo: githubContentCache.repo,
      rows: rowCount,
      refs: sql<number>`count(distinct ${githubContentCache.gitRef})::int`,
    })
    .from(githubContentCache)
    .groupBy(githubContentCache.repo)
    .orderBy(mostRowsFirst)
  console.table(repoBreakdown)

  console.log('\ntop 20 (repo, gitRef) by row count:')
  const refBreakdown = await db
    .select({
      repo: githubContentCache.repo,
      gitRef: githubContentCache.gitRef,
      rows: rowCount,
      oldestCreated: sql<Date>`min(${githubContentCache.createdAt})`,
      newestCreated: sql<Date>`max(${githubContentCache.createdAt})`,
      newestUpdated: sql<Date>`max(${githubContentCache.updatedAt})`,
    })
    .from(githubContentCache)
    .groupBy(githubContentCache.repo, githubContentCache.gitRef)
    .orderBy(mostRowsFirst)
    .limit(20)
  console.table(refBreakdown)

  console.log('\ncreatedAt age buckets:')
  const ageQuery = sql`
    select
      case
        when created_at > now() - interval '1 day' then '0-1d'
        when created_at > now() - interval '7 days' then '1-7d'
        when created_at > now() - interval '30 days' then '7-30d'
        when created_at > now() - interval '90 days' then '30-90d'
        when created_at > now() - interval '180 days' then '90-180d'
        else '180d+'
      end as bucket,
      count(*)::int as rows
    from github_content_cache
    group by bucket
    order by min(created_at)
  `
  console.table(await db.execute(ageQuery))

  console.log('\n=== docs_artifact_cache ===')
  const artifactTotals = await db
    .select({ totalArt: rowCount })
    .from(docsArtifactCache)
  const [{ totalArt }] = artifactTotals
  console.log(`total rows: ${totalArt}`)
}
65+
66+
// Run the script: exit 0 once main() resolves, or print the error and exit 1.
void (async () => {
  try {
    await main()
  } catch (err) {
    console.error(err)
    process.exit(1)
  }
  process.exit(0)
})()
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import { sql } from 'drizzle-orm'
2+
import { db } from '../src/db/client'
3+
4+
/**
 * Read-only report on `github_content_cache`: prints overall row counts
 * (total, present, absent, absent rows not updated for over a day, any rows
 * not updated for over 30 days), followed by the top 20 repos ranked by
 * absent rows not updated for over a day. Nothing is deleted here.
 */
async function main() {
  console.log('\n=== preview prune impact ===')

  const summaryQuery = sql`
    select
      count(*)::int as total_rows,
      count(*) filter (where is_present)::int as present_rows,
      count(*) filter (where not is_present)::int as absent_rows,
      count(*) filter (where not is_present and updated_at < now() - interval '1 day')::int as absent_older_than_1d,
      count(*) filter (where updated_at < now() - interval '30 days')::int as any_older_than_30d
    from github_content_cache
  `
  const summary = await db.execute(summaryQuery)
  console.table(summary)

  console.log('\nbreakdown of absent rows by repo (top 20):')
  const absentByRepoQuery = sql`
    select repo, count(*)::int as absent_rows
    from github_content_cache
    where not is_present and updated_at < now() - interval '1 day'
    group by repo
    order by count(*) desc
    limit 20
  `
  const absentByRepo = await db.execute(absentByRepoQuery)
  console.table(absentByRepo)
}
29+
30+
// Run the script: exit 0 once main() resolves, or print the error and exit 1.
void (async () => {
  try {
    await main()
  } catch (err) {
    console.error(err)
    process.exit(1)
  }
  process.exit(0)
})()
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { eq, sql } from 'drizzle-orm'
2+
import { db } from '../src/db/client'
3+
import { githubContentCache } from '../src/db/schema'
4+
5+
/**
6+
* One-shot cleanup: delete every negative cache row (is_present = false).
7+
*
8+
* The daily prune fn keeps negatives for 1 day going forward, but the
9+
* existing rows had their updated_at bumped by mark*Stale so they all
10+
* look fresh. They're safe to drop wholesale — anything still being
11+
* requested will repopulate within 15 minutes (the negative TTL), and
12+
* most are bogus paths that won't be requested again.
13+
*/
14+
async function main() {
  const startedAt = Date.now()

  // Same aggregate for the before/after snapshots of the whole table.
  const rowCount = sql<number>`count(*)::int`

  const beforeRows = await db
    .select({ before: rowCount })
    .from(githubContentCache)
  const [{ before }] = beforeRows
  console.log(`[prune-negatives] rows before: ${before}`)

  // Delete every row flagged as not present; the returned ids are only used
  // to report how many rows were removed.
  const removed = await db
    .delete(githubContentCache)
    .where(eq(githubContentCache.isPresent, false))
    .returning({ id: githubContentCache.id })

  const afterRows = await db
    .select({ after: rowCount })
    .from(githubContentCache)
  const [{ after }] = afterRows

  const elapsedMs = Date.now() - startedAt
  console.log(
    `[prune-negatives] ✓ Deleted ${removed.length} negative rows in ${elapsedMs}ms (rows after: ${after})`,
  )
}
36+
37+
// Run the script: exit 0 once main() resolves, or log the failure and exit 1.
void (async () => {
  try {
    await main()
  } catch (err) {
    console.error('[prune-negatives] ✗ Failed:', err)
    process.exit(1)
  }
  process.exit(0)
})()

0 commit comments

Comments
 (0)