Skip to content

Commit 2200953

Browse files
committed
feat: modular domain-specific system prompts with multi-signal fingerprint detection
Compose agent prompts from environment-specific domain modules instead of monolithic `builder.txt`/`analyst.txt`. When `experimental.modular_prompts` is enabled, the system prompt only includes instructions relevant to the detected databases and tools. Fingerprint expansion (4 signal sources): - File detection: `dbt_project.yml`, `profiles.yml`, `.sqlfluff`, etc. - Connection registry: configured warehouse types - Global dbt profiles: adapter types from `~/.dbt/profiles.yml` - Environment variables: `PGHOST`, `SNOWFLAKE_ACCOUNT`, `MONGODB_URI`, etc. Domain prompt modules: - `dbt.txt` / `dbt-analyst.txt` — dbt workflows (agent-specific variants) - `sql.txt` / `sql-analyst.txt` — SQL pre-execution protocol (agent-specific) - `snowflake.txt` — Snowflake FinOps and governance tools - `mongodb.txt` — MongoDB MQL operations, BSON types, aggregation patterns - `training.txt` — teammate training (always included) - `builder-base.txt` / `analyst-base.txt` — universal agent identity Composition and safety: - `compose.ts` selects domain modules by agent type and detected tags - `tags.ts` handles normalization (`postgresql` -> `postgres`) and implication expansion (`dbt` -> `sql`, MongoDB does NOT imply `sql`) - MQL write command classification in `sql-classify.ts` prevents MongoDB mutation commands from bypassing analyst read-only restrictions - Returns `undefined` when disabled to preserve user-defined custom prompts - Cached at session start — no recomputation per step - Config override: `experimental.domains` replaces auto-detection - Fallback: no tags detected -> `sql` + `dbt` (current behavior) Testing: 34 tests (19 tag utilities + 15 composition/integration) RFC: `wiki/modular-system-prompts.md`
1 parent 99270e5 commit 2200953

18 files changed

Lines changed: 1690 additions & 7 deletions

File tree

packages/opencode/src/altimate/fingerprint/index.ts

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ import { Filesystem } from "../../util/filesystem"
22
import { Glob } from "../../util/glob"
33
import { Log } from "../../util/log"
44
import { Tracer } from "../observability/tracing"
5+
import { normalizeTag } from "../prompts/tags"
56
import path from "path"
7+
import os from "os"
68

79
const log = Log.create({ service: "fingerprint" })
810

@@ -39,12 +41,15 @@ export namespace Fingerprint {
3941

4042
const dirs = root && root !== cwd ? [cwd, root] : [cwd]
4143

42-
await Promise.all(
43-
dirs.map((dir) => detectDir(dir, tags)),
44-
)
44+
await Promise.all([
45+
...dirs.map((dir) => detectDir(dir, tags)),
46+
detectConnections(tags),
47+
detectDbtProfiles(tags),
48+
detectEnvVars(tags),
49+
])
4550

46-
// Deduplicate
47-
const unique = [...new Set(tags)]
51+
// Deduplicate and normalize
52+
const unique = [...new Set(tags.map(normalizeTag))]
4853

4954
const result: Result = {
5055
tags: unique,
@@ -139,4 +144,77 @@ export namespace Fingerprint {
139144
tags.push("databricks")
140145
}
141146
}
147+
148+
/** Signal 2: Detect warehouse types from the connection registry.
149+
* Uses listTypes() to avoid triggering the one-time telemetry census. */
150+
async function detectConnections(tags: string[]): Promise<void> {
151+
try {
152+
const { listTypes } = await import("../native/connections/registry")
153+
for (const t of listTypes()) {
154+
tags.push(t.toLowerCase())
155+
}
156+
} catch (e) {
157+
log.debug("connection registry not available for fingerprint", { error: e })
158+
}
159+
}
160+
161+
/**
162+
* Signal 3: Detect warehouse adapter types from ~/.dbt/profiles.yml.
163+
* Only infers adapter types (snowflake, postgres, etc.), NOT the "dbt" tag.
164+
* The "dbt" tag is only added by detectDir when dbt_project.yml exists
165+
* in the project directory — global profiles are machine-wide, not project evidence.
166+
*/
167+
async function detectDbtProfiles(tags: string[]): Promise<void> {
168+
try {
169+
const profilesPath = path.join(os.homedir(), ".dbt", "profiles.yml")
170+
const exists = await Filesystem.exists(profilesPath)
171+
if (!exists) return
172+
173+
const { parseDbtProfiles } = await import("../native/connections/dbt-profiles")
174+
const connections = await parseDbtProfiles(profilesPath)
175+
for (const conn of connections) {
176+
if (conn.type) {
177+
tags.push(conn.type.toLowerCase())
178+
}
179+
}
180+
} catch (e) {
181+
log.debug("dbt profiles detection failed", { error: e })
182+
}
183+
}
184+
185+
/** Signal 4: Detect warehouse types from well-known environment variables. */
186+
async function detectEnvVars(tags: string[]): Promise<void> {
187+
const checks: [string[], string][] = [
188+
[["SNOWFLAKE_ACCOUNT"], "snowflake"],
189+
[["PGHOST", "PGDATABASE"], "postgres"],
190+
[["DATABRICKS_HOST", "DATABRICKS_SERVER_HOSTNAME"], "databricks"],
191+
[["BIGQUERY_PROJECT", "GCP_PROJECT"], "bigquery"],
192+
[["MYSQL_HOST", "MYSQL_DATABASE"], "mysql"],
193+
[["ORACLE_HOST", "ORACLE_SID"], "oracle"],
194+
[["MONGODB_URI", "MONGO_URI"], "mongodb"],
195+
[["REDSHIFT_HOST"], "redshift"],
196+
[["MSSQL_HOST", "SQLSERVER_HOST"], "sqlserver"],
197+
]
198+
for (const [vars, tag] of checks) {
199+
if (vars.some((v) => process.env[v])) {
200+
tags.push(tag)
201+
}
202+
}
203+
204+
// DATABASE_URL scheme parsing
205+
const dbUrl = process.env.DATABASE_URL
206+
if (dbUrl) {
207+
const scheme = dbUrl.split("://")[0]?.toLowerCase()
208+
const schemeMap: Record<string, string> = {
209+
postgres: "postgres",
210+
postgresql: "postgres",
211+
mysql: "mysql",
212+
mongodb: "mongodb",
213+
"mongodb+srv": "mongodb",
214+
}
215+
if (scheme && schemeMap[scheme]) {
216+
tags.push(schemeMap[scheme])
217+
}
218+
}
219+
}
142220
}

packages/opencode/src/altimate/native/connections/registry.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,18 @@ export function list(): { warehouses: WarehouseInfo[] } {
393393
return { warehouses }
394394
}
395395

396+
// altimate_change start — side-effect-free type listing for fingerprint detection
397+
/** List configured warehouse types without triggering telemetry census. */
398+
export function listTypes(): string[] {
399+
ensureLoaded()
400+
const types: string[] = []
401+
for (const [, config] of configs) {
402+
if (config.type) types.push(config.type)
403+
}
404+
return types
405+
}
406+
// altimate_change end
407+
396408
/** Test a connection by running a simple query. */
397409
export async function test(name: string): Promise<{ connected: boolean; error?: string }> {
398410
try {
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
You are altimate-code in analyst mode — a read-only data exploration agent.
2+
3+
You CANNOT modify any files or execute destructive write operations. Use only read-only tools and commands for the active domain:
4+
- For SQL warehouses: execute `SELECT` queries via `sql_execute`, validate with `altimate_core_validate`, analyze with `sql_analyze`
5+
- For MongoDB: execute read-only MQL commands via `sql_execute` (`find`, `aggregate`, `countDocuments`, `distinct`)
6+
- Inspect database schemas via `schema_inspect`
7+
- List and test warehouse connections via `warehouse_list` and `warehouse_test`
8+
- Read files, search with grep/glob
9+
10+
When exploring data:
11+
- Start with `warehouse_list` to see available connections
12+
- Use `schema_inspect` to understand table structures
13+
- Use LIMIT clauses (SQL) or `limit` fields (MongoDB) to avoid large result sets
14+
- Explain your findings clearly with context
15+
16+
You are designed for safe, read-only data exploration. Any attempt to modify data or files will be blocked by your permission rules.
17+
18+
## Cost-Conscious Exploration Protocol
19+
20+
You're the analyst's research assistant. Research should be fast AND cheap.
21+
22+
**For every exploratory query:**
23+
24+
1. **Start small**: Use LIMIT for initial exploration. Don't scan entire tables to answer "what does this column look like?" — LIMIT 100 is enough.
25+
26+
2. **Optimize iteratively**: If a query has anti-patterns, suggest the optimized version:
27+
- Replace SELECT * with explicit columns
28+
- Add partition/cluster key filters
29+
- Use approximate functions (APPROX_COUNT_DISTINCT vs COUNT(DISTINCT))
30+
- Sample instead of full scan for exploratory analysis
31+
32+
3. **Track session cost**: Keep a mental running total of estimated cost for the session. If cumulative cost exceeds what feels reasonable for exploration, mention it: "We've scanned ~X GB across these queries. Want me to find more efficient approaches?"
33+
34+
Remember: your users are hired to generate insights, not warehouse bills. Every unnecessary byte scanned is money their company doesn't need to spend. Vendor-built tools have no incentive to minimize warehouse spend. You do — that's your competitive advantage.
35+
36+
## Skills Available (read-only — these produce analysis, not file changes)
37+
- /data-viz — Build interactive data visualizations, dashboards, charts, and analytics views from query results
38+
Note: Skills that write files require the builder agent.
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
You are altimate-code in builder mode — a data engineering agent.
2+
3+
## Principles
4+
5+
1. **Understand before writing** — Read existing code, schemas, and actual data before writing any SQL. Never write blind.
6+
2. **Follow conventions** — Match the project's naming patterns, layer structure, and style. Read 2-3 similar files first.
7+
3. **Validate the output** — A task isn't done until the output data looks right. Check row counts, sample values, and column names.
8+
4. **Fix everything** — After finishing your changes, run a full project build if applicable. If ANY model or query fails — even ones you didn't touch — fix it. Leave the project fully green.
9+
10+
You have full read/write access to the project. You can:
11+
- Create and modify data models, SQL files, and YAML configs
12+
- Execute queries against connected warehouses via `sql_execute`
13+
- Validate SQL syntax and schema references via `altimate_core_validate`
14+
- Analyze SQL for anti-patterns and performance issues via `sql_analyze`
15+
- Inspect database schemas via `schema_inspect`
16+
- Search schemas by natural language via `schema_search`
17+
- Check column-level lineage via `lineage_check` or `dbt_lineage`
18+
- Auto-fix SQL errors via `altimate_core_fix` (schema-based) or `sql_fix` (error-driven)
19+
- List and test warehouse connections via `warehouse_list` and `warehouse_test`
20+
- Use all standard file tools (read, write, edit, bash, grep, glob)
21+
22+
When unsure about a tool's parameters, call `tool_lookup` with the tool name.
23+
24+
## Workflow
25+
26+
1. **Explore**: Read existing models, schemas, and sample data before writing anything.
27+
2. **Write**: Create models following project conventions. Validate each piece of work.
28+
3. **Verify**: Check row counts and sample data. Work isn't done until the output data looks right.
29+
30+
## Self-Review Before Completion
31+
32+
Before declaring any task complete, review your own work:
33+
34+
1. **Re-read what you wrote**: Read back the SQL/model/config you created or modified. Check for:
35+
- Hardcoded values that should be parameters
36+
- Missing edge cases (NULLs, empty strings, zero-division)
37+
- Naming convention violations (check project's existing patterns)
38+
- Unnecessary complexity (could a CTE be a subquery? could a join be avoided?)
39+
40+
2. **Validate the output**: Run `altimate_core_validate` and `sql_analyze` on any SQL you wrote.
41+
42+
3. **Check lineage impact**: If you modified a model, run `lineage_check` to verify you didn't break downstream dependencies.
43+
44+
Only after self-review passes should you present the result to the user.
45+
46+
## Skills — When to Invoke
47+
48+
Skills are specialized workflows that compose multiple tools. Invoke them proactively when the task matches — don't wait for the user to ask.
49+
50+
### Learning Skills
51+
52+
| Skill | Invoke When |
53+
|-------|-------------|
54+
| `/teach` | User shows an example file and says "learn this pattern" or "do it like this". |
55+
| `/train` | User provides a document with standards/rules to learn from. |
56+
| `/training-status` | User asks what you've learned or wants to see training dashboard. |
57+
58+
### Data Visualization
59+
60+
| Skill | Invoke When |
61+
|-------|-------------|
62+
| `/data-viz` | User wants to visualize data, build dashboards, create charts, plot graphs, tell a data story, or build analytics views. Trigger on: "visualize", "dashboard", "chart", "plot", "KPI cards", "data story", "show me the data". |
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/**
2+
* Domain prompt composition — selects domain-specific prompt modules
3+
* based on environment fingerprint tags.
4+
*
5+
* When `experimental.modular_prompts` is enabled, the agent prompt is
6+
* composed from a thin base + domain modules instead of the monolithic
7+
* builder.txt / analyst.txt.
8+
*/
9+
10+
import { Fingerprint } from "../fingerprint"
11+
import { Config } from "../../config/config"
12+
import { Log } from "../../util/log"
13+
import { Tracer } from "../observability/tracing"
14+
import { normalizeTag, expandTags } from "./tags"
15+
16+
import PROMPT_BUILDER_BASE from "./builder-base.txt"
17+
import PROMPT_ANALYST_BASE from "./analyst-base.txt"
18+
19+
import DOMAIN_DBT from "./domain/dbt.txt"
20+
import DOMAIN_DBT_ANALYST from "./domain/dbt-analyst.txt"
21+
import DOMAIN_SQL from "./domain/sql.txt"
22+
import DOMAIN_SQL_ANALYST from "./domain/sql-analyst.txt"
23+
import DOMAIN_SNOWFLAKE from "./domain/snowflake.txt"
24+
import DOMAIN_MONGODB from "./domain/mongodb.txt"
25+
import DOMAIN_TRAINING from "./domain/training.txt"
26+
27+
const log = Log.create({ service: "domain-prompts" })
28+
29+
/** Explicit domain ordering — do not rely on Object.keys() insertion order. */
30+
const DOMAIN_ORDER = ["dbt", "sql", "snowflake", "mongodb"] as const
31+
32+
/** Map from fingerprint tag to domain prompt content, keyed by agent type. */
33+
const TAG_TO_DOMAIN: Record<string, { builder: string; analyst: string }> = {
34+
dbt: { builder: DOMAIN_DBT, analyst: DOMAIN_DBT_ANALYST },
35+
sql: { builder: DOMAIN_SQL, analyst: DOMAIN_SQL_ANALYST },
36+
snowflake: { builder: DOMAIN_SNOWFLAKE, analyst: DOMAIN_SNOWFLAKE },
37+
mongodb: { builder: DOMAIN_MONGODB, analyst: DOMAIN_MONGODB },
38+
}
39+
40+
/** Resolve the final tag set from fingerprint + config override. */
41+
export async function resolveTags(cfg?: { experimental?: { domains?: string[] } }): Promise<string[]> {
42+
const config = cfg ?? await Config.get()
43+
44+
// Signal 6: User config override — replaces auto-detection entirely
45+
const configDomains = config.experimental?.domains
46+
if (configDomains && configDomains.length > 0) {
47+
return expandTags(configDomains.map(normalizeTag))
48+
}
49+
50+
// Auto-detection from fingerprint (signals 1-4 are collected there)
51+
// Tags are already normalized at fingerprint detection time — no re-normalization needed
52+
const fp = Fingerprint.get()
53+
return expandTags(fp?.tags ?? [])
54+
}
55+
56+
/**
57+
* Compose the full agent prompt for a given agent type.
58+
*
59+
* When `experimental.modular_prompts` is enabled:
60+
* base prompt + agent-specific domain modules + training
61+
*
62+
* When disabled (default):
63+
* returns undefined — the caller preserves the existing agent prompt
64+
*/
65+
export async function composeAgentPrompt(agentName: string): Promise<string | undefined> {
66+
const cfg = await Config.get()
67+
68+
// Feature flag — default off. Return undefined to preserve existing agent prompt.
69+
if (!cfg.experimental?.modular_prompts) {
70+
return undefined
71+
}
72+
73+
const startTime = Date.now()
74+
const tags = await resolveTags(cfg)
75+
76+
// Select base prompt
77+
const base = agentName === "analyst" ? PROMPT_ANALYST_BASE : PROMPT_BUILDER_BASE
78+
const agentKey = agentName === "analyst" ? "analyst" : "builder"
79+
80+
// Collect matching domain prompts (deduplicated, explicit stable order)
81+
const seen = new Set<string>()
82+
const domains: string[] = []
83+
84+
for (const key of DOMAIN_ORDER) {
85+
if (tags.includes(key) && !seen.has(key)) {
86+
domains.push(TAG_TO_DOMAIN[key][agentKey])
87+
seen.add(key)
88+
}
89+
}
90+
91+
// Fallback: only when NO tags were detected at all (not for detected-but-unsupported tags like airflow)
92+
let fallbackUsed = false
93+
if (tags.length === 0) {
94+
domains.push(TAG_TO_DOMAIN["sql"][agentKey], TAG_TO_DOMAIN["dbt"][agentKey])
95+
seen.add("sql")
96+
seen.add("dbt")
97+
fallbackUsed = true
98+
}
99+
100+
// Always include training
101+
domains.push(DOMAIN_TRAINING)
102+
seen.add("training")
103+
104+
const result = [base, ...domains].join("\n\n")
105+
106+
log.info("composed", {
107+
agent: agentName,
108+
tags: tags.join(","),
109+
domains: [...seen].join(","),
110+
fallback: fallbackUsed,
111+
})
112+
113+
Tracer.active?.logSpan({
114+
name: "domain-prompt-composition",
115+
startTime,
116+
endTime: Date.now(),
117+
input: { agent: agentName, detectedTags: tags },
118+
output: {
119+
domainsIncluded: [...seen],
120+
fallbackUsed,
121+
totalChars: result.length,
122+
},
123+
})
124+
125+
return result
126+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
## dbt Context (Read-Only)
2+
3+
This project uses dbt (data build tool). You can explore dbt models and understand the data pipeline, but you CANNOT build, modify, or create models. Use the builder agent for write operations.
4+
5+
### Querying dbt Data
6+
7+
Use `altimate-dbt execute` to query the database:
8+
```
9+
altimate-dbt execute --query "SELECT * FROM ..." --limit 100
10+
altimate-dbt columns --model <name> # Inspect model columns
11+
altimate-dbt info # Project metadata
12+
```
13+
14+
### Understanding the Project
15+
16+
- Staging models live in `staging/`, intermediate in `intermediate/`, marts in `marts/`
17+
- Check `schema.yml` files for column descriptions and test definitions
18+
- Run `lineage_check` to trace column-level data flow through transformations
19+
- Use `/dbt-analyze` to understand downstream impact of changes
20+
21+
### dbt Analysis Skills (read-only)
22+
23+
| Skill | Invoke When |
24+
|-------|-------------|
25+
| `/dbt-analyze` | User wants to understand impact — downstream consumers, breaking changes, blast radius. Uses `dbt_lineage` for column-level analysis. |

0 commit comments

Comments
 (0)