Skip to content

Commit 7ae90f0

Browse files
authored
Merge pull request #2501 from keboola/vb/DMD-921/llm-export-generator
DMD-921 - Add generator for twin format output
2 parents bd91730 + 1b90192 commit 7ae90f0

16 files changed

Lines changed: 3149 additions & 2 deletions

File tree

CLAUDE.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,37 @@ func buildUID(prefix, name string) string {
226226
- **Dependency management**: Constructor-based DI; interface segregation (small interfaces)
227227
- **Observability**: Structured logging; OpenTelemetry integration; metrics for critical paths
228228
- **Early returns**: Prefer early `return` / `continue` to reduce nesting
229+
- **Default-first assignment**: Assign the default value first, then override it only when a real value is present. Avoid assigning a possibly-nil value and then replacing it with the default afterwards.
230+
231+
Example - instead of:
232+
```go
233+
value := someMap[key]
234+
if value == nil {
235+
value = defaultValue
236+
}
237+
```
238+
Use:
239+
```go
240+
value := defaultValue
241+
if v, ok := someMap[key]; ok {
242+
value = v
243+
}
244+
```
245+
246+
Similarly for struct fields:
247+
```go
248+
// Good: set default, override if not nil
249+
items := []string{}
250+
if data.Items != nil {
251+
items = data.Items
252+
}
253+
254+
// Bad: assign then reset if nil
255+
items := data.Items
256+
if items == nil {
257+
items = []string{}
258+
}
259+
```
229260

230261
### Testing
231262
- Test files use `*_test.go` suffix and are located next to implementation
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package twinformat
2+
3+
// DocFieldsConfig groups the documentation strings that accompany a JSON
// output file of the twin format. Every JSON file in the twin format output
// must carry these fields.
type DocFieldsConfig struct {
	// Comment records how the file is generated; Purpose says what it is for.
	Comment, Purpose string
	// UpdateFrequency states when the file is expected to be refreshed.
	UpdateFrequency string
	// Security and Retention are left empty by the constructors in this file.
	Security, Retention string
}
12+
13+
// DefaultDocFields returns the default documentation fields configuration.
14+
func DefaultDocFields() *DocFieldsConfig {
15+
return &DocFieldsConfig{
16+
UpdateFrequency: "Every sync",
17+
}
18+
}
19+
20+
// BucketsIndexDocFields returns documentation fields for buckets/index.json.
21+
func BucketsIndexDocFields() *DocFieldsConfig {
22+
return &DocFieldsConfig{
23+
Comment: "GENERATION: GET /v2/storage/buckets - aggregate all buckets",
24+
Purpose: "Catalog of all buckets for fast lookup without scanning directories",
25+
UpdateFrequency: "Every sync",
26+
}
27+
}
28+
29+
// TableMetadataDocFields returns documentation fields for table metadata.json files.
30+
func TableMetadataDocFields(source string) *DocFieldsConfig {
31+
return &DocFieldsConfig{
32+
Comment: "GENERATION: GET /v2/storage/tables/{table_id}?include=columns,metadata + computed dependencies",
33+
Purpose: "Table metadata for " + source,
34+
UpdateFrequency: "On table structure changes",
35+
}
36+
}
37+
38+
// TransformationsIndexDocFields returns documentation fields for transformations/index.json.
39+
func TransformationsIndexDocFields() *DocFieldsConfig {
40+
return &DocFieldsConfig{
41+
Comment: "GENERATION: Scan transformation configs + group by platform",
42+
Purpose: "Catalog of all transformations grouped by platform",
43+
UpdateFrequency: "Every sync",
44+
}
45+
}
46+
47+
// TransformationMetadataDocFields returns documentation fields for transformation metadata.json files.
48+
func TransformationMetadataDocFields() *DocFieldsConfig {
49+
return &DocFieldsConfig{
50+
Comment: "GENERATION: From transformation config + platform detection + computed dependencies + job queue status",
51+
Purpose: "Complete transformation configuration and data flow dependencies",
52+
UpdateFrequency: "On transformation config changes and job completion",
53+
}
54+
}
55+
56+
// JobsIndexDocFields returns documentation fields for jobs/index.json.
57+
func JobsIndexDocFields() *DocFieldsConfig {
58+
return &DocFieldsConfig{
59+
Comment: "GENERATION: GET /search/jobs + aggregate statistics",
60+
Purpose: "Job execution statistics and summary",
61+
UpdateFrequency: "Every hour or on job completion",
62+
}
63+
}
64+
65+
// JobMetadataDocFields returns documentation fields for individual job files.
66+
func JobMetadataDocFields() *DocFieldsConfig {
67+
return &DocFieldsConfig{
68+
Comment: "GENERATION: GET /search/jobs/{job_id}",
69+
Purpose: "Individual job execution details",
70+
UpdateFrequency: "On job completion",
71+
}
72+
}
73+
74+
// ManifestExtendedDocFields returns documentation fields for manifest-extended.json.
75+
func ManifestExtendedDocFields() *DocFieldsConfig {
76+
return &DocFieldsConfig{
77+
Comment: "GENERATION: GET /v2/storage/tokens/verify + computed statistics",
78+
Purpose: "Complete project overview in one file for fast AI analysis",
79+
UpdateFrequency: "Every sync",
80+
}
81+
}
82+
83+
// SourcesIndexDocFields returns documentation fields for indices/sources.json.
84+
func SourcesIndexDocFields() *DocFieldsConfig {
85+
return &DocFieldsConfig{
86+
Comment: "GENERATION: Inferred from bucket names + GET /v2/storage components",
87+
Purpose: "Registry of data sources with bucket and table counts",
88+
UpdateFrequency: "Every sync",
89+
}
90+
}
91+
92+
// ComponentsIndexDocFields returns documentation fields for components/index.json.
93+
func ComponentsIndexDocFields() *DocFieldsConfig {
94+
return &DocFieldsConfig{
95+
Comment: "GENERATION: Scan component configs + group by type",
96+
Purpose: "Catalog of all components grouped by type",
97+
UpdateFrequency: "Every sync",
98+
}
99+
}
100+
101+
// ComponentMetadataDocFields returns documentation fields for component metadata.json files.
102+
func ComponentMetadataDocFields() *DocFieldsConfig {
103+
return &DocFieldsConfig{
104+
Comment: "GENERATION: From component config + job queue status",
105+
Purpose: "Complete component configuration and execution status",
106+
UpdateFrequency: "On component config changes and job completion",
107+
}
108+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package twinformat
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestDefaultDocFields(t *testing.T) {
10+
t.Parallel()
11+
12+
fields := DefaultDocFields()
13+
assert.NotNil(t, fields)
14+
assert.Equal(t, "Every sync", fields.UpdateFrequency)
15+
}
16+
17+
func TestBucketsIndexDocFields(t *testing.T) {
18+
t.Parallel()
19+
20+
fields := BucketsIndexDocFields()
21+
assert.NotNil(t, fields)
22+
assert.Contains(t, fields.Comment, "buckets")
23+
assert.Contains(t, fields.Purpose, "Catalog")
24+
assert.Equal(t, "Every sync", fields.UpdateFrequency)
25+
}
26+
27+
func TestTableMetadataDocFields(t *testing.T) {
28+
t.Parallel()
29+
30+
fields := TableMetadataDocFields("shopify")
31+
assert.NotNil(t, fields)
32+
assert.Contains(t, fields.Comment, "tables")
33+
assert.Contains(t, fields.Purpose, "shopify")
34+
}
35+
36+
func TestTransformationsIndexDocFields(t *testing.T) {
37+
t.Parallel()
38+
39+
fields := TransformationsIndexDocFields()
40+
assert.NotNil(t, fields)
41+
assert.Contains(t, fields.Comment, "transformation")
42+
assert.Contains(t, fields.Purpose, "Catalog")
43+
}
44+
45+
func TestJobsIndexDocFields(t *testing.T) {
46+
t.Parallel()
47+
48+
fields := JobsIndexDocFields()
49+
assert.NotNil(t, fields)
50+
assert.Contains(t, fields.Comment, "jobs")
51+
assert.Contains(t, fields.Purpose, "Job execution")
52+
}
53+
54+
func TestManifestExtendedDocFields(t *testing.T) {
55+
t.Parallel()
56+
57+
fields := ManifestExtendedDocFields()
58+
assert.NotNil(t, fields)
59+
assert.Contains(t, fields.Purpose, "project overview")
60+
}
61+
62+
func TestSourcesIndexDocFields(t *testing.T) {
63+
t.Parallel()
64+
65+
fields := SourcesIndexDocFields()
66+
assert.NotNil(t, fields)
67+
assert.Contains(t, fields.Purpose, "data sources")
68+
}
69+
70+
func TestComponentsIndexDocFields(t *testing.T) {
71+
t.Parallel()
72+
73+
fields := ComponentsIndexDocFields()
74+
assert.NotNil(t, fields)
75+
assert.Contains(t, fields.Comment, "component")
76+
}

internal/pkg/llm/twinformat/fetcher.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,9 @@ func (f *Fetcher) FetchTableLastImporter(ctx context.Context, tableID keboola.Ta
271271
// Format: "Keboola Storage API PHP Client/14 kds-team.app-custom-python"
272272
// Returns the last space-separated part which is the component ID.
273273
func extractComponentFromUserAgent(userAgent string) string {
	// strings.Fields splits on any run of whitespace and never yields empty
	// tokens, so trailing or repeated spaces cannot produce an empty
	// "last part" the way splitting on a single space does.
	tokens := strings.Fields(userAgent)
	if len(tokens) == 0 {
		// Empty or whitespace-only user agent: no component to report.
		return ""
	}
	return tokens[len(tokens)-1]
}

0 commit comments

Comments
 (0)