77 "sort"
88 "time"
99
10+ "github.com/keboola/keboola-sdk-go/v2/pkg/keboola"
11+
1012 "github.com/keboola/keboola-as-code/internal/pkg/filesystem"
1113 "github.com/keboola/keboola-as-code/internal/pkg/llm/twinformat/templates"
1214 "github.com/keboola/keboola-as-code/internal/pkg/llm/twinformat/writer"
@@ -95,6 +97,10 @@ func (g *Generator) Generate(ctx context.Context, data *ProcessedData) error {
9597 return errors .Errorf ("failed to generate AI guide: %w" , err )
9698 }
9799
100+ if err := g .generateDataDictionary (ctx , data ); err != nil {
101+ return errors .Errorf ("failed to generate data dictionary: %w" , err )
102+ }
103+
98104 g .logger .Infof (ctx , "Twin format output generated successfully" )
99105 return nil
100106}
@@ -276,35 +282,10 @@ func (g *Generator) generateTableMetadata(ctx context.Context, table *ProcessedT
276282// buildColumnDetails builds detailed column information including metadata.
277283func (g * Generator ) buildColumnDetails (table * ProcessedTable ) []map [string ]any {
278284 columns := make ([]map [string ]any , 0 , len (table .Columns ))
279-
280285 for _ , colName := range table .Columns {
281- col := map [string ]any {
282- "name" : colName ,
283- }
284-
285- // Extract column metadata if available
286- if table .Table != nil && table .ColumnMetadata != nil {
287- if colMeta , ok := table .ColumnMetadata [colName ]; ok {
288- for _ , meta := range colMeta {
289- switch meta .Key {
290- case "KBC.datatype.basetype" :
291- col ["base_type" ] = meta .Value
292- case "KBC.datatype.type" :
293- col ["type" ] = meta .Value
294- case "KBC.datatype.nullable" :
295- col ["nullable" ] = meta .Value == "1" || meta .Value == "true"
296- case "KBC.datatype.length" :
297- col ["length" ] = meta .Value
298- case "KBC.description" :
299- col ["description" ] = meta .Value
300- }
301- }
302- }
303- }
304-
286+ col := buildColumnDetailEntry (colName , table .ColumnMetadata )
305287 columns = append (columns , col )
306288 }
307-
308289 return columns
309290}
310291
@@ -835,6 +816,11 @@ func (g *Generator) generateSourcesIndex(ctx context.Context, data *ProcessedDat
835816 docFields := SourcesIndexDocFields ()
836817 sources := buildSourcesList (data .Buckets )
837818
819+ // Sort sources by ID for deterministic output.
820+ sort .Slice (sources , func (i , j int ) bool {
821+ return sources [i ]["id" ].(string ) < sources [j ]["id" ].(string )
822+ })
823+
838824 sourcesIndex := map [string ]any {
839825 "_comment" : docFields .Comment ,
840826 "_purpose" : docFields .Purpose ,
@@ -878,6 +864,8 @@ func buildSourcesList(buckets []*ProcessedBucket) []map[string]any {
878864
879865 sources := make ([]map [string ]any , 0 , len (sourceMap ))
880866 for _ , info := range sourceMap {
867+ // Sort buckets for deterministic output.
868+ sort .Strings (info .Buckets )
881869 sources = append (sources , map [string ]any {
882870 "id" : info .ID ,
883871 "name" : info .Name ,
@@ -890,6 +878,60 @@ func buildSourcesList(buckets []*ProcessedBucket) []map[string]any {
890878 return sources
891879}
892880
881+ // buildColumnDetailEntry builds a detailed column entry map for table metadata.
882+ // Extracts all available metadata fields (base_type, type, nullable, length, description).
883+ func buildColumnDetailEntry (colName string , columnMetadata keboola.ColumnsMetadata ) map [string ]any {
884+ col := map [string ]any {"name" : colName }
885+ if columnMetadata == nil {
886+ return col
887+ }
888+
889+ colMeta , ok := columnMetadata [colName ]
890+ if ! ok {
891+ return col
892+ }
893+
894+ for _ , meta := range colMeta {
895+ switch meta .Key {
896+ case "KBC.datatype.basetype" :
897+ col ["base_type" ] = meta .Value
898+ case "KBC.datatype.type" :
899+ col ["type" ] = meta .Value
900+ case "KBC.datatype.nullable" :
901+ col ["nullable" ] = meta .Value == "1" || meta .Value == "true"
902+ case "KBC.datatype.length" :
903+ col ["length" ] = meta .Value
904+ case "KBC.description" :
905+ col ["description" ] = meta .Value
906+ }
907+ }
908+ return col
909+ }
910+
911+ // buildColumnEntry builds a column entry map for data dictionary.
912+ // Extracts type and description from column metadata if available.
913+ func buildColumnEntry (colName string , columnMetadata keboola.ColumnsMetadata ) map [string ]any {
914+ col := map [string ]any {"name" : colName }
915+ if columnMetadata == nil {
916+ return col
917+ }
918+
919+ colMeta , ok := columnMetadata [colName ]
920+ if ! ok {
921+ return col
922+ }
923+
924+ for _ , meta := range colMeta {
925+ switch meta .Key {
926+ case "KBC.datatype.basetype" :
927+ col ["type" ] = meta .Value
928+ case "KBC.description" :
929+ col ["description" ] = meta .Value
930+ }
931+ }
932+ return col
933+ }
934+
893935// formatSourceName converts a source ID to a human-readable name.
894936func formatSourceName (source string ) string {
895937 names := map [string ]string {
@@ -1050,7 +1092,11 @@ func (g *Generator) generateMostConnectedNodes(ctx context.Context, data *Proces
10501092 }
10511093
10521094 sort .Slice (nodes , func (i , j int ) bool {
1053- return nodes [i ].Connections > nodes [j ].Connections
1095+ if nodes [i ].Connections != nodes [j ].Connections {
1096+ return nodes [i ].Connections > nodes [j ].Connections
1097+ }
1098+ // Secondary sort by UID for deterministic order when connections are equal.
1099+ return nodes [i ].UID < nodes [j ].UID
10541100 })
10551101
10561102 // Limit to top 20 nodes.
@@ -1105,6 +1151,11 @@ func (g *Generator) generateManifestExtended(ctx context.Context, data *Processe
11051151 docFields := ManifestExtendedDocFields ()
11061152 sources := buildSourcesList (data .Buckets )
11071153
1154+ // Sort sources by ID for deterministic output.
1155+ sort .Slice (sources , func (i , j int ) bool {
1156+ return sources [i ]["id" ].(string ) < sources [j ]["id" ].(string )
1157+ })
1158+
11081159 // Build platform counts.
11091160 platformCounts := make (map [string ]int )
11101161 for _ , transform := range data .Transformations {
@@ -1209,6 +1260,97 @@ func (g *Generator) generateAIGuide(ctx context.Context, _ *ProcessedData) error
12091260 return nil
12101261}
12111262
1263+ // generateDataDictionary generates a data dictionary from the exported data.
1264+ func (g * Generator ) generateDataDictionary (ctx context.Context , data * ProcessedData ) error {
1265+ g .logger .Debugf (ctx , "Generating data dictionary" )
1266+
1267+ // Create documentation directory.
1268+ docDir := filesystem .Join (g .outputDir , "documentation" )
1269+ if err := g .fs .Mkdir (ctx , docDir ); err != nil {
1270+ return errors .Errorf ("failed to create documentation directory: %w" , err )
1271+ }
1272+
1273+ // Build tables section from actual data.
1274+ tables := make (map [string ]map [string ]any )
1275+ for _ , table := range data .Tables {
1276+ tableEntry := map [string ]any {
1277+ "name" : table .Name ,
1278+ "bucket" : table .BucketName ,
1279+ "source" : table .Source ,
1280+ "rows_count" : table .RowsCount ,
1281+ "column_count" : len (table .Columns ),
1282+ }
1283+
1284+ if table .DisplayName != "" {
1285+ tableEntry ["display_name" ] = table .DisplayName
1286+ }
1287+
1288+ // Add column details.
1289+ columns := make ([]map [string ]any , 0 , len (table .Columns ))
1290+ for _ , colName := range table .Columns {
1291+ col := buildColumnEntry (colName , table .ColumnMetadata )
1292+ columns = append (columns , col )
1293+ }
1294+ tableEntry ["columns" ] = columns
1295+
1296+ tables [table .UID ] = tableEntry
1297+ }
1298+
1299+ // Build transformations section.
1300+ transformations := make (map [string ]map [string ]any )
1301+ for _ , transform := range data .Transformations {
1302+ entry := map [string ]any {
1303+ "name" : transform .Name ,
1304+ "platform" : transform .Platform ,
1305+ "is_disabled" : transform .IsDisabled ,
1306+ }
1307+ if transform .Description != "" {
1308+ entry ["description" ] = transform .Description
1309+ }
1310+ if transform .Dependencies != nil {
1311+ entry ["inputs" ] = transform .Dependencies .Consumes
1312+ entry ["outputs" ] = transform .Dependencies .Produces
1313+ }
1314+ transformations [transform .UID ] = entry
1315+ }
1316+
1317+ // Build components section.
1318+ components := make (map [string ]map [string ]any )
1319+ for _ , config := range data .ComponentConfigs {
1320+ entry := map [string ]any {
1321+ "name" : config .Name ,
1322+ "component_id" : config .ComponentID ,
1323+ "component_type" : config .ComponentType ,
1324+ "is_disabled" : config .IsDisabled ,
1325+ }
1326+ if config .Description != "" {
1327+ entry ["description" ] = config .Description
1328+ }
1329+ components [config .ID ] = entry
1330+ }
1331+
1332+ // Build the data dictionary.
1333+ dictionary := map [string ]any {
1334+ "_comment" : "Auto-generated data dictionary from Keboola project" ,
1335+ "_purpose" : "Comprehensive reference of all tables, transformations, and components" ,
1336+ "_update_frequency" : "Generated on each export" ,
1337+ "project_id" : data .ProjectID .String (),
1338+ "generated_at" : data .ProcessedAt .UTC ().Format (time .RFC3339 ),
1339+ "summary" : map [string ]any {
1340+ "total_tables" : len (data .Tables ),
1341+ "total_transformations" : len (data .Transformations ),
1342+ "total_components" : len (data .ComponentConfigs ),
1343+ "total_buckets" : len (data .Buckets ),
1344+ },
1345+ "tables" : tables ,
1346+ "transformations" : transformations ,
1347+ "components" : components ,
1348+ }
1349+
1350+ dictPath := filesystem .Join (docDir , "data-dictionary.json" )
1351+ return g .jsonWriter .Write (ctx , dictPath , dictionary )
1352+ }
1353+
12121354// GenerateSamples generates sample CSV files for tables.
12131355func (g * Generator ) GenerateSamples (ctx context.Context , data * ProcessedData , samples []* TableSample ) error {
12141356 g .logger .Infof (ctx , "Generating samples for %d tables" , len (samples ))
0 commit comments