@@ -4,13 +4,17 @@ import (
44 "context"
55 "sort"
66 "strings"
7+ "sync"
78 "time"
89
910 "github.com/keboola/keboola-sdk-go/v2/pkg/keboola"
11+ "golang.org/x/sync/errgroup"
12+ "golang.org/x/sync/semaphore"
1013
1114 "github.com/keboola/keboola-as-code/internal/pkg/llm/twinformat/configparser"
1215 "github.com/keboola/keboola-as-code/internal/pkg/log"
1316 "github.com/keboola/keboola-as-code/internal/pkg/telemetry"
17+ "github.com/keboola/keboola-as-code/internal/pkg/utils/errors"
1418)
1519
1620// FetcherDependencies defines the dependencies required by the Fetcher.
@@ -183,6 +187,162 @@ type componentsResult struct {
183187 ComponentConfigs []* configparser.ComponentConfig
184188}
185189
190+ // TableSample represents a sample of table data.
191+ type TableSample struct {
192+ TableID keboola.TableID
193+ Columns []string
194+ Rows [][]string
195+ }
196+
197+ // RowCount returns the number of rows in the sample.
198+ func (s * TableSample ) RowCount () int {
199+ return len (s .Rows )
200+ }
201+
202+ // FetchTableSample fetches a sample of data from a table.
203+ func (f * Fetcher ) FetchTableSample (ctx context.Context , tableKey keboola.TableKey , limit uint ) (sample * TableSample , err error ) {
204+ ctx , span := f .telemetry .Tracer ().Start (ctx , "keboola.go.twinformat.fetcher.FetchTableSample" )
205+ defer span .End (& err )
206+
207+ f .logger .Debugf (ctx , "Fetching sample for table %s (limit: %d)" , tableKey .TableID , limit )
208+
209+ // Fetch table preview using the SDK.
210+ // Don't pass WithLimitRows when limit is 0, as it may have different semantics than omitting.
211+ var preview * keboola.TablePreview
212+ if limit == 0 {
213+ preview , err = f .api .PreviewTableRequest (tableKey ).Send (ctx )
214+ } else {
215+ preview , err = f .api .PreviewTableRequest (tableKey , keboola .WithLimitRows (limit )).Send (ctx )
216+ }
217+ if err != nil {
218+ return nil , err
219+ }
220+
221+ sample = & TableSample {
222+ TableID : tableKey .TableID ,
223+ Columns : preview .Columns ,
224+ Rows : preview .Rows ,
225+ }
226+
227+ f .logger .Debugf (ctx , "Fetched %d rows for table %s" , sample .RowCount (), tableKey .TableID )
228+
229+ return sample , nil
230+ }
231+
232+ // indexedSample pairs a sample with its original index for ordering.
233+ type indexedSample struct {
234+ index int
235+ sample * TableSample
236+ }
237+
238+ // sampleFetchCollector collects results from concurrent sample fetching.
239+ type sampleFetchCollector struct {
240+ mu sync.Mutex
241+ results []indexedSample
242+ failedCount int
243+ }
244+
245+ // recordFailure increments the failure count.
246+ func (c * sampleFetchCollector ) recordFailure () {
247+ c .mu .Lock ()
248+ defer c .mu .Unlock ()
249+ c .failedCount ++
250+ }
251+
252+ // recordSuccess records a successfully fetched sample.
253+ func (c * sampleFetchCollector ) recordSuccess (idx int , sample * TableSample ) {
254+ c .mu .Lock ()
255+ defer c .mu .Unlock ()
256+ c .results = append (c .results , indexedSample {index : idx , sample : sample })
257+ }
258+
259+ // FetchTableSamples fetches samples for multiple tables concurrently.
260+ func (f * Fetcher ) FetchTableSamples (ctx context.Context , tables []* keboola.Table , limit uint , maxTables int ) (samples []* TableSample , err error ) {
261+ ctx , span := f .telemetry .Tracer ().Start (ctx , "keboola.go.twinformat.fetcher.FetchTableSamples" )
262+ defer span .End (& err )
263+
264+ // Guard against non-positive maxTables to avoid panics from negative slice capacities.
265+ if maxTables <= 0 {
266+ return []* TableSample {}, nil
267+ }
268+
269+ // Limit tables to fetch.
270+ tablesToFetch := tables
271+ if len (tablesToFetch ) > maxTables {
272+ tablesToFetch = tablesToFetch [:maxTables ]
273+ }
274+
275+ f .logger .Infof (ctx , "Fetching samples for %d tables concurrently (limit: %d rows each)" , len (tablesToFetch ), limit )
276+
277+ // Use bounded concurrency to respect API rate limits.
278+ const maxConcurrency = 10
279+
280+ sem := semaphore .NewWeighted (maxConcurrency )
281+ group , groupCtx := errgroup .WithContext (ctx )
282+ collector := & sampleFetchCollector {
283+ results : make ([]indexedSample , 0 , len (tablesToFetch )),
284+ }
285+
286+ for i , table := range tablesToFetch {
287+ idx , t := i , table
288+ group .Go (func () error {
289+ // Acquire semaphore - blocks until slot available or context cancelled.
290+ if err := sem .Acquire (groupCtx , 1 ); err != nil {
291+ collector .recordFailure ()
292+ return nil // Don't propagate - we want partial results
293+ }
294+ defer sem .Release (1 )
295+
296+ f .fetchTableSample (groupCtx , collector , idx , t , limit )
297+ return nil
298+ })
299+ }
300+
301+ // Wait for all goroutines to complete.
302+ _ = group .Wait ()
303+
304+ // If context was cancelled and no samples were fetched, propagate the cancellation error.
305+ if len (collector .results ) == 0 && ctx .Err () != nil {
306+ return nil , ctx .Err ()
307+ }
308+
309+ // Sort by original index to preserve order.
310+ sort .Slice (collector .results , func (i , j int ) bool {
311+ return collector .results [i ].index < collector .results [j ].index
312+ })
313+
314+ samples = make ([]* TableSample , 0 , len (collector .results ))
315+ for _ , r := range collector .results {
316+ samples = append (samples , r .sample )
317+ }
318+
319+ f .logger .Infof (ctx , "Fetched samples for %d tables (%d failed)" , len (samples ), collector .failedCount )
320+
321+ // Return error if any tables failed to fetch, but still return partial results.
322+ if collector .failedCount > 0 {
323+ return samples , errors .Errorf ("failed to fetch samples for %d of %d tables" , collector .failedCount , len (tablesToFetch ))
324+ }
325+
326+ return samples , nil
327+ }
328+
329+ // fetchTableSample fetches a sample for a single table and records the result.
330+ func (f * Fetcher ) fetchTableSample (ctx context.Context , collector * sampleFetchCollector , idx int , t * keboola.Table , limit uint ) {
331+ tableKey := keboola.TableKey {
332+ BranchID : t .BranchID ,
333+ TableID : t .TableID ,
334+ }
335+
336+ sample , err := f .FetchTableSample (ctx , tableKey , limit )
337+ if err != nil {
338+ f .logger .Warnf (ctx , "Failed to fetch sample for table %s: %v" , t .TableID , err )
339+ collector .recordFailure ()
340+ return
341+ }
342+
343+ collector .recordSuccess (idx , sample )
344+ }
345+
186346// fetchAllComponents fetches all components and extracts transformation and component configs.
187347// This makes a single API call and returns all data needed for processing.
188348func (f * Fetcher ) fetchAllComponents (ctx context.Context , branchID keboola.BranchID ) (result * componentsResult , err error ) {
0 commit comments