Skip to content

Commit 0473971

Browse files
authored
Merge pull request #2498 from keboola/vb/DMD-918/llm-export-scaffolding
DMD-918 - Add CLI scaffolding for kbc llm export command
2 parents aec1de4 + 3385799 commit 0473971

10 files changed

Lines changed: 401 additions & 5 deletions

File tree

internal/pkg/service/cli/cmd/llm/cmd.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package llm
33
import (
44
"github.com/spf13/cobra"
55

6+
llmExport "github.com/keboola/keboola-as-code/internal/pkg/service/cli/cmd/llm/export"
67
llmInit "github.com/keboola/keboola-as-code/internal/pkg/service/cli/cmd/llm/init"
78
"github.com/keboola/keboola-as-code/internal/pkg/service/cli/dependencies"
89
"github.com/keboola/keboola-as-code/internal/pkg/service/cli/helpmsg"
@@ -15,6 +16,7 @@ func Commands(p dependencies.Provider) *cobra.Command {
1516
Long: helpmsg.Read(`llm/long`),
1617
}
1718
cmd.AddCommand(
19+
llmExport.Command(p),
1820
llmInit.Command(p),
1921
)
2022
return cmd
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package export
2+
3+
import (
4+
"github.com/spf13/cobra"
5+
6+
"github.com/keboola/keboola-as-code/internal/pkg/service/cli/dependencies"
7+
"github.com/keboola/keboola-as-code/internal/pkg/service/cli/helpmsg"
8+
"github.com/keboola/keboola-as-code/internal/pkg/service/common/configmap"
9+
exportOp "github.com/keboola/keboola-as-code/pkg/lib/operation/llm/export"
10+
)
11+
12+
// Flags holds the command-line flags of the "llm export" command.
// The struct tags give each flag's name, shorthand and usage text.
type Flags struct {
13+
// StorageAPIHost is passed to RemoteCommandScope by Command.
StorageAPIHost configmap.Value[string] `configKey:"storage-api-host" configShorthand:"H" configUsage:"storage API host, eg. \"connection.keboola.com\""`
14+
// StorageAPIToken is passed to RemoteCommandScope by Command.
StorageAPIToken configmap.Value[string] `configKey:"storage-api-token" configShorthand:"t" configUsage:"storage API token from your project"`
15+
// Force skips the confirmation prompt in validateDirectory and is forwarded to the export operation options.
Force configmap.Value[bool] `configKey:"force" configShorthand:"f" configUsage:"skip confirmation when directory contains existing files"`
16+
}
17+
18+
func DefaultFlags() Flags {
19+
return Flags{}
20+
}
21+
22+
func Command(p dependencies.Provider) *cobra.Command {
23+
cmd := &cobra.Command{
24+
Use: "export",
25+
Short: helpmsg.Read(`llm/export/short`),
26+
Long: helpmsg.Read(`llm/export/long`),
27+
RunE: func(cmd *cobra.Command, args []string) (cmdErr error) {
28+
f := Flags{}
29+
if err := p.BaseScope().ConfigBinder().Bind(cmd.Context(), cmd.Flags(), args, &f); err != nil {
30+
return err
31+
}
32+
33+
// Get dependencies
34+
d, err := p.RemoteCommandScope(cmd.Context(), f.StorageAPIHost, f.StorageAPIToken)
35+
if err != nil {
36+
return err
37+
}
38+
39+
// Validate directory
40+
if err := validateDirectory(cmd.Context(), d, f); err != nil {
41+
return err
42+
}
43+
44+
// Build options
45+
options := exportOp.Options{
46+
Force: f.Force.Value,
47+
}
48+
49+
// Send cmd successful/failed event
50+
defer d.EventSender().SendCmdEvent(cmd.Context(), d.Clock().Now(), &cmdErr, "llm-export")
51+
52+
// Export
53+
return exportOp.Run(cmd.Context(), options, d)
54+
},
55+
}
56+
57+
// Flags
58+
configmap.MustGenerateFlags(cmd.Flags(), DefaultFlags())
59+
60+
return cmd
61+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package export
2+
3+
import (
4+
"context"
5+
"strings"
6+
7+
"github.com/keboola/keboola-as-code/internal/pkg/filesystem"
8+
"github.com/keboola/keboola-as-code/internal/pkg/service/cli/dialog"
9+
"github.com/keboola/keboola-as-code/internal/pkg/service/cli/prompt"
10+
"github.com/keboola/keboola-as-code/internal/pkg/utils/errors"
11+
)
12+
13+
// validateDependencies lists the dependencies used by validateDirectory.
type validateDependencies interface {
14+
// Dialogs provides the confirmation prompt shown when unexpected files are found.
Dialogs() *dialog.Dialogs
15+
// Fs provides the filesystem whose "." directory is inspected.
Fs() filesystem.Fs
16+
}
17+
18+
// isAllowedFile reports whether an entry with the given name may already be
// present in the directory without triggering the confirmation prompt.
// Allowed are ".keboola", ".gitignore", ".git" and any name starting with ".env".
func isAllowedFile(name string) bool {
	if strings.HasPrefix(name, ".env") {
		return true
	}
	return name == ".git" || name == ".gitignore" || name == ".keboola"
}
26+
27+
// validateDirectory checks if the current directory is suitable for export.
28+
// It allows .keboola/, .env*, .gitignore, and .git to exist.
29+
// If other files exist, it prompts for confirmation unless --force is set.
30+
func validateDirectory(ctx context.Context, d validateDependencies, f Flags) error {
31+
fs := d.Fs()
32+
33+
// List files in the current directory
34+
entries, err := fs.ReadDir(ctx, ".")
35+
if err != nil {
36+
return errors.Errorf("cannot read directory: %w", err)
37+
}
38+
39+
// Check for files that are not in the allowed list
40+
conflicts := make([]string, 0, len(entries))
41+
for _, entry := range entries {
42+
name := entry.Name()
43+
if !isAllowedFile(name) {
44+
conflicts = append(conflicts, name)
45+
}
46+
}
47+
48+
// If no conflicts, proceed
49+
if len(conflicts) == 0 {
50+
return nil
51+
}
52+
53+
// If --force is set, proceed with warning
54+
if f.Force.Value {
55+
return nil
56+
}
57+
58+
// Prompt for confirmation, including the list of conflicting files
59+
label := "Directory contains existing files: " + strings.Join(conflicts, ", ") + ". Do you want to continue?"
60+
confirmed := d.Dialogs().Confirm(&prompt.Confirm{
61+
Label: label,
62+
Default: false,
63+
})
64+
65+
if !confirmed {
66+
return errors.New("export cancelled by user")
67+
}
68+
69+
return nil
70+
}
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
package export
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/stretchr/testify/assert"
8+
"github.com/stretchr/testify/require"
9+
10+
"github.com/keboola/keboola-as-code/internal/pkg/filesystem"
11+
"github.com/keboola/keboola-as-code/internal/pkg/filesystem/aferofs"
12+
"github.com/keboola/keboola-as-code/internal/pkg/service/cli/dialog"
13+
"github.com/keboola/keboola-as-code/internal/pkg/service/cli/prompt"
14+
"github.com/keboola/keboola-as-code/internal/pkg/service/common/configmap"
15+
)
16+
17+
func TestIsAllowedFile(t *testing.T) {
18+
t.Parallel()
19+
20+
tests := []struct {
21+
name string
22+
filename string
23+
expected bool
24+
}{
25+
// Allowed files
26+
{name: "keboola dir", filename: ".keboola", expected: true},
27+
{name: "env file", filename: ".env", expected: true},
28+
{name: "env local", filename: ".env.local", expected: true},
29+
{name: "env dist", filename: ".env.dist", expected: true},
30+
{name: "env custom", filename: ".env.production", expected: true},
31+
{name: "gitignore", filename: ".gitignore", expected: true},
32+
{name: "git dir", filename: ".git", expected: true},
33+
34+
// Not allowed files
35+
{name: "readme", filename: "README.md", expected: false},
36+
{name: "src dir", filename: "src", expected: false},
37+
{name: "go mod", filename: "go.mod", expected: false},
38+
{name: "twin format", filename: "twin_format", expected: false},
39+
{name: "main dir", filename: "main", expected: false},
40+
{name: "hidden file", filename: ".hidden", expected: false},
41+
{name: "env without dot prefix", filename: "env", expected: false},
42+
}
43+
44+
for _, tc := range tests {
45+
t.Run(tc.name, func(t *testing.T) {
46+
t.Parallel()
47+
result := isAllowedFile(tc.filename)
48+
assert.Equal(t, tc.expected, result, "isAllowedFile(%q) should be %v", tc.filename, tc.expected)
49+
})
50+
}
51+
}
52+
53+
// mockPrompt implements prompt.Prompt for testing.
54+
type mockPrompt struct {
55+
confirmResult bool
56+
}
57+
58+
func (m *mockPrompt) IsInteractive() bool { return true }
59+
func (m *mockPrompt) Printf(_ string, _ ...any) {}
60+
func (m *mockPrompt) Confirm(_ *prompt.Confirm) bool { return m.confirmResult }
61+
func (m *mockPrompt) Ask(q *prompt.Question) (string, bool) { return q.Default, true }
62+
func (m *mockPrompt) Select(s *prompt.Select) (string, bool) { return s.Default, true }
63+
func (m *mockPrompt) SelectIndex(s *prompt.SelectIndex) (int, bool) { return s.Default, true }
64+
func (m *mockPrompt) MultiSelect(s *prompt.MultiSelect) ([]string, bool) { return s.Default, true }
65+
func (m *mockPrompt) MultiSelectIndex(s *prompt.MultiSelectIndex) ([]int, bool) {
66+
return s.Default, true
67+
}
68+
func (m *mockPrompt) Multiline(q *prompt.Question) (string, bool) { return q.Default, true }
69+
func (m *mockPrompt) Editor(_ string, q *prompt.Question) (string, bool) { return q.Default, true }
70+
71+
// mockValidateDeps implements validateDependencies for testing.
72+
type mockValidateDeps struct {
73+
fs filesystem.Fs
74+
dialogs *dialog.Dialogs
75+
}
76+
77+
func (m *mockValidateDeps) Fs() filesystem.Fs { return m.fs }
78+
func (m *mockValidateDeps) Dialogs() *dialog.Dialogs { return m.dialogs }
79+
80+
func TestValidateDirectory(t *testing.T) {
81+
t.Parallel()
82+
83+
tests := []struct {
84+
name string
85+
files []string
86+
dirs []string
87+
force bool
88+
confirmResult bool
89+
expectError bool
90+
errorContains string
91+
}{
92+
{
93+
name: "empty directory",
94+
files: nil,
95+
dirs: nil,
96+
force: false,
97+
expectError: false,
98+
},
99+
{
100+
name: "only allowed files",
101+
files: []string{".env", ".env.local", ".gitignore"},
102+
dirs: []string{".keboola", ".git"},
103+
force: false,
104+
expectError: false,
105+
},
106+
{
107+
name: "conflicts with force flag",
108+
files: []string{"README.md", "src"},
109+
dirs: nil,
110+
force: true,
111+
expectError: false,
112+
},
113+
{
114+
name: "conflicts without force, user confirms",
115+
files: []string{"README.md"},
116+
dirs: nil,
117+
force: false,
118+
confirmResult: true,
119+
expectError: false,
120+
},
121+
{
122+
name: "conflicts without force, user rejects",
123+
files: []string{"README.md"},
124+
dirs: nil,
125+
force: false,
126+
confirmResult: false,
127+
expectError: true,
128+
errorContains: "export cancelled by user",
129+
},
130+
{
131+
name: "mixed allowed and not allowed files with force",
132+
files: []string{".env", ".gitignore", "main.go", "go.mod"},
133+
dirs: []string{".keboola"},
134+
force: true,
135+
expectError: false,
136+
},
137+
{
138+
name: "mixed files without force, user rejects",
139+
files: []string{".env", "main.go"},
140+
dirs: nil,
141+
force: false,
142+
confirmResult: false,
143+
expectError: true,
144+
errorContains: "export cancelled by user",
145+
},
146+
}
147+
148+
for _, tc := range tests {
149+
t.Run(tc.name, func(t *testing.T) {
150+
t.Parallel()
151+
ctx := context.Background()
152+
153+
// Create memory filesystem
154+
fs := aferofs.NewMemoryFs()
155+
156+
// Create test files
157+
for _, f := range tc.files {
158+
require.NoError(t, fs.WriteFile(ctx, filesystem.NewRawFile(f, "test content")))
159+
}
160+
161+
// Create test directories
162+
for _, d := range tc.dirs {
163+
require.NoError(t, fs.Mkdir(ctx, d))
164+
}
165+
166+
// Create mock dependencies
167+
mockP := &mockPrompt{confirmResult: tc.confirmResult}
168+
deps := &mockValidateDeps{
169+
fs: fs,
170+
dialogs: dialog.New(mockP),
171+
}
172+
173+
// Create flags
174+
flags := Flags{
175+
Force: configmap.Value[bool]{Value: tc.force},
176+
}
177+
178+
// Run validation
179+
err := validateDirectory(ctx, deps, flags)
180+
181+
// Check result
182+
if tc.expectError {
183+
require.Error(t, err)
184+
if tc.errorContains != "" {
185+
assert.Contains(t, err.Error(), tc.errorContains)
186+
}
187+
} else {
188+
require.NoError(t, err)
189+
}
190+
})
191+
}
192+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
Export project data to an AI-optimized "twin format" directory structure.
2+
3+
The twin format is designed for AI assistants to understand and work with
4+
Keboola projects directly from Git repositories. It includes:
5+
6+
- Bucket and table metadata with schema information
7+
- Transformation configurations with platform detection
8+
- Component configurations organized by type
9+
- Job execution history and statistics
10+
- Lineage graph showing data flow dependencies
11+
- Optional data samples (controlled by flags)
12+
13+
The export creates a "twin_format/" directory containing JSON files with
14+
inline documentation (_comment, _purpose, _update_frequency fields).
15+
16+
Security features:
17+
- Automatic detection of public Git repositories
18+
- Sample export disabled by default for public repos
19+
- Secrets (fields starting with #) are encrypted
20+
21+
Use "kbc llm init" first to initialize the project directory.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Export project data to an AI-optimized twin format directory structure.

0 commit comments

Comments
 (0)