From a686542b1f481222615c389c46ecfb7f14f9c332 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Sat, 5 Aug 2023 18:06:50 +0200 Subject: [PATCH 1/2] (PoC) `Databricks Labs` command group Allow for `labs.yml` definition in target repositories: ``` --- name: dbx context: workspace description: Databricks CLI extensions hooks: install: install.py entrypoint: main.py commands: - name: foo description: foo command flags: - name: first description: first flag description - name: second description: second flag description - name: bar description: bar command flags: - name: first description: first flag description - name: second description: second flag description ``` and simple command entry points that are aware of both CLI flags and Unified Authentication env variables: ``` import os, sys, json print(f'host is {os.environ["DATABRICKS_HOST"]}') payload = json.loads(sys.argv[1]) print(f'[{payload["command"]}]: flags are {payload["flags"]}') answer = input('What is your name? ') print(f'got answer: {answer}') answer = input('Preferences? ') print(f'got answer: {answer}') ``` --- cmd/cmd.go | 2 + cmd/internal/test.go | 21 ++++ cmd/labs/feature/all.go | 30 +++++ cmd/labs/feature/feature.go | 218 ++++++++++++++++++++++++++++++++++ cmd/labs/feature/http_call.go | 29 +++++ cmd/labs/install.go | 24 ++++ cmd/labs/install_test.go | 15 +++ cmd/labs/labs.go | 126 ++++++++++++++++++++ cmd/labs/list.go | 89 ++++++++++++++ 9 files changed, 554 insertions(+) create mode 100644 cmd/internal/test.go create mode 100644 cmd/labs/feature/all.go create mode 100644 cmd/labs/feature/feature.go create mode 100644 cmd/labs/feature/http_call.go create mode 100644 cmd/labs/install.go create mode 100644 cmd/labs/install_test.go create mode 100644 cmd/labs/labs.go create mode 100644 cmd/labs/list.go diff --git a/cmd/cmd.go b/cmd/cmd.go index 04d7cc8044..18d3cac169 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -7,6 +7,7 @@ import ( "github.com/databricks/cli/cmd/bundle" "github.com/databricks/cli/cmd/configure" "github.com/databricks/cli/cmd/fs" + "github.com/databricks/cli/cmd/labs" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/version" @@ -37,6 +38,7 @@ func New() *cobra.Command { cli.AddCommand(bundle.New()) cli.AddCommand(configure.New()) cli.AddCommand(fs.New()) + cli.AddCommand(labs.New()) cli.AddCommand(sync.New()) cli.AddCommand(version.New()) diff --git a/cmd/internal/test.go b/cmd/internal/test.go new file mode 100644 index 0000000000..19fe8b30d2 --- /dev/null +++ b/cmd/internal/test.go @@ -0,0 +1,21 @@ +package internal + +import ( + "bytes" + "context" + + "github.com/databricks/cli/cmd" +) + +func RunGetOutput(ctx context.Context, args ...string) ([]byte, error) { + root := cmd.New() + args = append(args, "--log-level", "debug") + root.SetArgs(args) + var buf bytes.Buffer + root.SetOut(&buf) + err := root.ExecuteContext(ctx) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} diff --git a/cmd/labs/feature/all.go b/cmd/labs/feature/all.go new file mode 100644 index 0000000000..c1cf757434 --- /dev/null +++ b/cmd/labs/feature/all.go @@ -0,0 +1,30 @@ +package feature + +import ( + "context" + "fmt" + "os" + "path/filepath" +) + +func LoadAll(ctx context.Context) (features []*Feature, err error) { + home, err := os.UserHomeDir() + if err != nil { + return nil, err + } + labsDir, err := os.ReadDir(filepath.Join(home, ".databricks", "labs")) + if err != nil { + return nil, err + } + for _, v := range labsDir { + if !v.IsDir() { + continue + } + feature, err := NewFeature(v.Name()) + if err != nil { + return nil, fmt.Errorf("%s: %w", v.Name(), err) + } + features = append(features, feature) + } + return features, nil +} diff --git a/cmd/labs/feature/feature.go b/cmd/labs/feature/feature.go new file mode 100644 index 0000000000..3b78e4eca7 --- /dev/null +++ b/cmd/labs/feature/feature.go @@ -0,0 +1,218 @@ +package feature + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/databricks/cli/libs/git" + "github.com/databricks/cli/libs/log" + "golang.org/x/mod/semver" + "gopkg.in/yaml.v2" +) + +type Feature struct { + Name string `json:"name"` + Context string `json:"context,omitempty"` // auth context + Description string `json:"description"` + Hooks struct { + Install string `json:"install,omitempty"` + Uninstall string `json:"uninstall,omitempty"` + } + Entrypoint string `json:"entrypoint"` + Commands []struct { + Name string `json:"name"` + Description string `json:"description"` + Flags []struct { + Name string `json:"name"` + Description string `json:"description"` + } `json:"flags,omitempty"` + } `json:"commands,omitempty"` + + path string + checkout *git.Repository +} + +func NewFeature(name string) (*Feature, error) { + home, err := os.UserHomeDir() + if err != nil { + return nil, err + } + path := filepath.Join(home, ".databricks", "labs", name) + checkout, err := git.NewRepository(path) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + feat := &Feature{ + Name: name, + path: path, + checkout: checkout, + } + raw, err := os.ReadFile(filepath.Join(path, "labs.yml")) + if err != nil { + return nil, fmt.Errorf("read labs.yml: %w", err) + } + err = yaml.Unmarshal(raw, feat) + if err != nil { + return nil, fmt.Errorf("parse labs.yml: %w", err) + } + return feat, nil +} + +type release struct { + TagName string `json:"tag_name"` + Draft bool `json:"draft"` + Prerelease bool `json:"prerelease"` + PublishedAt time.Time `json:"published_at"` +} + +func (i *Feature) LatestVersion(ctx context.Context) (*release, error) { + var tags []release + url := fmt.Sprintf("https://api.github.com/repos/databrickslabs/%s/releases", i.Name) + err := httpCall(ctx, url, &tags) + if err != nil { + return nil, err + } + return &tags[0], nil +} + +const CacheDir = ".databricks" + +type pythonInstallation struct { + Version string + Binary string +} + +func (i *Feature) pythonExecutables(ctx context.Context) ([]pythonInstallation, error) { + found := []pythonInstallation{} + paths := strings.Split(os.Getenv("PATH"), string(os.PathListSeparator)) + for _, candidate := range paths { + bin := filepath.Join(candidate, "python3") + _, err := os.Stat(bin) + if err != nil && os.IsNotExist(err) { + continue + } + out, err := i.cmd(ctx, bin, "--version") + if err != nil { + return nil, err + } + words := strings.Split(out, " ") + found = append(found, pythonInstallation{ + Version: words[len(words)-1], + Binary: bin, + }) + } + if len(found) == 0 { + return nil, fmt.Errorf("no python3 executables found") + } + sort.Slice(found, func(i, j int) bool { + a := found[i].Version + b := found[j].Version + cmp := semver.Compare(a, b) + if cmp != 0 { + return cmp < 0 + } + return a < b + }) + return found, nil +} + +func (i *Feature) installVirtualEnv(ctx context.Context) error { + _, err := os.Stat(filepath.Join(i.path, "setup.py")) + if err != nil { + return err + } + pys, err := i.pythonExecutables(ctx) + if err != nil { + return err + } + python3 := pys[0].Binary + log.Debugf(ctx, "Creating python virtual environment in %s/%s", i.path, CacheDir) + _, err = i.cmd(ctx, python3, "-m", "venv", CacheDir) + if err != nil { + return fmt.Errorf("create venv: %w", err) + } + + log.Debugf(ctx, "Installing dependencies from setup.py") + venvPip := filepath.Join(i.path, CacheDir, "bin", "pip") + _, err = i.cmd(ctx, venvPip, "install", ".") + if err != nil { + return fmt.Errorf("pip install: %w", err) + } + return nil +} + +func (i *Feature) Run(ctx context.Context, raw []byte) error { + err := i.installVirtualEnv(ctx) + if err != nil { + return err + } + // TODO: detect virtual env (also create it on installation), + // because here we just assume that virtual env is installed. + python3 := filepath.Join(i.path, CacheDir, "bin", "python") + + // make sure to sync on writing to stdout + reader, writer := io.Pipe() + go io.CopyBuffer(os.Stdout, reader, make([]byte, 128)) + defer reader.Close() + defer writer.Close() + + // pass command parameters down to script as the first arg + cmd := exec.Command(python3, i.Entrypoint, string(raw)) + cmd.Dir = i.path + cmd.Stdout = writer + cmd.Stderr = writer + + stdin, err := cmd.StdinPipe() + if err != nil { + return err + } + go io.CopyBuffer(stdin, os.Stdin, make([]byte, 128)) + defer stdin.Close() + + err = cmd.Start() + if err != nil { + return err + } + + return cmd.Wait() +} + +func (i *Feature) cmd(ctx context.Context, args ...string) (string, error) { + commandStr := strings.Join(args, " ") + log.Debugf(ctx, "running: %s", commandStr) + cmd := exec.Command(args[0], args[1:]...) + stdout := &bytes.Buffer{} + cmd.Dir = i.path + cmd.Stdin = os.Stdin + cmd.Stdout = stdout + cmd.Stderr = stdout + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("%s: %s", commandStr, stdout.String()) + } + return strings.TrimSpace(stdout.String()), nil +} + +func (i *Feature) Install(ctx context.Context) error { + if i.checkout != nil { + curr, err := i.cmd(ctx, "git", "tag", "--points-at", "HEAD") + if err != nil { + return err + } + return fmt.Errorf("%s (%s) is already installed", i.Name, curr) + } + url := fmt.Sprintf("https://github.com/databrickslabs/%s", i.Name) + release, err := i.LatestVersion(ctx) + if err != nil { + return err + } + log.Infof(ctx, "Installing %s (%s) into %s", url, release.TagName, i.path) + return git.Clone(ctx, url, release.TagName, i.path) +} diff --git a/cmd/labs/feature/http_call.go b/cmd/labs/feature/http_call.go new file mode 100644 index 0000000000..d64a72f379 --- /dev/null +++ b/cmd/labs/feature/http_call.go @@ -0,0 +1,29 @@ +package feature + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" +) + +func httpCall(ctx context.Context, url string, response any) error { + res, err := http.Get(url) + if err != nil { + return err + } + if res.StatusCode >= 400 { + return fmt.Errorf("github request failed: %s", res.Status) + } + defer res.Body.Close() + raw, err := io.ReadAll(res.Body) + if err != nil { + return err + } + err = json.Unmarshal(raw, response) + if err != nil { + return err + } + return nil +} diff --git a/cmd/labs/install.go b/cmd/labs/install.go new file mode 100644 index 0000000000..1ac366defc --- /dev/null +++ b/cmd/labs/install.go @@ -0,0 +1,24 @@ +package labs + +import ( + "github.com/databricks/cli/cmd/labs/feature" + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newInstallCommand() *cobra.Command { + return &cobra.Command{ + Use: "install NAME", + Short: "Install a feature", + Args: cobra.ExactArgs(1), + PreRunE: root.MustWorkspaceClient, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + state, err := feature.NewFeature(args[0]) + if err != nil { + return err + } + return state.Install(ctx) + }, + } +} diff --git a/cmd/labs/install_test.go b/cmd/labs/install_test.go new file mode 100644 index 0000000000..7b951186bf --- /dev/null +++ b/cmd/labs/install_test.go @@ -0,0 +1,15 @@ +package labs_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/cmd/internal" + "github.com/stretchr/testify/assert" +) + +func TestInstallDbx(t *testing.T) { + ctx := context.Background() + _, err := internal.RunGetOutput(ctx, "labs", "install", "dbx") + assert.NoError(t, err) +} diff --git a/cmd/labs/labs.go b/cmd/labs/labs.go new file mode 100644 index 0000000000..be7a33df15 --- /dev/null +++ b/cmd/labs/labs.go @@ -0,0 +1,126 @@ +package labs + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/databricks/cli/cmd/labs/feature" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/databricks-sdk-go/config" + "github.com/spf13/cobra" +) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "labs", + Short: "Databricks Labs features", + Long: `Manage experimental Databricks Labs apps`, + } + + // TODO: this should be on the top CLI level + cmd.AddGroup(&cobra.Group{ + ID: "labs", + Title: "Databricks Labs", + }) + + cmd.AddCommand( + newListCommand(), + newInstallCommand(), + &cobra.Command{ + Use: "py", + Short: "...", + RunE: func(cmd *cobra.Command, args []string) error { + return nil + }, + }, + ) + + err := infuse(cmd) + if err != nil { + panic(err) + } + + return cmd +} + +type commandInput struct { + Command string `json:"command"` + Flags map[string]any `json:"flags"` + OutputType string `json:"output_type"` +} + +func infuse(cmd *cobra.Command) error { + ctx := cmd.Context() + all, err := feature.LoadAll(ctx) + if err != nil { + return err + } + for _, f := range all { + group := &cobra.Command{ + Use: f.Name, + Short: f.Description, + GroupID: "labs", + } + cmd.AddCommand(group) + for _, v := range f.Commands { + l := v + definedFlags := v.Flags + vcmd := &cobra.Command{ + Use: v.Name, + Short: v.Description, + RunE: func(cmd *cobra.Command, args []string) error { + flags := cmd.Flags() + if f.Context == "workspace" { + // TODO: context can be on both command and feature level + err = root.MustWorkspaceClient(cmd, args) + if err != nil { + return err + } + // TODO: add account-level init as well + w := root.WorkspaceClient(cmd.Context()) + for _, a := range config.ConfigAttributes { + if a.IsZero(w.Config) { + continue + } + for _, ev := range a.EnvVars { + err = os.Setenv(ev, a.GetString(w.Config)) + if err != nil { + return fmt.Errorf("set %s: %w", a.Name, err) + } + } + } + } + ci := &commandInput{ + Command: l.Name, + Flags: map[string]any{}, + } + for _, flag := range definedFlags { + v, err := flags.GetString(flag.Name) + if err != nil { + return fmt.Errorf("get %s flag: %w", flag.Name, err) + } + ci.Flags[flag.Name] = v + } + logLevelFlag := flags.Lookup("log-level") + if logLevelFlag != nil { + ci.Flags["log_level"] = logLevelFlag.Value.String() + } + raw, err := json.Marshal(ci) + if err != nil { + return err + } + ctx := cmd.Context() + // actually execute the command + return f.Run(ctx, raw) + }, + } + flags := vcmd.Flags() + for _, flag := range definedFlags { + flags.String(flag.Name, "", flag.Description) + } + group.AddCommand(vcmd) + } + } + return nil +} diff --git a/cmd/labs/list.go b/cmd/labs/list.go new file mode 100644 index 0000000000..c8ea1455d6 --- /dev/null +++ b/cmd/labs/list.go @@ -0,0 +1,89 @@ +package labs + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +type labsMeta struct { + Name string `json:"name"` + Description string `json:"description"` + License string `json:"license"` +} + +func httpCall(ctx context.Context, url string, response any) error { + res, err := http.Get(url) + if err != nil { + return err + } + if res.StatusCode >= 400 { + return fmt.Errorf("github request failed: %s", res.Status) + } + defer res.Body.Close() + raw, err := io.ReadAll(res.Body) + if err != nil { + return err + } + err = json.Unmarshal(raw, response) + if err != nil { + return err + } + return nil +} + +func newListCommand() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all labs", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name Description + {{range .}}{{.Name}} {{.Description}} + {{end}} + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + var repositories []struct { + Name string `json:"name"` + Description string `json:"description"` + Fork bool `json:"fork"` + Arcived bool `json:"archived"` + License struct { + Name string `json:"name"` + } `json:"license"` + } + err := httpCall(ctx, + "https://api.github.com/users/databrickslabs/repos", + &repositories) + if err != nil { + return err + } + info := []labsMeta{} + for _, v := range repositories { + if v.Arcived { + continue + } + if v.Fork { + continue + } + description := v.Description + if len(description) > 50 { + description = description[:50] + "..." + } + info = append(info, labsMeta{ + Name: v.Name, + Description: description, + License: v.License.Name, + }) + } + return cmdio.Render(ctx, info) + }, + } +} From 771d7bc5f84c84273c6d1b155a397a5f944c6b81 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 7 Aug 2023 14:36:32 +0200 Subject: [PATCH 2/2] make things work a bit more --- cmd/labs/feature/all.go | 7 + cmd/labs/feature/feature.go | 224 ++++++++++++++----------------- cmd/labs/install.go | 9 ++ cmd/labs/install_test.go | 2 +- cmd/labs/labs.go | 27 ++-- libs/process/background.go | 32 +++++ libs/process/forwarded.go | 47 +++++++ libs/process/opts.go | 39 ++++++ libs/python/interpreters.go | 95 +++++++++++++ libs/python/interpreters_test.go | 17 +++ libs/python/venv.go | 33 +++++ 11 files changed, 398 insertions(+), 134 deletions(-) create mode 100644 libs/process/background.go create mode 100644 libs/process/forwarded.go create mode 100644 libs/process/opts.go create mode 100644 libs/python/interpreters.go create mode 100644 libs/python/interpreters_test.go create mode 100644 libs/python/venv.go diff --git a/cmd/labs/feature/all.go b/cmd/labs/feature/all.go index c1cf757434..5b4af962e8 100644 --- a/cmd/labs/feature/all.go +++ b/cmd/labs/feature/all.go @@ -13,6 +13,9 @@ func LoadAll(ctx context.Context) (features []*Feature, err error) { return nil, err } labsDir, err := os.ReadDir(filepath.Join(home, ".databricks", "labs")) + if os.IsNotExist(err) { + return nil, nil + } if err != nil { return nil, err } @@ -24,6 +27,10 @@ func LoadAll(ctx context.Context) (features []*Feature, err error) { if err != nil { return nil, fmt.Errorf("%s: %w", v.Name(), err) } + err = feature.loadMetadata() + if err != nil { + return nil, fmt.Errorf("%s metadata: %w", v.Name(), err) + } features = append(features, feature) } return features, nil diff --git a/cmd/labs/feature/feature.go b/cmd/labs/feature/feature.go index 3b78e4eca7..5da8fefce9 100644 --- a/cmd/labs/feature/feature.go +++ b/cmd/labs/feature/feature.go @@ -1,20 +1,17 @@ package feature import ( - "bytes" "context" "fmt" - "io" "os" - "os/exec" "path/filepath" - "sort" "strings" "time" "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/log" - "golang.org/x/mod/semver" + "github.com/databricks/cli/libs/process" + "github.com/databricks/cli/libs/python" "gopkg.in/yaml.v2" ) @@ -25,7 +22,7 @@ type Feature struct { Hooks struct { Install string `json:"install,omitempty"` Uninstall string `json:"uninstall,omitempty"` - } + } `json:"hooks,omitempty"` Entrypoint string `json:"entrypoint"` Commands []struct { Name string `json:"name"` @@ -36,6 +33,7 @@ type Feature struct { } `json:"flags,omitempty"` } `json:"commands,omitempty"` + version string path string checkout *git.Repository } @@ -45,25 +43,26 @@ func NewFeature(name string) (*Feature, error) { if err != nil { return nil, err } + version := "latest" + split := strings.Split(name, "@") + if len(split) > 2 { + return nil, fmt.Errorf("invalid coordinates: %s", name) + } + if len(split) == 2 { + name = split[0] + version = split[1] + } path := filepath.Join(home, ".databricks", "labs", name) checkout, err := git.NewRepository(path) if err != nil && !os.IsNotExist(err) { return nil, err } - feat := &Feature{ + return &Feature{ Name: name, path: path, + version: version, checkout: checkout, - } - raw, err := os.ReadFile(filepath.Join(path, "labs.yml")) - if err != nil { - return nil, fmt.Errorf("read labs.yml: %w", err) - } - err = yaml.Unmarshal(raw, feat) - if err != nil { - return nil, fmt.Errorf("parse labs.yml: %w", err) - } - return feat, nil + }, nil } type release struct { @@ -73,7 +72,19 @@ type release struct { PublishedAt time.Time `json:"published_at"` } -func (i *Feature) LatestVersion(ctx context.Context) (*release, error) { +func (i *Feature) loadMetadata() error { + raw, err := os.ReadFile(filepath.Join(i.path, "labs.yml")) + if err != nil { + return fmt.Errorf("read labs.yml: %w", err) + } + err = yaml.Unmarshal(raw, i) + if err != nil { + return fmt.Errorf("parse labs.yml: %w", err) + } + return nil +} + +func (i *Feature) fetchLatestVersion(ctx context.Context) (*release, error) { var tags []release url := fmt.Sprintf("https://api.github.com/repos/databrickslabs/%s/releases", i.Name) err := httpCall(ctx, url, &tags) @@ -83,136 +94,105 @@ func (i *Feature) LatestVersion(ctx context.Context) (*release, error) { return &tags[0], nil } -const CacheDir = ".databricks" - -type pythonInstallation struct { - Version string - Binary string -} - -func (i *Feature) pythonExecutables(ctx context.Context) ([]pythonInstallation, error) { - found := []pythonInstallation{} - paths := strings.Split(os.Getenv("PATH"), string(os.PathListSeparator)) - for _, candidate := range paths { - bin := filepath.Join(candidate, "python3") - _, err := os.Stat(bin) - if err != nil && os.IsNotExist(err) { - continue - } - out, err := i.cmd(ctx, bin, "--version") +func (i *Feature) requestedVersion(ctx context.Context) (string, error) { + if i.version == "latest" { + release, err := i.fetchLatestVersion(ctx) if err != nil { - return nil, err + return "", err } - words := strings.Split(out, " ") - found = append(found, pythonInstallation{ - Version: words[len(words)-1], - Binary: bin, - }) - } - if len(found) == 0 { - return nil, fmt.Errorf("no python3 executables found") - } - sort.Slice(found, func(i, j int) bool { - a := found[i].Version - b := found[j].Version - cmp := semver.Compare(a, b) - if cmp != 0 { - return cmp < 0 - } - return a < b - }) - return found, nil + return release.TagName, nil + } + return i.version, nil } -func (i *Feature) installVirtualEnv(ctx context.Context) error { - _, err := os.Stat(filepath.Join(i.path, "setup.py")) +func (i *Feature) Install(ctx context.Context) error { + if i.hasFile(".git/HEAD") { + curr, err := process.Background(ctx, []string{ + "git", "tag", "--points-at", "HEAD", + }, process.WithDir(i.path)) + if err != nil { + return err + } + return fmt.Errorf("%s (%s) is already installed", i.Name, curr) + } + url := fmt.Sprintf("https://github.com/databrickslabs/%s", i.Name) + version, err := i.requestedVersion(ctx) if err != nil { return err } - pys, err := i.pythonExecutables(ctx) + log.Infof(ctx, "Installing %s (%s) into %s", url, version, i.path) + err = git.Clone(ctx, url, version, i.path) if err != nil { return err } - python3 := pys[0].Binary - log.Debugf(ctx, "Creating python virtual environment in %s/%s", i.path, CacheDir) - _, err = i.cmd(ctx, python3, "-m", "venv", CacheDir) + err = i.loadMetadata() if err != nil { - return fmt.Errorf("create venv: %w", err) + return fmt.Errorf("labs.yml: %w", err) } - - log.Debugf(ctx, "Installing dependencies from setup.py") - venvPip := filepath.Join(i.path, CacheDir, "bin", "pip") - _, err = i.cmd(ctx, venvPip, "install", ".") - if err != nil { - return fmt.Errorf("pip install: %w", err) + if i.isPython() { + err := i.installPythonTool(ctx) + if err != nil { + return err + } } return nil } +const CacheDir = ".databricks" + func (i *Feature) Run(ctx context.Context, raw []byte) error { - err := i.installVirtualEnv(ctx) - if err != nil { - return err - } - // TODO: detect virtual env (also create it on installation), - // because here we just assume that virtual env is installed. - python3 := filepath.Join(i.path, CacheDir, "bin", "python") + // raw is a JSON-encoded payload that holds things like command name and flags + return i.forwardPython(ctx, filepath.Join(i.path, i.Entrypoint), string(raw)) +} - // make sure to sync on writing to stdout - reader, writer := io.Pipe() - go io.CopyBuffer(os.Stdout, reader, make([]byte, 128)) - defer reader.Close() - defer writer.Close() +func (i *Feature) hasFile(name string) bool { + _, err := os.Stat(filepath.Join(i.path, name)) + return err == nil +} - // pass command parameters down to script as the first arg - cmd := exec.Command(python3, i.Entrypoint, string(raw)) - cmd.Dir = i.path - cmd.Stdout = writer - cmd.Stderr = writer +func (i *Feature) isPython() bool { + return i.hasFile("setup.py") || i.hasFile("pyproject.toml") +} + +func (i *Feature) venvBinDir() string { + return filepath.Join(i.path, CacheDir, "bin") +} + +func (i *Feature) forwardPython(ctx context.Context, pythonArgs ...string) error { + args := []string{filepath.Join(i.venvBinDir(), "python")} + args = append(args, pythonArgs...) + return process.Forwarded(ctx, args, + process.WithDir(i.path), // we may need to skip it for install step + process.WithEnv("PYTHONPATH", i.path)) +} - stdin, err := cmd.StdinPipe() +func (i *Feature) installPythonTool(ctx context.Context) error { + pythons, err := python.DetectInterpreters(ctx) if err != nil { return err } - go io.CopyBuffer(stdin, os.Stdin, make([]byte, 128)) - defer stdin.Close() - - err = cmd.Start() + interpreter := pythons.Latest() + log.Debugf(ctx, "Creating Python %s virtual environment in %s", interpreter.Version, i.path) + _, err = process.Background(ctx, []string{ + interpreter.Binary, "-m", "venv", CacheDir, + }, process.WithDir(i.path)) if err != nil { - return err + return fmt.Errorf("create venv: %w", err) } - - return cmd.Wait() -} - -func (i *Feature) cmd(ctx context.Context, args ...string) (string, error) { - commandStr := strings.Join(args, " ") - log.Debugf(ctx, "running: %s", commandStr) - cmd := exec.Command(args[0], args[1:]...) - stdout := &bytes.Buffer{} - cmd.Dir = i.path - cmd.Stdin = os.Stdin - cmd.Stdout = stdout - cmd.Stderr = stdout - if err := cmd.Run(); err != nil { - return "", fmt.Errorf("%s: %s", commandStr, stdout.String()) - } - return strings.TrimSpace(stdout.String()), nil -} - -func (i *Feature) Install(ctx context.Context) error { - if i.checkout != nil { - curr, err := i.cmd(ctx, "git", "tag", "--points-at", "HEAD") + log.Debugf(ctx, "Installing dependencies via PIP") + venvPip := filepath.Join(i.venvBinDir(), "pip") + _, err = process.Background(ctx, []string{ + venvPip, "install", ".", + }, process.WithDir(i.path)) + if err != nil { + return fmt.Errorf("pip install: %w", err) + } + if i.Hooks.Install != "" { + installer := filepath.Join(i.path, i.Hooks.Install) + err = i.forwardPython(ctx, installer) if err != nil { - return err + return fmt.Errorf("%s: %w", i.Hooks.Install, err) } - return fmt.Errorf("%s (%s) is already installed", i.Name, curr) - } - url := fmt.Sprintf("https://github.com/databrickslabs/%s", i.Name) - release, err := i.LatestVersion(ctx) - if err != nil { - return err } - log.Infof(ctx, "Installing %s (%s) into %s", url, release.TagName, i.path) - return git.Clone(ctx, url, release.TagName, i.path) + return nil } diff --git a/cmd/labs/install.go b/cmd/labs/install.go index 1ac366defc..6718764fa3 100644 --- a/cmd/labs/install.go +++ b/cmd/labs/install.go @@ -14,6 +14,15 @@ func newInstallCommand() *cobra.Command { PreRunE: root.MustWorkspaceClient, RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() + // TODO: context can be on both command and feature level + err := root.MustWorkspaceClient(cmd, args) + if err != nil { + return err + } + // TODO: add account-level init as well + w := root.WorkspaceClient(cmd.Context()) + propagateEnvConfig(w.Config) + state, err := feature.NewFeature(args[0]) if err != nil { return err diff --git a/cmd/labs/install_test.go b/cmd/labs/install_test.go index 7b951186bf..724c44e591 100644 --- a/cmd/labs/install_test.go +++ b/cmd/labs/install_test.go @@ -10,6 +10,6 @@ import ( func TestInstallDbx(t *testing.T) { ctx := context.Background() - _, err := internal.RunGetOutput(ctx, "labs", "install", "dbx") + _, err := internal.RunGetOutput(ctx, "labs", "install", "dbx@metadata", "--profile", "bogdan") assert.NoError(t, err) } diff --git a/cmd/labs/labs.go b/cmd/labs/labs.go index be7a33df15..f6a6d524cc 100644 --- a/cmd/labs/labs.go +++ b/cmd/labs/labs.go @@ -50,6 +50,21 @@ type commandInput struct { OutputType string `json:"output_type"` } +func propagateEnvConfig(cfg *config.Config) error { + for _, a := range config.ConfigAttributes { + if a.IsZero(cfg) { + continue + } + for _, ev := range a.EnvVars { + err := os.Setenv(ev, a.GetString(cfg)) + if err != nil { + return fmt.Errorf("set %s: %w", a.Name, err) + } + } + } + return nil +} + func infuse(cmd *cobra.Command) error { ctx := cmd.Context() all, err := feature.LoadAll(ctx) @@ -79,17 +94,7 @@ func infuse(cmd *cobra.Command) error { } // TODO: add account-level init as well w := root.WorkspaceClient(cmd.Context()) - for _, a := range config.ConfigAttributes { - if a.IsZero(w.Config) { - continue - } - for _, ev := range a.EnvVars { - err = os.Setenv(ev, a.GetString(w.Config)) - if err != nil { - return fmt.Errorf("set %s: %w", a.Name, err) - } - } - } + propagateEnvConfig(w.Config) } ci := &commandInput{ Command: l.Name, diff --git a/libs/process/background.go b/libs/process/background.go new file mode 100644 index 0000000000..f061568072 --- /dev/null +++ b/libs/process/background.go @@ -0,0 +1,32 @@ +package process + +import ( + "bytes" + "context" + "fmt" + "os" + "os/exec" + "strings" + + "github.com/databricks/cli/libs/log" +) + +func Background(ctx context.Context, args []string, opts ...execOption) (string, error) { + commandStr := strings.Join(args, " ") + log.Debugf(ctx, "running: %s", commandStr) + cmd := exec.CommandContext(ctx, args[0], args[1:]...) + stdout := &bytes.Buffer{} + cmd.Stdin = os.Stdin + cmd.Stdout = stdout + cmd.Stderr = stdout + for _, o := range opts { + err := o(cmd) + if err != nil { + return "", err + } + } + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("%s: %s", commandStr, stdout.String()) + } + return strings.TrimSpace(stdout.String()), nil +} diff --git a/libs/process/forwarded.go b/libs/process/forwarded.go new file mode 100644 index 0000000000..f408290816 --- /dev/null +++ b/libs/process/forwarded.go @@ -0,0 +1,47 @@ +package process + +import ( + "context" + "io" + "os" + "os/exec" + "strings" + + "github.com/databricks/cli/libs/log" +) + +func Forwarded(ctx context.Context, args []string, opts ...execOption) error { + commandStr := strings.Join(args, " ") + log.Debugf(ctx, "starting: %s", commandStr) + cmd := exec.CommandContext(ctx, args[0], args[1:]...) + + // make sure to sync on writing to stdout + reader, writer := io.Pipe() + go io.CopyBuffer(os.Stdout, reader, make([]byte, 128)) + defer reader.Close() + defer writer.Close() + cmd.Stdout = writer + cmd.Stderr = writer + + // apply common options + for _, o := range opts { + err := o(cmd) + if err != nil { + return err + } + } + + stdin, err := cmd.StdinPipe() + if err != nil { + return err + } + go io.CopyBuffer(stdin, os.Stdin, make([]byte, 128)) + defer stdin.Close() + + err = cmd.Start() + if err != nil { + return err + } + + return cmd.Wait() +} diff --git a/libs/process/opts.go b/libs/process/opts.go new file mode 100644 index 0000000000..7aebfd3862 --- /dev/null +++ b/libs/process/opts.go @@ -0,0 +1,39 @@ +package process + +import ( + "fmt" + "os" + "os/exec" +) + +type execOption func(*exec.Cmd) error + +func WithEnv(key, value string) execOption { + return func(c *exec.Cmd) error { + if c.Env == nil { + c.Env = os.Environ() + } + v := fmt.Sprintf("%s=%s", key, value) + c.Env = append(c.Env, v) + return nil + } +} + +func WithEnvs(envs map[string]string) execOption { + return func(c *exec.Cmd) error { + for k, v := range envs { + err := WithEnv(k, v)(c) + if err != nil { + return err + } + } + return nil + } +} + +func WithDir(dir string) execOption { + return func(c *exec.Cmd) error { + c.Dir = dir + return nil + } +} diff --git a/libs/python/interpreters.go b/libs/python/interpreters.go new file mode 100644 index 0000000000..633946f849 --- /dev/null +++ b/libs/python/interpreters.go @@ -0,0 +1,95 @@ +package python + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/process" + "golang.org/x/mod/semver" +) + +type Interpreter struct { + Version string + Binary string +} + +func (i Interpreter) String() string { + return fmt.Sprintf("%s (%s)", i.Version, i.Binary) +} + +type AllInterpreters []Interpreter + +func (a AllInterpreters) Latest() Interpreter { + return a[len(a)-1] +} + +func DetectInterpreters(ctx context.Context) (AllInterpreters, error) { + found := AllInterpreters{} + paths := strings.Split(os.Getenv("PATH"), string(os.PathListSeparator)) + seen := map[string]bool{} + for _, prefix := range paths { + entries, err := os.ReadDir(prefix) + if os.IsNotExist(err) { + // some directories in $PATH may not exist + continue + } + if err != nil { + return nil, fmt.Errorf("listing %s: %w", prefix, err) + } + for _, v := range entries { + if v.IsDir() { + continue + } + if strings.Contains(v.Name(), "-") { + // skip python3-config, python3.10-config, etc + continue + } + if !strings.HasPrefix(v.Name(), "python3") { + continue + } + bin := filepath.Join(prefix, v.Name()) + resolved, err := filepath.EvalSymlinks(bin) + if err != nil { + log.Debugf(ctx, "cannot resolve symlink for %s: %s", bin, resolved) + continue + } + if seen[resolved] { + continue + } + seen[resolved] = true + out, err := process.Background(ctx, []string{resolved, "--version"}) + if err != nil { + // TODO: skip-and-log or return? + return nil, err + } + words := strings.Split(out, " ") + if words[0] != "Python" { + continue + } + lastWord := words[len(words)-1] + version := semver.Canonical("v" + lastWord) + found = append(found, Interpreter{ + Version: version, + Binary: bin, + }) + } + } + if len(found) == 0 { + return nil, fmt.Errorf("no python3 executables found") + } + sort.Slice(found, func(i, j int) bool { + a := found[i].Version + b := found[j].Version + cmp := semver.Compare(a, b) + if cmp != 0 { + return cmp < 0 + } + return a < b + }) + return found, nil +} diff --git a/libs/python/interpreters_test.go b/libs/python/interpreters_test.go new file mode 100644 index 0000000000..d099703197 --- /dev/null +++ b/libs/python/interpreters_test.go @@ -0,0 +1,17 @@ +package python + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAtLeastOnePythonInstalled(t *testing.T) { + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.NoError(t, err) + a := all.Latest() + t.Logf("latest is: %s", a) + assert.True(t, len(all) > 0) +} diff --git a/libs/python/venv.go b/libs/python/venv.go new file mode 100644 index 0000000000..d623866065 --- /dev/null +++ b/libs/python/venv.go @@ -0,0 +1,33 @@ +package python + +import ( + "errors" + "os" + "path/filepath" +) + +// DetectVirtualEnv scans direct subfolders in path to get a valid +// Virtual Environment installation, that is marked by pyvenv.cfg file. +// +// See: https://packaging.python.org/en/latest/tutorials/packaging-projects/ +func DetectVirtualEnvPath(path string) (string, error) { + files, err := os.ReadDir(path) + if err != nil { + return "", err + } + for _, v := range files { + if !v.IsDir() { + continue + } + candidate := filepath.Join(path, v.Name()) + _, err = os.Stat(filepath.Join(candidate, "pyvenv.cfg")) + if errors.Is(err, os.ErrNotExist) { + continue + } + if err != nil { + return "", err + } + return candidate, nil + } + return "", nil +}