From 321f908436b753a6e02d2f9688e3d1db317b8f51 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Tue, 24 Oct 2023 10:50:17 +0200 Subject: [PATCH 1/8] Add `databricks labs` commad group .. Add folder list installed works stdin forwarding fixed some tests tests pass fetching versions added installer, still WIP fixed tests for UNIX added debug logging .. installer tests .. run on windows xx fixed added uninstall .. work on windows .. .. work with more CUJs pick up configuration when auth.json is not found .. Added spinners added cluster prompts .. ... .. support for dbconnect lib install .. .. .. fixed dev libDIr logic starting tests for dev installs +10% coverage ,, .. .. .. --- cmd/cmd.go | 2 + cmd/labs/CODEOWNERS | 1 + cmd/labs/clear_cache.go | 32 ++ cmd/labs/github/github.go | 66 +++ cmd/labs/github/ref.go | 20 + cmd/labs/github/ref_test.go | 48 ++ cmd/labs/github/releases.go | 61 +++ cmd/labs/github/releases_test.go | 34 ++ cmd/labs/github/repositories.go | 59 +++ cmd/labs/github/repositories_test.go | 30 ++ cmd/labs/install.go | 21 + cmd/labs/installed.go | 57 +++ cmd/labs/installed_test.go | 19 + cmd/labs/labs.go | 39 ++ cmd/labs/list.go | 62 +++ cmd/labs/list_test.go | 19 + cmd/labs/localcache/jsonfile.go | 96 ++++ cmd/labs/localcache/jsonfile_test.go | 97 ++++ cmd/labs/project/command_test.go | 69 +++ cmd/labs/project/entrypoint.go | 247 +++++++++++ cmd/labs/project/fetcher.go | 145 ++++++ cmd/labs/project/helpers.go | 35 ++ cmd/labs/project/init_test.go | 13 + cmd/labs/project/installed.go | 58 +++ cmd/labs/project/installed_test.go | 19 + cmd/labs/project/installer.go | 286 ++++++++++++ cmd/labs/project/installer_test.go | 415 ++++++++++++++++++ cmd/labs/project/login.go | 116 +++++ cmd/labs/project/project.go | 352 +++++++++++++++ cmd/labs/project/project_test.go | 22 + cmd/labs/project/proxy.go | 144 ++++++ cmd/labs/project/schema.json | 126 ++++++ cmd/labs/project/testdata/.gitignore | 1 + .../databrickslabs-blueprint-releases.json | 8 + 
.../labs/blueprint/config/login.json | 4 + .../.databricks/labs/blueprint/lib/install.py | 1 + .../.databricks/labs/blueprint/lib/labs.yml | 33 ++ .../.databricks/labs/blueprint/lib/main.py | 27 ++ .../labs/blueprint/lib/pyproject.toml | 11 + .../blueprint/state/other-state-file.json | 1 + .../labs/blueprint/state/venv/pyvenv.cfg | 0 .../labs/blueprint/state/version.json | 4 + .../labs/databrickslabs-repositories.json | 37 ++ .../testdata/installed-in-home/.databrickscfg | 9 + cmd/labs/show.go | 57 +++ cmd/labs/uninstall.go | 40 ++ cmd/labs/unpack/zipball.go | 64 +++ cmd/labs/upgrade.go | 21 + cmd/workspace/clusters/uc.go | 147 +++++++ cmd/workspace/clusters/uc_test.go | 31 ++ cmd/workspace/warehouses/ask.go | 51 +++ 51 files changed, 3357 insertions(+) create mode 100644 cmd/labs/CODEOWNERS create mode 100644 cmd/labs/clear_cache.go create mode 100644 cmd/labs/github/github.go create mode 100644 cmd/labs/github/ref.go create mode 100644 cmd/labs/github/ref_test.go create mode 100644 cmd/labs/github/releases.go create mode 100644 cmd/labs/github/releases_test.go create mode 100644 cmd/labs/github/repositories.go create mode 100644 cmd/labs/github/repositories_test.go create mode 100644 cmd/labs/install.go create mode 100644 cmd/labs/installed.go create mode 100644 cmd/labs/installed_test.go create mode 100644 cmd/labs/labs.go create mode 100644 cmd/labs/list.go create mode 100644 cmd/labs/list_test.go create mode 100644 cmd/labs/localcache/jsonfile.go create mode 100644 cmd/labs/localcache/jsonfile_test.go create mode 100644 cmd/labs/project/command_test.go create mode 100644 cmd/labs/project/entrypoint.go create mode 100644 cmd/labs/project/fetcher.go create mode 100644 cmd/labs/project/helpers.go create mode 100644 cmd/labs/project/init_test.go create mode 100644 cmd/labs/project/installed.go create mode 100644 cmd/labs/project/installed_test.go create mode 100644 cmd/labs/project/installer.go create mode 100644 cmd/labs/project/installer_test.go create mode 
100644 cmd/labs/project/login.go create mode 100644 cmd/labs/project/project.go create mode 100644 cmd/labs/project/project_test.go create mode 100644 cmd/labs/project/proxy.go create mode 100644 cmd/labs/project/schema.json create mode 100644 cmd/labs/project/testdata/.gitignore create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/cache/databrickslabs-blueprint-releases.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/config/login.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/pyproject.toml create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/other-state-file.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/venv/pyvenv.cfg create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/version.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databrickscfg create mode 100644 cmd/labs/show.go create mode 100644 cmd/labs/uninstall.go create mode 100644 cmd/labs/unpack/zipball.go create mode 100644 cmd/labs/upgrade.go create mode 100644 cmd/workspace/clusters/uc.go create mode 100644 cmd/workspace/clusters/uc_test.go create mode 100644 cmd/workspace/warehouses/ask.go diff --git a/cmd/cmd.go b/cmd/cmd.go index 6dd0f6e216..5d835409f9 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/cmd/bundle" 
"github.com/databricks/cli/cmd/configure" "github.com/databricks/cli/cmd/fs" + "github.com/databricks/cli/cmd/labs" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/version" @@ -70,6 +71,7 @@ func New(ctx context.Context) *cobra.Command { cli.AddCommand(bundle.New()) cli.AddCommand(configure.New()) cli.AddCommand(fs.New()) + cli.AddCommand(labs.New(ctx)) cli.AddCommand(sync.New()) cli.AddCommand(version.New()) diff --git a/cmd/labs/CODEOWNERS b/cmd/labs/CODEOWNERS new file mode 100644 index 0000000000..cc93a75e69 --- /dev/null +++ b/cmd/labs/CODEOWNERS @@ -0,0 +1 @@ +* @nfx diff --git a/cmd/labs/clear_cache.go b/cmd/labs/clear_cache.go new file mode 100644 index 0000000000..480e312248 --- /dev/null +++ b/cmd/labs/clear_cache.go @@ -0,0 +1,32 @@ +package labs + +import ( + "log/slog" + "os" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/libs/log" + "github.com/spf13/cobra" +) + +func newClearCacheCommand() *cobra.Command { + return &cobra.Command{ + Use: "clear-cache", + Short: "Clears cache entries from everywhere relevant", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + projects, err := project.Installed(ctx) + if err != nil { + return err + } + _ = os.Remove(project.PathInLabs(ctx, "repositories.json")) + logger := log.GetLogger(ctx) + for _, prj := range projects { + logger.Info("clearing labs project cache", slog.String("name", prj.Name)) + _ = os.RemoveAll(prj.CacheDir(ctx)) + _ = prj.EnsureFoldersExist(ctx) + } + return nil + }, + } +} diff --git a/cmd/labs/github/github.go b/cmd/labs/github/github.go new file mode 100644 index 0000000000..98e11e75a2 --- /dev/null +++ b/cmd/labs/github/github.go @@ -0,0 +1,66 @@ +package github + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + + "github.com/databricks/cli/libs/log" +) + +const gitHubAPI = "https://api.github.com" +const gitHubUserContent = 
"https://raw.githubusercontent.com" + +// Placeholders to use as unique keys in context.Context. +var apiOverride int +var userContentOverride int + +func WithApiOverride(ctx context.Context, override string) context.Context { + return context.WithValue(ctx, &apiOverride, override) +} + +func WithUserContentOverride(ctx context.Context, override string) context.Context { + return context.WithValue(ctx, &userContentOverride, override) +} + +var ErrNotFound = errors.New("not found") + +func getBytes(ctx context.Context, method, url string, body io.Reader) ([]byte, error) { + ao, ok := ctx.Value(&apiOverride).(string) + if ok { + url = strings.Replace(url, gitHubAPI, ao, 1) + } + uco, ok := ctx.Value(&userContentOverride).(string) + if ok { + url = strings.Replace(url, gitHubUserContent, uco, 1) + } + log.Tracef(ctx, "%s %s", method, url) + req, err := http.NewRequestWithContext(ctx, "GET", url, body) + if err != nil { + return nil, err + } + res, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + if res.StatusCode == 404 { + return nil, ErrNotFound + } + if res.StatusCode >= 400 { + return nil, fmt.Errorf("github request failed: %s", res.Status) + } + defer res.Body.Close() + return io.ReadAll(res.Body) +} + +func httpGetAndUnmarshall(ctx context.Context, url string, response any) error { + raw, err := getBytes(ctx, "GET", url, nil) + if err != nil { + return err + } + return json.Unmarshal(raw, response) +} diff --git a/cmd/labs/github/ref.go b/cmd/labs/github/ref.go new file mode 100644 index 0000000000..1975f6fbaf --- /dev/null +++ b/cmd/labs/github/ref.go @@ -0,0 +1,20 @@ +package github + +import ( + "context" + "fmt" + + "github.com/databricks/cli/libs/log" +) + +func ReadFileFromRef(ctx context.Context, org, repo, ref, file string) ([]byte, error) { + log.Debugf(ctx, "Reading %s@%s from %s/%s", file, ref, org, repo) + url := fmt.Sprintf("%s/%s/%s/%s/%s", gitHubUserContent, org, repo, ref, file) + return getBytes(ctx, "GET", url, nil) 
+} + +func DownloadZipball(ctx context.Context, org, repo, ref string) ([]byte, error) { + log.Debugf(ctx, "Downloading zipball for %s from %s/%s", ref, org, repo) + zipballURL := fmt.Sprintf("%s/repos/%s/%s/zipball/%s", gitHubAPI, org, repo, ref) + return getBytes(ctx, "GET", zipballURL, nil) +} diff --git a/cmd/labs/github/ref_test.go b/cmd/labs/github/ref_test.go new file mode 100644 index 0000000000..2a9ffcc5bd --- /dev/null +++ b/cmd/labs/github/ref_test.go @@ -0,0 +1,48 @@ +package github + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFileFromRef(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/databrickslabs/ucx/main/README.md" { + w.Write([]byte(`abc`)) + return + } + t.Logf("Requested: %s", r.URL.Path) + panic("stub required") + })) + defer server.Close() + + ctx := context.Background() + ctx = WithUserContentOverride(ctx, server.URL) + + raw, err := ReadFileFromRef(ctx, "databrickslabs", "ucx", "main", "README.md") + assert.NoError(t, err) + assert.Equal(t, []byte("abc"), raw) +} + +func TestDownloadZipball(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/repos/databrickslabs/ucx/zipball/main" { + w.Write([]byte(`abc`)) + return + } + t.Logf("Requested: %s", r.URL.Path) + panic("stub required") + })) + defer server.Close() + + ctx := context.Background() + ctx = WithApiOverride(ctx, server.URL) + + raw, err := DownloadZipball(ctx, "databrickslabs", "ucx", "main") + assert.NoError(t, err) + assert.Equal(t, []byte("abc"), raw) +} diff --git a/cmd/labs/github/releases.go b/cmd/labs/github/releases.go new file mode 100644 index 0000000000..c5a8364caa --- /dev/null +++ b/cmd/labs/github/releases.go @@ -0,0 +1,61 @@ +package github + +import ( + "context" + "fmt" + "time" + + 
"github.com/databricks/cli/cmd/labs/localcache" + "github.com/databricks/cli/libs/log" +) + +const cacheTTL = 1 * time.Hour + +// NewReleaseCache creates a release cache for a repository in the GitHub org. +// Caller has to provide different cache directories for different repositories. +func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache { + pattern := fmt.Sprintf("%s-%s-releases", org, repo) + return &ReleaseCache{ + cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL), + Org: org, + Repo: repo, + } +} + +type ReleaseCache struct { + cache localcache.LocalCache[Versions] + Org string + Repo string +} + +func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) { + return r.cache.Load(ctx, func() (Versions, error) { + return getVersions(ctx, r.Org, r.Repo) + }) +} + +// getVersions is considered to be a private API, as we want the usage go through a cache +func getVersions(ctx context.Context, org, repo string) (Versions, error) { + var releases Versions + log.Debugf(ctx, "Fetching latest releases for %s/%s from GitHub API", org, repo) + url := fmt.Sprintf("%s/repos/%s/%s/releases", gitHubAPI, org, repo) + err := httpGetAndUnmarshall(ctx, url, &releases) + return releases, err +} + +type ghAsset struct { + Name string `json:"name"` + ContentType string `json:"content_type"` + Size int `json:"size"` + BrowserDownloadURL string `json:"browser_download_url"` +} + +type Release struct { + Version string `json:"tag_name"` + CreatedAt time.Time `json:"created_at"` + PublishedAt time.Time `json:"published_at"` + ZipballURL string `json:"zipball_url"` + Assets []ghAsset `json:"assets"` +} + +type Versions []Release diff --git a/cmd/labs/github/releases_test.go b/cmd/labs/github/releases_test.go new file mode 100644 index 0000000000..75a5f96609 --- /dev/null +++ b/cmd/labs/github/releases_test.go @@ -0,0 +1,34 @@ +package github + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + 
"github.com/stretchr/testify/assert" +) + +func TestLoadsReleasesForCLI(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/repos/databricks/cli/releases" { + w.Write([]byte("[]")) + return + } + t.Logf("Requested: %s", r.URL.Path) + panic("stub required") + })) + defer server.Close() + + ctx := context.Background() + ctx = WithApiOverride(ctx, server.URL) + + r := NewReleaseCache("databricks", "cli", t.TempDir()) + all, err := r.Load(ctx) + assert.NoError(t, err) + assert.NotNil(t, all) + + // no call is made + _, err = r.Load(ctx) + assert.NoError(t, err) +} diff --git a/cmd/labs/github/repositories.go b/cmd/labs/github/repositories.go new file mode 100644 index 0000000000..545c886fb2 --- /dev/null +++ b/cmd/labs/github/repositories.go @@ -0,0 +1,59 @@ +package github + +import ( + "context" + "fmt" + "time" + + "github.com/databricks/cli/cmd/labs/localcache" + "github.com/databricks/cli/libs/log" +) + +const repositoryCacheTTL = 24 * time.Hour + +func NewRepositoryCache(org, cacheDir string) *repositoryCache { + filename := fmt.Sprintf("%s-repositories", org) + return &repositoryCache{ + cache: localcache.NewLocalCache[Repositories](cacheDir, filename, repositoryCacheTTL), + Org: org, + } +} + +type repositoryCache struct { + cache localcache.LocalCache[Repositories] + Org string +} + +func (r *repositoryCache) Load(ctx context.Context) (Repositories, error) { + return r.cache.Load(ctx, func() (Repositories, error) { + return getRepositories(ctx, r.Org) + }) +} + +// getRepositories is considered to be privata API, as we want the usage to go through a cache +func getRepositories(ctx context.Context, org string) (Repositories, error) { + var repos Repositories + log.Debugf(ctx, "Loading repositories for %s from GitHub API", org) + url := fmt.Sprintf("%s/users/%s/repos", gitHubAPI, org) + err := httpGetAndUnmarshall(ctx, url, &repos) + return repos, err +} + +type Repositories []ghRepo 
+ +type ghRepo struct { + Name string `json:"name"` + Description string `json:"description"` + Langauge string `json:"language"` + DefaultBranch string `json:"default_branch"` + Stars int `json:"stargazers_count"` + IsFork bool `json:"fork"` + IsArchived bool `json:"archived"` + Topics []string `json:"topics"` + HtmlURL string `json:"html_url"` + CloneURL string `json:"clone_url"` + SshURL string `json:"ssh_url"` + License struct { + Name string `json:"name"` + } `json:"license"` +} diff --git a/cmd/labs/github/repositories_test.go b/cmd/labs/github/repositories_test.go new file mode 100644 index 0000000000..4f2fef3e17 --- /dev/null +++ b/cmd/labs/github/repositories_test.go @@ -0,0 +1,30 @@ +package github + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRepositories(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/users/databrickslabs/repos" { + w.Write([]byte(`[{"name": "x"}]`)) + return + } + t.Logf("Requested: %s", r.URL.Path) + panic("stub required") + })) + defer server.Close() + + ctx := context.Background() + ctx = WithApiOverride(ctx, server.URL) + + r := NewRepositoryCache("databrickslabs", t.TempDir()) + all, err := r.Load(ctx) + assert.NoError(t, err) + assert.True(t, len(all) > 0) +} diff --git a/cmd/labs/install.go b/cmd/labs/install.go new file mode 100644 index 0000000000..31db438929 --- /dev/null +++ b/cmd/labs/install.go @@ -0,0 +1,21 @@ +package labs + +import ( + "github.com/databricks/cli/cmd/labs/project" + "github.com/spf13/cobra" +) + +func newInstallCommand() *cobra.Command { + return &cobra.Command{ + Use: "install NAME", + Args: cobra.ExactArgs(1), + Short: "Installs project", + RunE: func(cmd *cobra.Command, args []string) error { + inst, err := project.NewInstaller(cmd, args[0]) + if err != nil { + return err + } + return inst.Install(cmd.Context()) + }, + } +} diff --git 
a/cmd/labs/installed.go b/cmd/labs/installed.go new file mode 100644 index 0000000000..e4249c9ffb --- /dev/null +++ b/cmd/labs/installed.go @@ -0,0 +1,57 @@ +package labs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +func newInstalledCommand() *cobra.Command { + return &cobra.Command{ + Use: "installed", + Short: "List all installed labs", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name Description Version + {{range .Projects}}{{.Name}} {{.Description}} {{.Version}} + {{end}} + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + type installedProject struct { + Name string `json:"name"` + Description string `json:"description"` + Version string `json:"version"` + } + projects, err := project.Installed(ctx) + if err != nil { + return err + } + var info struct { + Projects []installedProject `json:"projects"` + } + for _, v := range projects { + description := v.Description + if len(description) > 50 { + description = description[:50] + "..." 
+ } + version, err := v.InstalledVersion(ctx) + if err != nil { + return fmt.Errorf("%s: %w", v.Name, err) + } + info.Projects = append(info.Projects, installedProject{ + Name: v.Name, + Description: description, + Version: version.Version, + }) + } + if len(info.Projects) == 0 { + return fmt.Errorf("no projects installed") + } + return cmdio.Render(ctx, info) + }, + } +} diff --git a/cmd/labs/installed_test.go b/cmd/labs/installed_test.go new file mode 100644 index 0000000000..00692f7960 --- /dev/null +++ b/cmd/labs/installed_test.go @@ -0,0 +1,19 @@ +package labs_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/env" +) + +func TestListsInstalledProjects(t *testing.T) { + ctx := context.Background() + ctx = env.WithUserHomeDir(ctx, "project/testdata/installed-in-home") + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "installed") + r.RunAndExpectOutput(` + Name Description Version + blueprint Blueprint Project v0.3.15 + `) +} diff --git a/cmd/labs/labs.go b/cmd/labs/labs.go new file mode 100644 index 0000000000..ee10be804f --- /dev/null +++ b/cmd/labs/labs.go @@ -0,0 +1,39 @@ +package labs + +import ( + "context" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/spf13/cobra" +) + +func New(ctx context.Context) *cobra.Command { + cmd := &cobra.Command{ + Use: "labs", + Short: "Managge Databricks Labs installations", + Long: `Manage experimental Databricks Labs apps`, + } + + cmd.AddGroup(&cobra.Group{ + ID: "labs", + Title: "Installed Databricks Labs", + }) + + cmd.AddCommand( + newListCommand(), + newInstallCommand(), + newUpgradeCommand(), + newInstalledCommand(), + newClearCacheCommand(), + newUninstallCommand(), + newShowCommand(), + ) + all, err := project.Installed(ctx) + if err != nil { + panic(err) + } + for _, v := range all { + v.Register(cmd) + } + return cmd +} diff --git a/cmd/labs/list.go b/cmd/labs/list.go new file mode 100644 index 0000000000..07cc180c61 --- 
/dev/null +++ b/cmd/labs/list.go @@ -0,0 +1,62 @@ +package labs + +import ( + "context" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +type labsMeta struct { + Name string `json:"name"` + Description string `json:"description"` + License string `json:"license"` +} + +func allRepos(ctx context.Context) (github.Repositories, error) { + cacheDir := project.PathInLabs(ctx) + cache := github.NewRepositoryCache("databrickslabs", cacheDir) + return cache.Load(ctx) +} + +func newListCommand() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all labs", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name Description + {{range .}}{{.Name}} {{.Description}} + {{end}} + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + repositories, err := allRepos(ctx) + if err != nil { + return err + } + info := []labsMeta{} + for _, v := range repositories { + if v.IsArchived { + continue + } + if v.IsFork { + continue + } + description := v.Description + if len(description) > 50 { + description = description[:50] + "..." 
+ } + info = append(info, labsMeta{ + Name: v.Name, + Description: description, + License: v.License.Name, + }) + } + return cmdio.Render(ctx, info) + }, + } +} diff --git a/cmd/labs/list_test.go b/cmd/labs/list_test.go new file mode 100644 index 0000000000..925b984ab8 --- /dev/null +++ b/cmd/labs/list_test.go @@ -0,0 +1,19 @@ +package labs_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/env" + "github.com/stretchr/testify/require" +) + +func TestListingWorks(t *testing.T) { + ctx := context.Background() + ctx = env.WithUserHomeDir(ctx, "project/testdata/installed-in-home") + c := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "list") + stdout, _, err := c.Run() + require.NoError(t, err) + require.Contains(t, stdout.String(), "ucx") +} diff --git a/cmd/labs/localcache/jsonfile.go b/cmd/labs/localcache/jsonfile.go new file mode 100644 index 0000000000..7c87d08e82 --- /dev/null +++ b/cmd/labs/localcache/jsonfile.go @@ -0,0 +1,96 @@ +package localcache + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io/fs" + "net/url" + "os" + "path/filepath" + "time" + + "github.com/databricks/cli/libs/log" +) + +const userRW = 0o600 + +func NewLocalCache[T any](dir, name string, validity time.Duration) LocalCache[T] { + return LocalCache[T]{ + dir: dir, + name: name, + validity: validity, + } +} + +type LocalCache[T any] struct { + name string + dir string + validity time.Duration + zero T +} + +func (r *LocalCache[T]) Load(ctx context.Context, refresh func() (T, error)) (T, error) { + cached, err := r.loadCache() + if errors.Is(err, fs.ErrNotExist) { + return r.refreshCache(ctx, refresh, r.zero) + } else if err != nil { + return r.zero, err + } else if time.Since(cached.Refreshed) > r.validity { + return r.refreshCache(ctx, refresh, cached.Data) + } + return cached.Data, nil +} + +type cached[T any] struct { + // we don't use mtime of the file because it's easier to + // for testdata used in 
the unit tests to be somewhere far + // in the future and don't bother about switching the mtime bit. + Refreshed time.Time `json:"refreshed_at"` + Data T `json:"data"` +} + +func (r *LocalCache[T]) refreshCache(ctx context.Context, refresh func() (T, error), offlineVal T) (T, error) { + data, err := refresh() + var urlError *url.Error + if errors.As(err, &urlError) { + log.Warnf(ctx, "System offline. Cannot refresh cache: %s", urlError) + return offlineVal, nil + } + if err != nil { + return r.zero, fmt.Errorf("refresh: %w", err) + } + return r.writeCache(ctx, data) +} + +func (r *LocalCache[T]) writeCache(ctx context.Context, data T) (T, error) { + cached := &cached[T]{time.Now(), data} + raw, err := json.MarshalIndent(cached, "", " ") + if err != nil { + return r.zero, fmt.Errorf("json marshal: %w", err) + } + err = os.WriteFile(r.FileName(), raw, userRW) + if err != nil { + return r.zero, fmt.Errorf("save cache: %w", err) + } + return data, nil +} + +func (r *LocalCache[T]) FileName() string { + return filepath.Join(r.dir, fmt.Sprintf("%s.json", r.name)) +} + +func (r *LocalCache[T]) loadCache() (*cached[T], error) { + jsonFile := r.FileName() + raw, err := os.ReadFile(r.FileName()) + if err != nil { + return nil, fmt.Errorf("read %s: %w", jsonFile, err) + } + var v cached[T] + err = json.Unmarshal(raw, &v) + if err != nil { + return nil, fmt.Errorf("parse %s: %w", jsonFile, err) + } + return &v, nil +} diff --git a/cmd/labs/localcache/jsonfile_test.go b/cmd/labs/localcache/jsonfile_test.go new file mode 100644 index 0000000000..d457b25070 --- /dev/null +++ b/cmd/labs/localcache/jsonfile_test.go @@ -0,0 +1,97 @@ +package localcache + +import ( + "context" + "errors" + "fmt" + "net/url" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestRefreshes(t *testing.T) { + ctx := context.Background() + c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) + thing := []int64{0} + tick := func() (int64, error) { + thing[0] += 1 + return 
thing[0], nil + } + first, err := c.Load(ctx, tick) + assert.NoError(t, err) + + second, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.Equal(t, first, second) + + c.validity = 0 + third, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.NotEqual(t, first, third) +} + +func TestSupportOfflineSystem(t *testing.T) { + c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) + thing := []int64{0} + tick := func() (int64, error) { + thing[0] += 1 + return thing[0], nil + } + ctx := context.Background() + first, err := c.Load(ctx, tick) + assert.NoError(t, err) + + tick = func() (int64, error) { + return 0, &url.Error{ + Op: "X", + URL: "Y", + Err: errors.New("nope"), + } + } + + c.validity = 0 + + // offline during refresh + third, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.Equal(t, first, third) + + // fully offline + c = NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) + zero, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.Equal(t, int64(0), zero) +} + +func TestFolderDisappears(t *testing.T) { + c := NewLocalCache[int64]("/dev/null", "time", 1*time.Minute) + tick := func() (int64, error) { + now := time.Now().UnixNano() + t.Log("TICKS") + return now, nil + } + ctx := context.Background() + _, err := c.Load(ctx, tick) + assert.Error(t, err) +} + +func TestRefreshFails(t *testing.T) { + c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) + tick := func() (int64, error) { + return 0, fmt.Errorf("nope") + } + ctx := context.Background() + _, err := c.Load(ctx, tick) + assert.EqualError(t, err, "refresh: nope") +} + +func TestWrongType(t *testing.T) { + c := NewLocalCache[chan int](t.TempDir(), "x", 1*time.Minute) + ctx := context.Background() + _, err := c.Load(ctx, func() (chan int, error) { + return make(chan int), nil + }) + assert.EqualError(t, err, "json marshal: json: unsupported type: chan int") +} diff --git a/cmd/labs/project/command_test.go b/cmd/labs/project/command_test.go new file mode 
100644 index 0000000000..20021879fe --- /dev/null +++ b/cmd/labs/project/command_test.go @@ -0,0 +1,69 @@ +package project_test + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/python" + "github.com/databricks/databricks-sdk-go" + "github.com/stretchr/testify/assert" +) + +type echoOut struct { + Command string `json:"command"` + Flags map[string]string `json:"flags"` + Env map[string]string `json:"env"` +} + +func devEnvContext(t *testing.T) context.Context { + ctx := context.Background() + ctx = env.WithUserHomeDir(ctx, "testdata/installed-in-home") + py, _ := python.DetectExecutable(ctx) + py, _ = filepath.Abs(py) + ctx = env.Set(ctx, "PYTHON_BIN", py) + return ctx +} + +func TestRunningBlueprintEcho(t *testing.T) { + ctx := devEnvContext(t) + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "echo") + var out echoOut + r.RunAndParseJSON(&out) + assert.Equal(t, "echo", out.Command) + assert.Equal(t, "something", out.Flags["first"]) + assert.Equal(t, "https://accounts.cloud.databricks.com", out.Env["DATABRICKS_HOST"]) + assert.Equal(t, "cde", out.Env["DATABRICKS_ACCOUNT_ID"]) +} + +func TestRunningBlueprintEchoProfileWrongOverride(t *testing.T) { + ctx := devEnvContext(t) + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "echo", "--profile", "workspace-profile") + _, _, err := r.Run() + assert.ErrorIs(t, err, databricks.ErrNotAccountClient) +} + +func TestRunningCommand(t *testing.T) { + ctx := devEnvContext(t) + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "foo") + r.WithStdin() + defer r.CloseStdin() + + r.RunBackground() + r.WaitForTextPrinted("What is your name?", 5*time.Second) + r.SendText("Dude\n") + r.WaitForTextPrinted("Hello, Dude!", 5*time.Second) +} + +func TestRenderingTable(t *testing.T) { + ctx := devEnvContext(t) + r := 
internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "table") + r.RunAndExpectOutput(` + Key Value + First Second + Third Fourth + `) +} diff --git a/cmd/labs/project/entrypoint.go b/cmd/labs/project/entrypoint.go new file mode 100644 index 0000000000..ebf886be55 --- /dev/null +++ b/cmd/labs/project/entrypoint.go @@ -0,0 +1,247 @@ +package project + +import ( + "context" + "errors" + "fmt" + "io/fs" + "net/http" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/internal/build" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/config" + "github.com/spf13/cobra" +) + +type Entrypoint struct { + *Project + + RequireRunningCluster bool `yaml:"require_running_cluster,omitempty"` + IsUnauthenticated bool `yaml:"is_unauthenticated,omitempty"` + IsAccountLevel bool `yaml:"is_account_level,omitempty"` + IsBundleAware bool `yaml:"is_bundle_aware,omitempty"` +} + +var ErrNoLoginConfig = errors.New("no login configuration found") +var ErrMissingClusterID = errors.New("missing a cluster compatible with Databricks Connect") +var ErrMissingWarehouseID = errors.New("missing a SQL warehouse") +var ErrNotInTTY = errors.New("not in an interactive terminal") + +func (e *Entrypoint) NeedsCluster() bool { + if e.Installer == nil { + return false + } + if e.Installer.RequireDatabricksConnect && e.Installer.MinRuntimeVersion == "" { + e.Installer.MinRuntimeVersion = "13.1" + } + return e.Installer.MinRuntimeVersion != "" +} + +func (e *Entrypoint) NeedsWarehouse() bool { + if e.Installer == nil { + return false + } + return len(e.Installer.WarehouseTypes) != 0 +} + +func (e *Entrypoint) Prepare(cmd *cobra.Command) (map[string]string, error) { + ctx := cmd.Context() + libDir := e.EffectiveLibDir(ctx) + environment := 
map[string]string{ + "DATABRICKS_CLI_VERSION": build.GetInfo().Version, + "DATABRICKS_LABS_CACHE_DIR": e.CacheDir(ctx), + "DATABRICKS_LABS_CONFIG_DIR": e.ConfigDir(ctx), + "DATABRICKS_LABS_STATE_DIR": e.StateDir(ctx), + "DATABRICKS_LABS_LIB_DIR": libDir, + } + if e.IsPythonProject(ctx) { + e.preparePython(ctx, environment) + } + cfg, err := e.validLogin(cmd) + if err != nil { + return nil, fmt.Errorf("login: %w", err) + } + // cleanup auth profile and config file location, + // so that we don't confuse SDKs + cfg.Profile = "" + cfg.ConfigFile = "" + varNames := []string{} + for k, v := range e.environmentFromConfig(cfg) { + environment[k] = v + varNames = append(varNames, k) + } + if e.NeedsCluster() && e.RequireRunningCluster { + err = e.ensureRunningCluster(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("running cluster: %w", err) + } + } + log.Debugf(ctx, "Passing down environment variables: %s", strings.Join(varNames, ", ")) + return environment, nil +} + +func (e *Entrypoint) preparePython(ctx context.Context, environment map[string]string) { + venv := e.virtualEnvPath(ctx) + environment["PATH"] = e.joinPaths(filepath.Join(venv, "bin"), env.Get(ctx, "PATH")) + + // PYTHONPATH extends the standard lookup locations for module files. It follows the same structure as + // the shell's PATH, where you specify one or more directory paths separated by the appropriate delimiter + // (such as colons for Unix or semicolons for Windows). If a directory listed in PYTHONPATH doesn't exist, + // it is disregarded without any notifications. + // + // Beyond regular directories, individual entries in PYTHONPATH can point to zipfiles that contain pure + // Python modules in either their source or compiled forms. It's important to note that extension modules + // cannot be imported from zipfiles. + // + // The initial search path varies depending on your installation but typically commences with the + // prefix/lib/pythonversion path (as indicated by PYTHONHOME). 
This default path is always included + // in PYTHONPATH. + // + // An extra directory can be included at the beginning of the search path, coming before PYTHONPATH, + // as explained in the Interface options section. You can control the search path from within a Python + // script using the sys.path variable. + // + // Here we are also supporting the "src" layout for python projects. + // + // See https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH + libDir := e.EffectiveLibDir(ctx) + environment["PYTHONPATH"] = e.joinPaths(libDir, filepath.Join(libDir, "src")) +} + +func (e *Entrypoint) ensureRunningCluster(ctx context.Context, cfg *config.Config) error { + feedback := cmdio.Spinner(ctx) + defer close(feedback) + w, err := databricks.NewWorkspaceClient((*databricks.Config)(cfg)) + if err != nil { + return fmt.Errorf("workspace client: %w", err) + } + // TODO: add in-progress callback to EnsureClusterIsRunning() in SDK + feedback <- "Ensuring the cluster is running..." + err = w.Clusters.EnsureClusterIsRunning(ctx, cfg.ClusterID) + if err != nil { + return fmt.Errorf("ensure running: %w", err) + } + return nil +} + +func (e *Entrypoint) joinPaths(paths ...string) string { + return strings.Join(paths, string(os.PathListSeparator)) +} + +func (e *Entrypoint) envAwareConfig(ctx context.Context) *config.Config { + return &config.Config{ + ConfigFile: filepath.Join(env.UserHomeDir(ctx), ".databrickscfg"), + Loaders: []config.Loader{ + env.NewConfigLoader(ctx), + config.ConfigAttributes, + config.ConfigFile, + }, + } +} + +func (e *Entrypoint) envAwareConfigWithProfile(ctx context.Context, profile string) *config.Config { + cfg := e.envAwareConfig(ctx) + cfg.Profile = profile + return cfg +} + +func (e *Entrypoint) getLoginConfig(cmd *cobra.Command) (*loginConfig, *config.Config, error) { + ctx := cmd.Context() + // it's okay for this config file not to exist, because some environments, + // like GitHub Actions, don't (need) to have it. 
There's a small downside of + // a warning log message from within Go SDK. + profileOverride := e.profileOverride(cmd) + if profileOverride != "" { + log.Infof(ctx, "Overriding login profile: %s", profileOverride) + return &loginConfig{}, e.envAwareConfigWithProfile(ctx, profileOverride), nil + } + lc, err := e.loadLoginConfig(ctx) + isNoLoginConfig := errors.Is(err, fs.ErrNotExist) + defaultConfig := e.envAwareConfig(ctx) + if isNoLoginConfig && !e.IsBundleAware && e.isAuthConfigured(defaultConfig) { + log.Debugf(ctx, "Login is configured via environment variables") + return &loginConfig{}, defaultConfig, nil + } + if isNoLoginConfig && !e.IsBundleAware { + return nil, nil, ErrNoLoginConfig + } + if !isNoLoginConfig && err != nil { + return nil, nil, fmt.Errorf("load: %w", err) + } + if e.IsAccountLevel { + log.Debugf(ctx, "Using account-level login profile: %s", lc.AccountProfile) + return lc, e.envAwareConfigWithProfile(ctx, lc.AccountProfile), nil + } + if e.IsBundleAware { + err = root.TryConfigureBundle(cmd, []string{}) + if err != nil { + return nil, nil, fmt.Errorf("bundle: %w", err) + } + if b := bundle.GetOrNil(cmd.Context()); b != nil { + log.Infof(ctx, "Using login configuration from Databricks Asset Bundle") + return &loginConfig{}, b.WorkspaceClient().Config, nil + } + } + log.Debugf(ctx, "Using workspace-level login profile: %s", lc.WorkspaceProfile) + return lc, e.envAwareConfigWithProfile(ctx, lc.WorkspaceProfile), nil +} + +func (e *Entrypoint) validLogin(cmd *cobra.Command) (*config.Config, error) { + if e.IsUnauthenticated { + return &config.Config{}, nil + } + lc, cfg, err := e.getLoginConfig(cmd) + if err != nil { + return nil, fmt.Errorf("login config: %w", err) + } + err = cfg.EnsureResolved() + if err != nil { + return nil, err + } + // merge ~/.databrickscfg and ~/.databricks/labs/x/config/login.json when + // it comes to project-specific configuration + if e.NeedsCluster() && cfg.ClusterID == "" { + cfg.ClusterID = lc.ClusterID + } + if 
e.NeedsWarehouse() && cfg.WarehouseID == "" { + cfg.WarehouseID = lc.WarehouseID + } + isACC := cfg.IsAccountClient() + if e.IsAccountLevel && !isACC { + return nil, databricks.ErrNotAccountClient + } + if e.NeedsCluster() && !isACC && cfg.ClusterID == "" { + return nil, ErrMissingClusterID + } + if e.NeedsWarehouse() && !isACC && cfg.WarehouseID == "" { + return nil, ErrMissingWarehouseID + } + return cfg, nil +} + +func (e *Entrypoint) environmentFromConfig(cfg *config.Config) map[string]string { + env := map[string]string{} + for _, a := range config.ConfigAttributes { + if a.IsZero(cfg) { + continue + } + for _, ev := range a.EnvVars { + env[ev] = a.GetString(cfg) + } + } + return env +} + +func (e *Entrypoint) isAuthConfigured(cfg *config.Config) bool { + r := &http.Request{Header: http.Header{}} + err := cfg.Authenticate(r.WithContext(context.Background())) + return err == nil +} diff --git a/cmd/labs/project/fetcher.go b/cmd/labs/project/fetcher.go new file mode 100644 index 0000000000..179ba183a9 --- /dev/null +++ b/cmd/labs/project/fetcher.go @@ -0,0 +1,145 @@ +package project + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/libs/log" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type installable interface { + Install(ctx context.Context) error +} + +type devInstallation struct { + *Project + *cobra.Command +} + +func (d *devInstallation) Install(ctx context.Context) error { + if d.Installer == nil { + return nil + } + _, err := d.Installer.validLogin(d.Command) + if errors.Is(err, ErrNoLoginConfig) { + cfg := d.Installer.envAwareConfig(ctx) + lc := &loginConfig{Entrypoint: d.Installer.Entrypoint} + _, err = lc.askWorkspace(ctx, cfg) + if err != nil { + return fmt.Errorf("ask for workspace: %w", err) + } + err = lc.askAccountProfile(ctx, cfg) + if err != nil { + return fmt.Errorf("ask for account: %w", err) + } + err = 
lc.EnsureFoldersExist(ctx) + if err != nil { + return fmt.Errorf("folders: %w", err) + } + err = lc.save(ctx) + if err != nil { + return fmt.Errorf("save: %w", err) + } + } + return d.Installer.runHook(d.Command) +} + +func NewInstaller(cmd *cobra.Command, name string) (installable, error) { + if name == "." { + wd, err := os.Getwd() + if err != nil { + return nil, fmt.Errorf("working directory: %w", err) + } + prj, err := Load(cmd.Context(), filepath.Join(wd, "labs.yml")) + if err != nil { + return nil, fmt.Errorf("load: %w", err) + } + cmd.PrintErrln(color.YellowString("Installing %s in development mode from %s", prj.Name, wd)) + return &devInstallation{ + Project: prj, + Command: cmd, + }, nil + } + name, version, ok := strings.Cut(name, "@") + if !ok { + version = "latest" + } + f := &fetcher{name} + version, err := f.checkReleasedVersions(cmd, version) + if err != nil { + return nil, fmt.Errorf("version: %w", err) + } + prj, err := f.loadRemoteProjectDefinition(cmd, version) + if err != nil { + return nil, fmt.Errorf("remote: %w", err) + } + return &installer{ + Project: prj, + version: version, + cmd: cmd, + }, nil +} + +func NewUpgrader(cmd *cobra.Command, name string) (*installer, error) { + f := &fetcher{name} + version, err := f.checkReleasedVersions(cmd, "latest") + if err != nil { + return nil, fmt.Errorf("version: %w", err) + } + prj, err := f.loadRemoteProjectDefinition(cmd, version) + if err != nil { + return nil, fmt.Errorf("remote: %w", err) + } + prj.folder = PathInLabs(cmd.Context(), name) + return &installer{ + Project: prj, + version: version, + cmd: cmd, + }, nil +} + +type fetcher struct { + name string +} + +func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (string, error) { + ctx := cmd.Context() + cacheDir := PathInLabs(ctx, f.name, "cache") + err := os.MkdirAll(cacheDir, ownerRWXworldRX) + if err != nil { + return "", fmt.Errorf("make cache dir: %w", err) + } + // `databricks labs install X` doesn't know which 
exact version to fetch, so first + // we fetch all versions and then pick the latest one dynamically. + versions, err := github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx) + if err != nil { + return "", fmt.Errorf("versions: %w", err) + } + for _, v := range versions { + if v.Version == version { + return version, nil + } + } + if version == "latest" && len(versions) > 0 { + log.Debugf(ctx, "Latest %s version is: %s", f.name, versions[0].Version) + return versions[0].Version, nil + } + cmd.PrintErrln(color.YellowString("[WARNING] Installing unreleased version: %s", version)) + return version, nil +} + +func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string) (*Project, error) { + ctx := cmd.Context() + raw, err := github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml") + if err != nil { + return nil, fmt.Errorf("read labs.yml from GitHub: %w", err) + } + return readFromBytes(ctx, raw) +} diff --git a/cmd/labs/project/helpers.go b/cmd/labs/project/helpers.go new file mode 100644 index 0000000000..9117d875d0 --- /dev/null +++ b/cmd/labs/project/helpers.go @@ -0,0 +1,35 @@ +package project + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + "github.com/databricks/cli/libs/env" +) + +func PathInLabs(ctx context.Context, dirs ...string) string { + homdeDir := env.UserHomeDir(ctx) + prefix := []string{homdeDir, ".databricks", "labs"} + return filepath.Join(append(prefix, dirs...)...) 
+} + +func tryLoadAndParseJSON[T any](jsonFile string) (*T, error) { + raw, err := os.ReadFile(jsonFile) + if errors.Is(err, fs.ErrNotExist) { + return nil, err + } + if err != nil { + return nil, fmt.Errorf("read %s: %w", jsonFile, err) + } + var v T + err = json.Unmarshal(raw, &v) + if err != nil { + return nil, fmt.Errorf("parse %s: %w", jsonFile, err) + } + return &v, nil +} diff --git a/cmd/labs/project/init_test.go b/cmd/labs/project/init_test.go new file mode 100644 index 0000000000..959381f5dd --- /dev/null +++ b/cmd/labs/project/init_test.go @@ -0,0 +1,13 @@ +package project + +import ( + "log/slog" + "os" +) + +func init() { + slog.SetDefault(slog.New( + slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ + Level: slog.LevelDebug, + }))) +} diff --git a/cmd/labs/project/installed.go b/cmd/labs/project/installed.go new file mode 100644 index 0000000000..77fee544fc --- /dev/null +++ b/cmd/labs/project/installed.go @@ -0,0 +1,58 @@ +package project + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + "github.com/databricks/cli/folders" + "github.com/databricks/cli/libs/log" +) + +func projectInDevMode(ctx context.Context) (*Project, error) { + cwd, err := os.Getwd() + if err != nil { + return nil, err + } + folder, err := folders.FindDirWithLeaf(cwd, "labs.yml") + if err != nil { + return nil, err + } + log.Debugf(ctx, "Found project under development in: %s", cwd) + return Load(ctx, filepath.Join(folder, "labs.yml")) +} + +func Installed(ctx context.Context) (projects []*Project, err error) { + labsDir, err := os.ReadDir(PathInLabs(ctx)) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + return nil, err + } + projectDev, err := projectInDevMode(ctx) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + return nil, err + } + if err == nil { + projects = append(projects, projectDev) + } + for _, v := range labsDir { + if !v.IsDir() { + continue + } + if projectDev != nil && v.Name() == projectDev.Name { + continue + } + 
labsYml := PathInLabs(ctx, v.Name(), "lib", "labs.yml") + prj, err := Load(ctx, labsYml) + if errors.Is(err, fs.ErrNotExist) { + continue + } + if err != nil { + return nil, fmt.Errorf("%s: %w", v.Name(), err) + } + projects = append(projects, prj) + } + return projects, nil +} diff --git a/cmd/labs/project/installed_test.go b/cmd/labs/project/installed_test.go new file mode 100644 index 0000000000..e837692d61 --- /dev/null +++ b/cmd/labs/project/installed_test.go @@ -0,0 +1,19 @@ +package project + +import ( + "context" + "testing" + + "github.com/databricks/cli/libs/env" + "github.com/stretchr/testify/assert" +) + +func TestInstalled(t *testing.T) { + ctx := context.Background() + ctx = env.WithUserHomeDir(ctx, "testdata/installed-in-home") + + all, err := Installed(ctx) + assert.NoError(t, err) + assert.Len(t, all, 1) + assert.Equal(t, "blueprint", all[0].Name) +} diff --git a/cmd/labs/project/installer.go b/cmd/labs/project/installer.go new file mode 100644 index 0000000000..1c990890e5 --- /dev/null +++ b/cmd/labs/project/installer.go @@ -0,0 +1,286 @@ +package project + +import ( + "bytes" + "context" + "errors" + "fmt" + "os" + "strings" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/cmd/labs/unpack" + "github.com/databricks/cli/cmd/workspace/clusters" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/databrickscfg" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/process" + "github.com/databricks/cli/libs/python" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +const ownerRWXworldRX = 0o755 + +type whTypes []sql.EndpointInfoWarehouseType + +type hook struct { + *Entrypoint `yaml:",inline"` + Script string `yaml:"script"` + RequireDatabricksConnect bool `yaml:"require_databricks_connect,omitempty"` + 
MinRuntimeVersion string `yaml:"min_runtime_version,omitempty"` + WarehouseTypes whTypes `yaml:"warehouse_types,omitempty"` +} + +func (h *hook) RequireRunningCluster() bool { + if h.Entrypoint == nil { + return false + } + return h.Entrypoint.RequireRunningCluster +} + +func (h *hook) HasPython() bool { + return strings.HasSuffix(h.Script, ".py") +} + +func (h *hook) runHook(cmd *cobra.Command) error { + if h.Script == "" { + return nil + } + ctx := cmd.Context() + envs, err := h.Prepare(cmd) + if err != nil { + return fmt.Errorf("prepare: %w", err) + } + libDir := h.EffectiveLibDir(ctx) + args := []string{} + if strings.HasSuffix(h.Script, ".py") { + args = append(args, h.virtualEnvPython(ctx)) + } + return process.Forwarded(ctx, + append(args, h.Script), + cmd.InOrStdin(), + cmd.OutOrStdout(), + cmd.ErrOrStderr(), + process.WithDir(libDir), + process.WithEnvs(envs)) +} + +type installer struct { + *Project + version string + + // command instance is used for: + // - auth profile flag override + // - standard input, output, and error streams + cmd *cobra.Command +} + +func (i *installer) Install(ctx context.Context) error { + err := i.EnsureFoldersExist(ctx) + if err != nil { + return fmt.Errorf("folders: %w", err) + } + i.folder = PathInLabs(ctx, i.Name) + w, err := i.login(ctx) + if err != nil && errors.Is(err, databrickscfg.ErrNoConfiguration) { + cfg := i.Installer.envAwareConfig(ctx) + w, err = databricks.NewWorkspaceClient((*databricks.Config)(cfg)) + if err != nil { + return fmt.Errorf("no ~/.databrickscfg: %w", err) + } + } else if err != nil { + return fmt.Errorf("login: %w", err) + } + err = i.downloadLibrary(ctx) + if err != nil { + return fmt.Errorf("lib: %w", err) + } + err = i.setupPythonVirtualEnvironment(ctx, w) + if err != nil { + return fmt.Errorf("python: %w", err) + } + err = i.recordVersion(ctx) + if err != nil { + return fmt.Errorf("record version: %w", err) + } + // TODO: failing install hook for "clean installations" (not upgrades) + // 
should trigger removal of the project, otherwise users end up with + // misconfigured CLIs + err = i.runInstallHook(ctx) + if err != nil { + return fmt.Errorf("installer: %w", err) + } + return nil +} + +func (i *installer) Upgrade(ctx context.Context) error { + err := i.downloadLibrary(ctx) + if err != nil { + return fmt.Errorf("lib: %w", err) + } + err = i.recordVersion(ctx) + if err != nil { + return fmt.Errorf("record version: %w", err) + } + err = i.runInstallHook(ctx) + if err != nil { + return fmt.Errorf("installer: %w", err) + } + return nil +} + +func (i *installer) warningf(text string, v ...any) { + i.cmd.PrintErrln(color.YellowString(text, v...)) +} + +func (i *installer) cleanupLib(ctx context.Context) error { + libDir := i.LibDir(ctx) + err := os.RemoveAll(libDir) + if err != nil { + return fmt.Errorf("remove all: %w", err) + } + return os.MkdirAll(libDir, ownerRWXworldRX) +} + +func (i *installer) recordVersion(ctx context.Context) error { + return i.writeVersionFile(ctx, i.version) +} + +func (i *installer) login(ctx context.Context) (*databricks.WorkspaceClient, error) { + if !cmdio.IsInteractive(ctx) { + log.Debugf(ctx, "Skipping workspace profile prompts in non-interactive mode") + return nil, nil + } + cfg, err := i.metaEntrypoint(ctx).validLogin(i.cmd) + if errors.Is(err, ErrNoLoginConfig) { + cfg = i.Installer.envAwareConfig(ctx) + } else if err != nil { + return nil, fmt.Errorf("valid: %w", err) + } + if !i.HasAccountLevelCommands() && cfg.IsAccountClient() { + return nil, fmt.Errorf("got account-level client, but no account-level commands") + } + lc := &loginConfig{Entrypoint: i.Installer.Entrypoint} + w, err := lc.askWorkspace(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("ask for workspace: %w", err) + } + err = lc.askAccountProfile(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("ask for account: %w", err) + } + err = lc.save(ctx) + if err != nil { + return nil, fmt.Errorf("save: %w", err) + } + return w, nil +} + +func (i 
*installer) downloadLibrary(ctx context.Context) error { + feedback := cmdio.Spinner(ctx) + defer close(feedback) + feedback <- "Cleaning up previous installation if necessary" + err := i.cleanupLib(ctx) + if err != nil { + return fmt.Errorf("cleanup: %w", err) + } + libTarget := i.LibDir(ctx) + // we may support wheels, jars, and golang binaries. but those are not zipballs + if i.IsZipball() { + feedback <- fmt.Sprintf("Downloading and unpacking zipball for %s", i.version) + return i.downloadAndUnpackZipball(ctx, libTarget) + } + return fmt.Errorf("we only support zipballs for now") +} + +func (i *installer) downloadAndUnpackZipball(ctx context.Context, libTarget string) error { + raw, err := github.DownloadZipball(ctx, "databrickslabs", i.Name, i.version) + if err != nil { + return fmt.Errorf("download zipball from GitHub: %w", err) + } + zipball := unpack.GitHubZipball{Reader: bytes.NewBuffer(raw)} + log.Debugf(ctx, "Unpacking zipball to: %s", libTarget) + return zipball.UnpackTo(libTarget) +} + +func (i *installer) setupPythonVirtualEnvironment(ctx context.Context, w *databricks.WorkspaceClient) error { + if !i.HasPython() { + return nil + } + feedback := cmdio.Spinner(ctx) + defer close(feedback) + feedback <- "Detecting all installed Python interpreters on the system" + pythonInterpreters, err := python.DetectInterpreters(ctx) + if err != nil { + return fmt.Errorf("detect: %w", err) + } + py, err := pythonInterpreters.AtLeast(i.MinPython) + if err != nil { + return fmt.Errorf("min version: %w", err) + } + log.Debugf(ctx, "Detected Python %s at: %s", py.Version, py.Path) + venvPath := i.virtualEnvPath(ctx) + log.Debugf(ctx, "Creating Python Virtual Environment at: %s", venvPath) + feedback <- fmt.Sprintf("Creating Virtual Environment with Python %s", py.Version) + _, err = process.Background(ctx, []string{py.Path, "-m", "venv", venvPath}) + if err != nil { + return fmt.Errorf("create venv: %w", err) + } + if i.Installer != nil && 
i.Installer.RequireDatabricksConnect { + feedback <- "Determining Databricks Connect version" + cluster, err := w.Clusters.Get(ctx, compute.GetClusterRequest{ + ClusterId: w.Config.ClusterID, + }) + if err != nil { + return fmt.Errorf("cluster: %w", err) + } + runtimeVersion, ok := clusters.GetRuntimeVersion(cluster) + if !ok { + return fmt.Errorf("unsupported runtime: %s", cluster.SparkVersion) + } + feedback <- fmt.Sprintf("Installing Databricks Connect v%s", runtimeVersion) + pipSpec := fmt.Sprintf("databricks-connect==%s", runtimeVersion) + err = i.installPythonDependencies(ctx, pipSpec) + if err != nil { + return fmt.Errorf("dbconnect: %w", err) + } + } + feedback <- "Installing Python library dependencies" + return i.installPythonDependencies(ctx, ".") +} + +func (i *installer) installPythonDependencies(ctx context.Context, spec string) error { + if !i.IsPythonProject(ctx) { + return nil + } + libDir := i.LibDir(ctx) + log.Debugf(ctx, "Installing Python dependencies for: %s", libDir) + // maybe we'll need to add call one of the two scripts: + // - python3 -m ensurepip --default-pip + // - curl -o https://bootstrap.pypa.io/get-pip.py | python3 + var buf bytes.Buffer + _, err := process.Background(ctx, + []string{i.virtualEnvPython(ctx), "-m", "pip", "install", spec}, + process.WithCombinedOutput(&buf), + process.WithDir(libDir)) + if err != nil { + i.warningf(buf.String()) + return fmt.Errorf("failed to install dependencies of %s", spec) + } + return nil +} + +func (i *installer) runInstallHook(ctx context.Context) error { + if i.Installer == nil { + return nil + } + if i.Installer.Script == "" { + return nil + } + log.Debugf(ctx, "Launching installer script %s in %s", i.Installer.Script, i.LibDir(ctx)) + return i.Installer.runHook(i.cmd) +} diff --git a/cmd/labs/project/installer_test.go b/cmd/labs/project/installer_test.go new file mode 100644 index 0000000000..b61026f227 --- /dev/null +++ b/cmd/labs/project/installer_test.go @@ -0,0 +1,415 @@ +package 
project_test + +import ( + "archive/zip" + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "io/fs" + "net/http" + "net/http/httptest" + "os" + "path" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/python" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/stretchr/testify/require" +) + +const ownerRWXworldRX = 0o755 +const ownerRW = 0o600 + +func zipballFromFolder(src string) ([]byte, error) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + rootDir := path.Base(src) // this is required to emulate github ZIP downloads + err := filepath.Walk(src, func(filePath string, info os.FileInfo, err error) error { + if err != nil { + return err + } + relpath, err := filepath.Rel(src, filePath) + if err != nil { + return err + } + relpath = path.Join(rootDir, relpath) + if info.IsDir() { + _, err = zw.Create(relpath + "/") + return err + } + file, err := os.Open(filePath) + if err != nil { + return err + } + defer file.Close() + f, err := zw.Create(relpath) + if err != nil { + return err + } + _, err = io.Copy(f, file) + return err + }) + if err != nil { + return nil, err + } + err = zw.Close() + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func copyTestdata(t *testing.T, name string) string { + // TODO: refactor fs.cp command into a reusable util + tempDir := t.TempDir() + name = strings.ReplaceAll(name, "/", string(os.PathSeparator)) + err := filepath.WalkDir(name, func(path string, d fs.DirEntry, err error) error { + require.NoError(t, err) + dst := strings.TrimPrefix(path, name) + if dst == "" { + return nil + } + if d.IsDir() { + err := os.MkdirAll(filepath.Join(tempDir, dst), ownerRWXworldRX) + 
require.NoError(t, err) + return nil + } + in, err := os.Open(path) + require.NoError(t, err) + defer in.Close() + out, err := os.Create(filepath.Join(tempDir, dst)) + require.NoError(t, err) + defer out.Close() + _, err = io.Copy(out, in) + require.NoError(t, err) + return nil + }) + require.NoError(t, err) + return tempDir +} + +func installerContext(t *testing.T, server *httptest.Server) context.Context { + ctx := context.Background() + ctx = github.WithApiOverride(ctx, server.URL) + ctx = github.WithUserContentOverride(ctx, server.URL) + ctx = env.WithUserHomeDir(ctx, t.TempDir()) + // trick release cache to think it went to github already + cachePath := project.PathInLabs(ctx, "blueprint", "cache") + err := os.MkdirAll(cachePath, ownerRWXworldRX) + require.NoError(t, err) + bs := []byte(`{"refreshed_at": "2033-01-01T00:00:00.92857+02:00","data": [{"tag_name": "v0.3.15"}]}`) + err = os.WriteFile(filepath.Join(cachePath, "databrickslabs-blueprint-releases.json"), bs, ownerRW) + require.NoError(t, err) + return ctx +} + +func respondWithJSON(t *testing.T, w http.ResponseWriter, v any) { + raw, err := json.Marshal(v) + if err != nil { + require.NoError(t, err) + } + w.Write(raw) +} + +type fileTree struct { + Path string + MaxDepth int +} + +func (ft fileTree) String() string { + lines := ft.listFiles(ft.Path, ft.MaxDepth) + return strings.Join(lines, "\n") +} + +func (ft fileTree) listFiles(dir string, depth int) (lines []string) { + if ft.MaxDepth > 0 && depth > ft.MaxDepth { + return []string{fmt.Sprintf("deeper than %d levels", ft.MaxDepth)} + } + fileInfo, err := os.ReadDir(dir) + if err != nil { + return []string{err.Error()} + } + for _, entry := range fileInfo { + lines = append(lines, fmt.Sprintf("%s%s", ft.getIndent(depth), entry.Name())) + if entry.IsDir() { + subdir := filepath.Join(dir, entry.Name()) + lines = append(lines, ft.listFiles(subdir, depth+1)...) 
+ } + } + return lines +} + +func (ft fileTree) getIndent(depth int) string { + return "│" + strings.Repeat(" ", depth*2) + "├─ " +} + +func TestInstallerWorksForReleases(t *testing.T) { + defer func() { + if !t.Failed() { + return + } + t.Logf("file tree:\n%s", fileTree{ + Path: filepath.Dir(t.TempDir()), + }) + }() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/databrickslabs/blueprint/v0.3.15/labs.yml" { + raw, err := os.ReadFile("testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml") + if err != nil { + panic(err) + } + w.Write(raw) + return + } + if r.URL.Path == "/repos/databrickslabs/blueprint/zipball/v0.3.15" { + raw, err := zipballFromFolder("testdata/installed-in-home/.databricks/labs/blueprint/lib") + if err != nil { + panic(err) + } + w.Header().Add("Content-Type", "application/octet-stream") + w.Write(raw) + return + } + if r.URL.Path == "/api/2.0/clusters/get" { + respondWithJSON(t, w, &compute.ClusterDetails{ + State: compute.StateRunning, + }) + return + } + t.Logf("Requested: %s", r.URL.Path) + t.FailNow() + })) + defer server.Close() + + ctx := installerContext(t, server) + + // simulate the case of GitHub Actions + ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL) + ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...") + ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster") + ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse") + + // After the installation, we'll have approximately the following state: + // t.TempDir() + // └── 001 <------------------------------------------------- env.UserHomeDir(ctx) + // ├── .databricks + // │ └── labs + // │ └── blueprint + // │ ├── cache <------------------------------- prj.CacheDir(ctx) + // │ │ └── databrickslabs-blueprint-releases.json + // │ ├── config + // │ ├── lib <--------------------------------- prj.LibDir(ctx) + // │ │ ├── install.py + // │ │ ├── labs.yml + // │ │ ├── main.py + // │ │ └── 
pyproject.toml + // │ └── state <------------------------------- prj.StateDir(ctx) + // │ ├── venv <---------------------------- prj.virtualEnvPath(ctx) + // │ │ ├── bin + // │ │ │ ├── pip + // │ │ │ ├── ... + // │ │ │ ├── python -> python3.9 + // │ │ │ ├── python3 -> python3.9 <---- prj.virtualEnvPython(ctx) + // │ │ │ └── python3.9 -> (path to a detected python) + // │ │ ├── include + // │ │ ├── lib + // │ │ │ └── python3.9 + // │ │ │ └── site-packages + // │ │ │ ├── ... + // │ │ │ ├── distutils-precedence.pth + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "install", "blueprint") + r.RunAndExpectOutput("setting up important infrastructure") +} + +func TestInstallerWorksForDevelopment(t *testing.T) { + defer func() { + if !t.Failed() { + return + } + t.Logf("file tree:\n%s", fileTree{ + Path: filepath.Dir(t.TempDir()), + }) + }() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/2.0/clusters/list" { + respondWithJSON(t, w, compute.ListClustersResponse{ + Clusters: []compute.ClusterDetails{ + { + ClusterId: "abc-id", + ClusterName: "first shared", + DataSecurityMode: compute.DataSecurityModeUserIsolation, + SparkVersion: "12.2.x-whatever", + State: compute.StateRunning, + }, + { + ClusterId: "bcd-id", + ClusterName: "second personal", + DataSecurityMode: compute.DataSecurityModeSingleUser, + SparkVersion: "14.5.x-whatever", + State: compute.StateRunning, + SingleUserName: "serge", + }, + }, + }) + return + } + if r.URL.Path == "/api/2.0/preview/scim/v2/Me" { + respondWithJSON(t, w, iam.User{ + UserName: "serge", + }) + return + } + if r.URL.Path == "/api/2.0/clusters/spark-versions" { + respondWithJSON(t, w, compute.GetSparkVersionsResponse{ + Versions: []compute.SparkVersion{ + { + Key: "14.5.x-whatever", + Name: "14.5 (Awesome)", + }, + }, + }) + return + } + if r.URL.Path == "/api/2.0/clusters/get" { + respondWithJSON(t, w, &compute.ClusterDetails{ + State: compute.StateRunning, 
+ }) + return + } + if r.URL.Path == "/api/2.0/sql/warehouses" { + respondWithJSON(t, w, sql.ListWarehousesResponse{ + Warehouses: []sql.EndpointInfo{ + { + Id: "efg-id", + Name: "First PRO Warehouse", + WarehouseType: sql.EndpointInfoWarehouseTypePro, + }, + }, + }) + return + } + t.Logf("Requested: %s", r.URL.Path) + t.FailNow() + })) + defer server.Close() + + wd, _ := os.Getwd() + defer os.Chdir(wd) + + devDir := copyTestdata(t, "testdata/installed-in-home/.databricks/labs/blueprint/lib") + err := os.Chdir(devDir) + require.NoError(t, err) + + ctx := installerContext(t, server) + py, _ := python.DetectExecutable(ctx) + py, _ = filepath.Abs(py) + + // development installer assumes it's in the active virtualenv + ctx = env.Set(ctx, "PYTHON_BIN", py) + + err = os.WriteFile(filepath.Join(env.UserHomeDir(ctx), ".databrickscfg"), []byte(fmt.Sprintf(` +[profile-one] +host = %s +token = ... + +[acc] +host = %s +account_id = abc + `, server.URL, server.URL)), ownerRW) + require.NoError(t, err) + + // We have the following state at this point: + // t.TempDir() + // ├── 001 <------------------ $CWD, prj.EffectiveLibDir(ctx), prj.folder + // │ ├── install.py + // │ ├── labs.yml <--------- prj.IsDeveloperMode(ctx) == true + // │ ├── main.py + // │ └── pyproject.toml + // └── 002 <------------------ env.UserHomeDir(ctx) + // └── .databricks + // └── labs + // └── blueprint <--- project.PathInLabs(ctx, "blueprint"), prj.rootDir(ctx) + // └── cache <--- prj.CacheDir(ctx) + // └── databrickslabs-blueprint-releases.json + + // `databricks labs install .` means "verify this installer i'm developing does work" + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "install", ".") + r.WithStdin() + defer r.CloseStdin() + + r.RunBackground() + r.WaitForTextPrinted("setting up important infrastructure", 5*time.Second) +} + +func TestUpgraderWorksForReleases(t *testing.T) { + defer func() { + if !t.Failed() { + return + } + t.Logf("file tree:\n%s", fileTree{ + Path: 
filepath.Dir(t.TempDir()), + }) + }() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/databrickslabs/blueprint/v0.4.0/labs.yml" { + raw, err := os.ReadFile("testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml") + if err != nil { + panic(err) + } + w.Write(raw) + return + } + if r.URL.Path == "/repos/databrickslabs/blueprint/zipball/v0.4.0" { + raw, err := zipballFromFolder("testdata/installed-in-home/.databricks/labs/blueprint/lib") + if err != nil { + panic(err) + } + w.Header().Add("Content-Type", "application/octet-stream") + w.Write(raw) + return + } + if r.URL.Path == "/api/2.0/clusters/get" { + respondWithJSON(t, w, &compute.ClusterDetails{ + State: compute.StateRunning, + }) + return + } + t.Logf("Requested: %s", r.URL.Path) + t.FailNow() + })) + defer server.Close() + + ctx := installerContext(t, server) + + newHome := copyTestdata(t, "testdata/installed-in-home") + ctx = env.WithUserHomeDir(ctx, newHome) + + py, _ := python.DetectExecutable(ctx) + py, _ = filepath.Abs(py) + ctx = env.Set(ctx, "PYTHON_BIN", py) + + cachePath := project.PathInLabs(ctx, "blueprint", "cache") + bs := []byte(`{"refreshed_at": "2033-01-01T00:00:00.92857+02:00","data": [{"tag_name": "v0.4.0"}]}`) + err := os.WriteFile(filepath.Join(cachePath, "databrickslabs-blueprint-releases.json"), bs, ownerRW) + require.NoError(t, err) + + // simulate the case of GitHub Actions + ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL) + ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...") + ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster") + ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse") + + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "upgrade", "blueprint") + r.RunAndExpectOutput("setting up important infrastructure") +} diff --git a/cmd/labs/project/login.go b/cmd/labs/project/login.go new file mode 100644 index 0000000000..226d477538 --- /dev/null +++ 
b/cmd/labs/project/login.go @@ -0,0 +1,116 @@ +package project + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/cmd/workspace/clusters" + "github.com/databricks/cli/cmd/workspace/warehouses" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/config" +) + +type loginConfig struct { + *Entrypoint `json:"-"` + WorkspaceProfile string `json:"workspace_profile,omitempty"` + AccountProfile string `json:"account_profile,omitempty"` + ClusterID string `json:"cluster_id,omitempty"` + WarehouseID string `json:"warehouse_id,omitempty"` +} + +func (lc *loginConfig) askWorkspace(ctx context.Context, cfg *config.Config) (*databricks.WorkspaceClient, error) { + if cfg.IsAccountClient() { + return nil, nil + } + err := lc.askWorkspaceProfile(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("profile: %w", err) + } + w, err := databricks.NewWorkspaceClient((*databricks.Config)(cfg)) + if err != nil { + return nil, fmt.Errorf("client: %w", err) + } + err = lc.askCluster(ctx, w) + if err != nil { + return nil, fmt.Errorf("cluster: %w", err) + } + err = lc.askWarehouse(ctx, w) + if err != nil { + return nil, fmt.Errorf("warehouse: %w", err) + } + return w, nil +} + +func (lc *loginConfig) askWorkspaceProfile(ctx context.Context, cfg *config.Config) (err error) { + if cfg.Profile != "" { + lc.WorkspaceProfile = cfg.Profile + return + } + if !cmdio.IsInteractive(ctx) { + return ErrNotInTTY + } + lc.WorkspaceProfile, err = root.AskForWorkspaceProfile(ctx) + cfg.Profile = lc.WorkspaceProfile + return +} + +func (lc *loginConfig) askCluster(ctx context.Context, w *databricks.WorkspaceClient) (err error) { + if !lc.NeedsCluster() { + return + } + if w.Config.ClusterID != "" { + lc.ClusterID = w.Config.ClusterID + return + } + if !cmdio.IsInteractive(ctx) { + return ErrNotInTTY + } + clusterID, err 
:= clusters.AskForCompatibleCluster(ctx, w, lc.Installer.MinRuntimeVersion) + if err != nil { + return fmt.Errorf("select: %w", err) + } + w.Config.ClusterID = clusterID + lc.ClusterID = clusterID + return +} + +func (lc *loginConfig) askWarehouse(ctx context.Context, w *databricks.WorkspaceClient) (err error) { + if !lc.NeedsWarehouse() { + return + } + if w.Config.WarehouseID != "" { + lc.WarehouseID = w.Config.WarehouseID + return + } + if !cmdio.IsInteractive(ctx) { + return ErrNotInTTY + } + lc.WarehouseID, err = warehouses.AskForCompatibleWarehouses(ctx, w, lc.Installer.WarehouseTypes) + return +} + +func (lc *loginConfig) askAccountProfile(ctx context.Context, cfg *config.Config) (err error) { + if !lc.HasAccountLevelCommands() { + return nil + } + if !cmdio.IsInteractive(ctx) { + return ErrNotInTTY + } + lc.AccountProfile, err = root.AskForAccountProfile(ctx) + return +} + +func (lc *loginConfig) save(ctx context.Context) error { + authFile := lc.loginFile(ctx) + raw, err := json.MarshalIndent(lc, "", " ") + if err != nil { + return err + } + log.Debugf(ctx, "Writing auth configuration to: %s", authFile) + return os.WriteFile(authFile, raw, ownerRW) +} diff --git a/cmd/labs/project/project.go b/cmd/labs/project/project.go new file mode 100644 index 0000000000..6adf9a3cff --- /dev/null +++ b/cmd/labs/project/project.go @@ -0,0 +1,352 @@ +package project + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "time" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/python" + "github.com/databricks/databricks-sdk-go/logger" + "github.com/fatih/color" + "gopkg.in/yaml.v3" + + "github.com/spf13/cobra" +) + +const ownerRW = 0o600 + +func Load(ctx context.Context, labsYml string) (*Project, error) { + raw, err := os.ReadFile(labsYml) + if err != nil { + return nil, fmt.Errorf("read labs.yml: %w", err) + } + 
project, err := readFromBytes(ctx, raw) + if err != nil { + return nil, err + } + project.folder = filepath.Dir(labsYml) + return project, nil +} + +func readFromBytes(ctx context.Context, labsYmlRaw []byte) (*Project, error) { + var project Project + err := yaml.Unmarshal(labsYmlRaw, &project) + if err != nil { + return nil, fmt.Errorf("parse labs.yml: %w", err) + } + e := (&project).metaEntrypoint(ctx) + if project.Installer != nil { + project.Installer.Entrypoint = e + } + if project.Uninstaller != nil { + project.Uninstaller.Entrypoint = e + } + return &project, nil +} + +type Project struct { + SpecVersion int `yaml:"$version"` + + Name string `yaml:"name"` + Description string `yaml:"description"` + Installer *hook `yaml:"install,omitempty"` + Uninstaller *hook `yaml:"uninstall,omitempty"` + Main string `yaml:"entrypoint"` + MinPython string `yaml:"min_python"` + Commands []*proxy `yaml:"commands,omitempty"` + + folder string +} + +func (p *Project) IsZipball() bool { + // the simplest way of running the project - download ZIP file from github + return true +} + +func (p *Project) HasPython() bool { + if strings.HasSuffix(p.Main, ".py") { + return true + } + if p.Installer != nil && p.Installer.HasPython() { + return true + } + if p.Uninstaller != nil && p.Uninstaller.HasPython() { + return true + } + return p.MinPython != "" +} + +func (p *Project) metaEntrypoint(ctx context.Context) *Entrypoint { + return &Entrypoint{ + Project: p, + RequireRunningCluster: p.requireRunningCluster(), + } +} + +func (p *Project) requireRunningCluster() bool { + if p.Installer != nil && p.Installer.RequireRunningCluster() { + return true + } + for _, v := range p.Commands { + if v.RequireRunningCluster { + return true + } + } + return false +} + +func (p *Project) fileExists(name string) bool { + _, err := os.Stat(name) + return err == nil +} + +func (p *Project) projectFilePath(ctx context.Context, name string) string { + return filepath.Join(p.EffectiveLibDir(ctx), name) +} 
+ +func (p *Project) IsPythonProject(ctx context.Context) bool { + if p.fileExists(p.projectFilePath(ctx, "setup.py")) { + return true + } + if p.fileExists(p.projectFilePath(ctx, "pyproject.toml")) { + return true + } + return false +} + +func (p *Project) IsDeveloperMode(ctx context.Context) bool { + return p.folder != "" && !strings.HasPrefix(p.LibDir(ctx), p.folder) +} + +func (p *Project) HasFolder() bool { + return p.folder != "" +} + +func (p *Project) HasAccountLevelCommands() bool { + for _, v := range p.Commands { + if v.IsAccountLevel { + return true + } + } + return false +} + +func (p *Project) IsBundleAware() bool { + for _, v := range p.Commands { + if v.IsBundleAware { + return true + } + } + return false +} + +func (p *Project) Register(parent *cobra.Command) { + group := &cobra.Command{ + Use: p.Name, + Short: p.Description, + GroupID: "labs", + } + parent.AddCommand(group) + for _, cp := range p.Commands { + cp.register(group) + cp.Entrypoint.Project = p + } +} + +func (p *Project) rootDir(ctx context.Context) string { + return PathInLabs(ctx, p.Name) +} + +func (p *Project) CacheDir(ctx context.Context) string { + return filepath.Join(p.rootDir(ctx), "cache") +} + +func (p *Project) ConfigDir(ctx context.Context) string { + return filepath.Join(p.rootDir(ctx), "config") +} + +func (p *Project) LibDir(ctx context.Context) string { + return filepath.Join(p.rootDir(ctx), "lib") +} + +func (p *Project) EffectiveLibDir(ctx context.Context) string { + if p.IsDeveloperMode(ctx) { + // developer is working on a local checkout, that is not inside of installed root + return p.folder + } + return p.LibDir(ctx) +} + +func (p *Project) StateDir(ctx context.Context) string { + return filepath.Join(p.rootDir(ctx), "state") +} + +func (p *Project) EnsureFoldersExist(ctx context.Context) error { + dirs := []string{p.CacheDir(ctx), p.ConfigDir(ctx), p.LibDir(ctx), p.StateDir(ctx)} + for _, v := range dirs { + err := os.MkdirAll(v, ownerRWXworldRX) + if err != nil 
{ + return fmt.Errorf("folder %s: %w", v, err) + } + } + return nil +} + +func (p *Project) Uninstall(cmd *cobra.Command) error { + if p.Uninstaller != nil { + err := p.Uninstaller.runHook(cmd) + if err != nil { + return fmt.Errorf("uninstall hook: %w", err) + } + } + ctx := cmd.Context() + log.Infof(ctx, "Removing project: %s", p.Name) + return os.RemoveAll(p.rootDir(ctx)) +} + +func (p *Project) virtualEnvPath(ctx context.Context) string { + if p.IsDeveloperMode(ctx) { + // When a virtual environment has been activated, the VIRTUAL_ENV environment variable + // is set to the path of the environment. Since explicitly activating a virtual environment + // is not required to use it, VIRTUAL_ENV cannot be relied upon to determine whether a virtual + // environment is being used. + // + // See https://docs.python.org/3/library/venv.html#how-venvs-work + activatedVenv := env.Get(ctx, "VIRTUAL_ENV") + if activatedVenv != "" { + logger.Debugf(ctx, "(development mode) using active virtual environment from: %s", activatedVenv) + return activatedVenv + } + nonActivatedVenv, err := python.DetectVirtualEnvPath(p.EffectiveLibDir(ctx)) + if err == nil { + logger.Debugf(ctx, "(development mode) using virtual environment from: %s", nonActivatedVenv) + return nonActivatedVenv + } + } + // by default, we pick Virtual Environment from DATABRICKS_LABS_STATE_DIR + return filepath.Join(p.StateDir(ctx), "venv") +} + +func (p *Project) virtualEnvPython(ctx context.Context) string { + overridePython := env.Get(ctx, "PYTHON_BIN") + if overridePython != "" { + return overridePython + } + if runtime.GOOS == "windows" { + return filepath.Join(p.virtualEnvPath(ctx), "Scripts", "python.exe") + } + return filepath.Join(p.virtualEnvPath(ctx), "bin", "python3") +} + +func (p *Project) loginFile(ctx context.Context) string { + if p.IsDeveloperMode(ctx) { + // developers may not want to pollute the state in + // ~/.databricks/labs/X/config while the version is not yet + // released + return 
p.projectFilePath(ctx, ".databricks-login.json") + } + return filepath.Join(p.ConfigDir(ctx), "login.json") +} + +func (p *Project) loadLoginConfig(ctx context.Context) (*loginConfig, error) { + loginFile := p.loginFile(ctx) + log.Debugf(ctx, "Loading login configuration from: %s", loginFile) + lc, err := tryLoadAndParseJSON[loginConfig](loginFile) + if err != nil { + return nil, fmt.Errorf("try load: %w", err) + } + lc.Entrypoint = p.metaEntrypoint(ctx) + return lc, nil +} + +func (p *Project) versionFile(ctx context.Context) string { + return filepath.Join(p.StateDir(ctx), "version.json") +} + +func (p *Project) InstalledVersion(ctx context.Context) (*version, error) { + if p.IsDeveloperMode(ctx) { + return &version{ + Version: "*", + Date: time.Now(), + }, nil + } + versionFile := p.versionFile(ctx) + log.Debugf(ctx, "Loading installed version info from: %s", versionFile) + return tryLoadAndParseJSON[version](versionFile) +} + +func (p *Project) writeVersionFile(ctx context.Context, ver string) error { + versionFile := p.versionFile(ctx) + raw, err := json.Marshal(version{ + Version: ver, + Date: time.Now(), + }) + if err != nil { + return err + } + log.Debugf(ctx, "Writing installed version info to: %s", versionFile) + return os.WriteFile(versionFile, raw, ownerRW) +} + +// checkUpdates is called before every command of an installed project, +// giving users hints when they need to update their installations. 
+func (p *Project) checkUpdates(cmd *cobra.Command) error { + ctx := cmd.Context() + if p.IsDeveloperMode(ctx) { + // skipping update check for projects in developer mode, that + // might not be installed yet + return nil + } + r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir(ctx)) + versions, err := r.Load(ctx) + if err != nil { + return err + } + installed, err := p.InstalledVersion(ctx) + if err != nil { + return err + } + latest := versions[0] + if installed.Version == latest.Version { + return nil + } + ago := time.Since(latest.PublishedAt) + msg := "[UPGRADE ADVISED] Newer %s version was released %s ago. Please run `databricks labs upgrade %s` to upgrade: %s -> %s" + cmd.PrintErrln(color.YellowString(msg, p.Name, p.timeAgo(ago), p.Name, installed.Version, latest.Version)) + return nil +} + +func (p *Project) timeAgo(dur time.Duration) string { + days := int(dur.Hours()) / 24 + hours := int(dur.Hours()) % 24 + minutes := int(dur.Minutes()) % 60 + if dur < time.Minute { + return "minute" + } else if dur < time.Hour { + return fmt.Sprintf("%d minutes", minutes) + } else if dur < (24 * time.Hour) { + return fmt.Sprintf("%d hours", hours) + } + return fmt.Sprintf("%d days", days) +} + +func (p *Project) profileOverride(cmd *cobra.Command) string { + profileFlag := cmd.Flag("profile") + if profileFlag == nil { + return "" + } + return profileFlag.Value.String() +} + +type version struct { + Version string `json:"version"` + Date time.Time `json:"date"` +} diff --git a/cmd/labs/project/project_test.go b/cmd/labs/project/project_test.go new file mode 100644 index 0000000000..79e69bad6e --- /dev/null +++ b/cmd/labs/project/project_test.go @@ -0,0 +1,22 @@ +package project + +import ( + "context" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func assertEqualPaths(t *testing.T, expected, actual string) { + expected = strings.ReplaceAll(expected, "/", string(os.PathSeparator)) + assert.Equal(t, expected, actual) +} + +func 
TestLoad(t *testing.T) { + ctx := context.Background() + prj, err := Load(ctx, "testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml") + assert.NoError(t, err) + assertEqualPaths(t, "testdata/installed-in-home/.databricks/labs/blueprint/lib", prj.folder) +} diff --git a/cmd/labs/project/proxy.go b/cmd/labs/project/proxy.go new file mode 100644 index 0000000000..86a110f995 --- /dev/null +++ b/cmd/labs/project/proxy.go @@ -0,0 +1,144 @@ +package project + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/process" + "github.com/spf13/cobra" + "github.com/spf13/pflag" +) + +type proxy struct { + Entrypoint `yaml:",inline"` + Name string `yaml:"name"` + Description string `yaml:"description"` + TableTemplate string `yaml:"table_template,omitempty"` + Flags []flag `yaml:"flags,omitempty"` +} + +func (cp *proxy) register(parent *cobra.Command) { + cmd := &cobra.Command{ + Use: cp.Name, + Short: cp.Description, + RunE: cp.runE, + } + parent.AddCommand(cmd) + flags := cmd.Flags() + for _, flag := range cp.Flags { + flag.register(flags) + } +} + +func (cp *proxy) runE(cmd *cobra.Command, _ []string) error { + err := cp.checkUpdates(cmd) + if err != nil { + return err + } + args, err := cp.commandInput(cmd) + if err != nil { + return err + } + envs, err := cp.Prepare(cmd) + if err != nil { + return fmt.Errorf("entrypoint: %w", err) + } + ctx := cmd.Context() + log.Debugf(ctx, "Forwarding subprocess: %s", strings.Join(args, " ")) + if cp.TableTemplate != "" { + return cp.renderJsonAsTable(cmd, args, envs) + } + err = process.Forwarded(ctx, args, + cmd.InOrStdin(), + cmd.OutOrStdout(), + cmd.ErrOrStderr(), + process.WithEnvs(envs)) + if errors.Is(err, fs.ErrNotExist) && cp.IsPythonProject(ctx) { + msg := "cannot find Python %s. 
Please re-run: databricks labs install %s" + return fmt.Errorf(msg, cp.MinPython, cp.Name) + } + return err +} + +func (cp *proxy) renderJsonAsTable(cmd *cobra.Command, args []string, envs map[string]string) error { + var buf bytes.Buffer + ctx := cmd.Context() + err := process.Forwarded(ctx, args, + cmd.InOrStdin(), + &buf, + cmd.ErrOrStderr(), + process.WithEnvs(envs)) + if err != nil { + return err + } + var anyVal any + err = json.Unmarshal(buf.Bytes(), &anyVal) + if err != nil { + return err + } + // IntelliJ eagerly replaces tabs with spaces, even though we're not asking for it + fixedTemplate := strings.ReplaceAll(cp.TableTemplate, "\\t", "\t") + return cmdio.RenderWithTemplate(ctx, anyVal, fixedTemplate) +} + +func (cp *proxy) commandInput(cmd *cobra.Command) ([]string, error) { + flags := cmd.Flags() + commandInput := struct { + Command string `json:"command"` + Flags map[string]any `json:"flags"` + OutputType string `json:"output_type"` + }{ + Command: cp.Name, + Flags: map[string]any{}, + } + for _, f := range cp.Flags { + v, err := f.get(flags) + if err != nil { + return nil, fmt.Errorf("get %s flag: %w", f.Name, err) + } + commandInput.Flags[f.Name] = v + } + logLevelFlag := flags.Lookup("log-level") + if logLevelFlag != nil { + commandInput.Flags["log_level"] = logLevelFlag.Value.String() + } + args := []string{} + ctx := cmd.Context() + if cp.IsPythonProject(ctx) { + args = append(args, cp.virtualEnvPython(ctx)) + libDir := cp.EffectiveLibDir(cmd.Context()) + entrypoint := filepath.Join(libDir, cp.Main) + args = append(args, entrypoint) + } + raw, err := json.Marshal(commandInput) + if err != nil { + return nil, fmt.Errorf("command input: %w", err) + } + args = append(args, string(raw)) + return args, nil +} + +type flag struct { + Name string `yaml:"name"` + Description string `yaml:"description"` + Default any `yaml:"default,omitempty"` +} + +func (f *flag) register(pf *pflag.FlagSet) { + var dflt string + if f.Default != nil { + dflt = 
fmt.Sprint(f.Default) + } + pf.String(f.Name, dflt, f.Description) +} + +func (f *flag) get(pf *pflag.FlagSet) (any, error) { + return pf.GetString(f.Name) +} diff --git a/cmd/labs/project/schema.json b/cmd/labs/project/schema.json new file mode 100644 index 0000000000..a779b15e47 --- /dev/null +++ b/cmd/labs/project/schema.json @@ -0,0 +1,126 @@ +{ + "id": "https://raw.githubusercontent.com/databricks/cli/feat/labs/cmd/labs/project/schema.json#", + "$schema": "http://json-schema.org/draft-04/schema", + "definitions": { + "entrypoint": { + "type": "object", + "properties": { + "require_running_cluster": { + "type": "boolean", + "default": false + }, + "is_unauthenticated": { + "type": "boolean", + "default": false + }, + "is_account_level": { + "type": "boolean", + "default": false + }, + "is_bundle_aware": { + "type": "boolean", + "default": false + } + } + }, + "hook": { + "type": "object", + "$ref": "#/definitions/entrypoint", + "unevaluatedProperties": true, + "properties": { + "script": { + "type": "string", + "pattern": "^[A-Za-z0-9_/\\.-]+$" + }, + "min_runtime_version": { + "type": "string", + "pattern": "^[0-9]+\\.[0-9]+$" + }, + "require_databricks_connect": { + "type": "boolean", + "default": false + }, + "warehouse_types": { + "type": "array", "items": { "enum": [ "PRO", "CLASSIC", "TYPE_UNSPECIFIED" ] } + } + } + }, + "alphanum": { + "type": "string", + "pattern": "^[a-z0-9-]+$" + }, + "command": { + "type": "object", + "$ref": "#/definitions/entrypoint", + "unevaluatedProperties": true, + "required": ["name", "description"], + "properties": { + "name": { + "$ref": "#/definitions/alphanum" + }, + "description": { + "type": "string" + }, + "table_template": { + "type": "string" + }, + "flags": { + "type": "array", "items": { "$ref": "#/definitions/flag" } + } + } + }, + "flag": { + "type": "object", + "required": ["name", "description"], + "properties": { + "name": { + "$ref": "#/definitions/alphanum" + }, + "description": { + "type": "string" + }, + "default": {} + } + } + }, + "type": "object", + 
"additionalProperties": false, + "required": ["name", "description", "entrypoint"], + "properties": { + "$version": { + "type": "integer", + "default": 1 + }, + "name": { + "$ref": "#/definitions/alphanum", + "description": "Name of the project" + }, + "description": { + "type": "string", + "description": "Short description of the project" + }, + "entrypoint": { + "type": "string", + "description": "Script that routes subcommands" + }, + "min_python": { + "type": "string", + "pattern": "^3.[0-9]+$", + "description": "Minimal Python version required" + }, + "install": { + "$ref": "#/definitions/hook", + "description": "Installation configuration" + }, + "uninstall": { + "$ref": "#/definitions/hook" + }, + "commands": { + "type": "array", + "description": "Exposed commands", + "items": { + "$ref": "#/definitions/command" + } + } + } +} diff --git a/cmd/labs/project/testdata/.gitignore b/cmd/labs/project/testdata/.gitignore new file mode 100644 index 0000000000..bd1711fd16 --- /dev/null +++ b/cmd/labs/project/testdata/.gitignore @@ -0,0 +1 @@ +!.databricks diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/cache/databrickslabs-blueprint-releases.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/cache/databrickslabs-blueprint-releases.json new file mode 100644 index 0000000000..87651864c5 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/cache/databrickslabs-blueprint-releases.json @@ -0,0 +1,8 @@ +{ + "refreshed_at": "2033-01-01T00:00:00.92857+02:00", + "data": [ + { + "tag_name": "v0.3.15" + } + ] + } diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/config/login.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/config/login.json new file mode 100644 index 0000000000..7b611ba3f2 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/config/login.json @@ -0,0 +1,4 @@ +{ + 
"workspace_profile": "workspace-profile", + "account_profile": "account-profile" +} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py new file mode 100644 index 0000000000..6873257d5c --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py @@ -0,0 +1 @@ +print(f'setting up important infrastructure') diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml new file mode 100644 index 0000000000..0ac4bf8260 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml @@ -0,0 +1,33 @@ +--- +version: 1 +name: blueprint +description: Blueprint Project +install: + min_runtime_version: 13.1 + require_running_cluster: true + warehouse_types: + - PRO + script: install.py +entrypoint: main.py +min_python: 3.9 +commands: + - name: echo + is_account_level: true + description: non-interactive echo + flags: + - name: first + default: something + description: first flag description + - name: foo + description: foo command + flags: + - name: first + description: first flag description + - name: second + description: second flag description + - name: table + description: something that renders a table + table_template: | + Key Value + {{range .records}}{{.key}} {{.value}} + {{end}} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py new file mode 100644 index 0000000000..769ee73eec --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py @@ -0,0 +1,27 @@ +import os, sys, json + +payload = json.loads(sys.argv[1]) + +if 'echo' == payload['command']: + json.dump({ 
+ 'command': payload['command'], + 'flags': payload['flags'], + 'env': {k:v for k,v in os.environ.items()} + }, sys.stdout) + sys.exit(0) + +if 'table' == payload['command']: + sys.stderr.write("some intermediate info\n") + json.dump({'records': [ + {'key': 'First', 'value': 'Second'}, + {'key': 'Third', 'value': 'Fourth'}, + ]}, sys.stdout) + sys.exit(0) + +print(f'host is {os.environ["DATABRICKS_HOST"]}') + +print(f'[{payload["command"]}] command flags are {payload["flags"]}') + +answer = input('What is your name? ') + +print(f'Hello, {answer}!') diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/pyproject.toml b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/pyproject.toml new file mode 100644 index 0000000000..d33ab1fb1e --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/pyproject.toml @@ -0,0 +1,11 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "blueprint" +version = "0.3.15" +description = 'Databricks Labs Blueprint' +requires-python = ">=3.9" +classifiers = ["Programming Language :: Python"] +dependencies = [] diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/other-state-file.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/other-state-file.json new file mode 100644 index 0000000000..0967ef424b --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/other-state-file.json @@ -0,0 +1 @@ +{} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/venv/pyvenv.cfg b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/venv/pyvenv.cfg new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/version.json 
b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/version.json new file mode 100644 index 0000000000..4bcae15559 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/version.json @@ -0,0 +1,4 @@ +{ + "version": "v0.3.15", + "date": "2023-10-24T15:04:05+01:00" +} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json new file mode 100644 index 0000000000..896ebecc59 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json @@ -0,0 +1,37 @@ +{ + "refreshed_at": "2033-01-01T00:00:00.92857+02:00", + "data": [ + { + "name": "blueprint", + "description": "Sample project", + "language": "Python", + "default_branch": "main", + "stargazers_count": 100500, + "fork": false, + "archived": false, + "topics": [], + "html_url": "https://github.com/databrickslabs/blueprint", + "clone_url": "https://github.com/databrickslabs/blueprint.git", + "ssh_url": "git@github.com:databrickslabs/blueprint.git", + "license": { + "name": "Other" + } + }, + { + "name": "ucx", + "description": "Unity Catalog Migrations", + "language": "Python", + "default_branch": "main", + "stargazers_count": 100500, + "fork": false, + "archived": false, + "topics": [], + "html_url": "https://github.com/databrickslabs/ucx", + "clone_url": "https://github.com/databrickslabs/ucx.git", + "ssh_url": "git@github.com:databrickslabs/ucx.git", + "license": { + "name": "Other" + } + } + ] +} diff --git a/cmd/labs/project/testdata/installed-in-home/.databrickscfg b/cmd/labs/project/testdata/installed-in-home/.databrickscfg new file mode 100644 index 0000000000..ec1bf7bdcf --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databrickscfg @@ -0,0 +1,9 @@ +[workspace-profile] +host = https://abc +token = bcd +cluster_id = cde +warehouse_id = 
def + +[account-profile] +host = https://accounts.cloud.databricks.com +account_id = cde diff --git a/cmd/labs/show.go b/cmd/labs/show.go new file mode 100644 index 0000000000..fc9d175c42 --- /dev/null +++ b/cmd/labs/show.go @@ -0,0 +1,57 @@ +package labs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +func newShowCommand() *cobra.Command { + return &cobra.Command{ + Use: "show NAME", + Args: cobra.ExactArgs(1), + Short: "Shows information about the project", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name: {{.name}} + Description: {{.description}} + Python: {{.is_python}} + + Folders: + - lib: {{.lib_dir}} + - cache: {{.cache_dir}} + - config: {{.config_dir}} + + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + installed, err := project.Installed(ctx) + if err != nil { + return err + } + if len(installed) == 0 { + return fmt.Errorf("no projects found") + } + name := args[0] + for _, v := range installed { + isDev := name == "." 
&& v.IsDeveloperMode(ctx) + isMatch := name == v.Name + if !(isDev || isMatch) { + continue + } + return cmdio.Render(ctx, map[string]any{ + "name": v.Name, + "description": v.Description, + "cache_dir": v.CacheDir(ctx), + "config_dir": v.ConfigDir(ctx), + "lib_dir": v.EffectiveLibDir(ctx), + "is_python": v.IsPythonProject(ctx), + }) + } + return nil + }, + } +} diff --git a/cmd/labs/uninstall.go b/cmd/labs/uninstall.go new file mode 100644 index 0000000000..981107311d --- /dev/null +++ b/cmd/labs/uninstall.go @@ -0,0 +1,40 @@ +package labs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/spf13/cobra" +) + +func newUninstallCommand() *cobra.Command { + return &cobra.Command{ + Use: "uninstall NAME", + Args: cobra.ExactArgs(1), + Short: "Uninstalls project", + ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { + // cobra.ShellCompDirectiveNoFileComp + var names []string + installed, _ := project.Installed(cmd.Context()) + for _, v := range installed { + names = append(names, v.Name) + } + return names, cobra.ShellCompDirectiveNoFileComp + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + installed, err := project.Installed(ctx) + if err != nil { + return err + } + name := args[0] + for _, prj := range installed { + if prj.Name != name { + continue + } + return prj.Uninstall(cmd) + } + return fmt.Errorf("not found: %s", name) + }, + } +} diff --git a/cmd/labs/unpack/zipball.go b/cmd/labs/unpack/zipball.go new file mode 100644 index 0000000000..d2cfa8c945 --- /dev/null +++ b/cmd/labs/unpack/zipball.go @@ -0,0 +1,64 @@ +package unpack + +import ( + "archive/zip" + "bytes" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +const ownerRWXworldRX = 0o755 + +type GitHubZipball struct { + io.Reader +} + +func (v GitHubZipball) UnpackTo(libTarget string) error { + raw, err := io.ReadAll(v) + if err != nil { + return err + } + zipReader, 
err := zip.NewReader(bytes.NewReader(raw), int64(len(raw))) + if err != nil { + return fmt.Errorf("zip: %w", err) + } + // GitHub packages entire repo contents into a top-level folder, e.g. databrickslabs-ucx-2800c6b + rootDirInZIP := zipReader.File[0].FileHeader.Name + for _, zf := range zipReader.File { + if zf.Name == rootDirInZIP { + continue + } + normalizedName := strings.TrimPrefix(zf.Name, rootDirInZIP) + targetName := filepath.Join(libTarget, normalizedName) + if zf.FileInfo().IsDir() { + err = os.MkdirAll(targetName, ownerRWXworldRX) + if err != nil { + return fmt.Errorf("mkdir %s: %w", normalizedName, err) + } + continue + } + err = v.extractFile(zf, targetName) + if err != nil { + return fmt.Errorf("extract %s: %w", zf.Name, err) + } + } + return nil +} + +func (v GitHubZipball) extractFile(zf *zip.File, targetName string) error { + reader, err := zf.Open() + if err != nil { + return fmt.Errorf("source: %w", err) + } + defer reader.Close() + writer, err := os.OpenFile(targetName, os.O_CREATE|os.O_RDWR, zf.Mode()) + if err != nil { + return fmt.Errorf("target: %w", err) + } + defer writer.Close() + _, err = io.Copy(writer, reader) + return err +} diff --git a/cmd/labs/upgrade.go b/cmd/labs/upgrade.go new file mode 100644 index 0000000000..88b7bc928e --- /dev/null +++ b/cmd/labs/upgrade.go @@ -0,0 +1,21 @@ +package labs + +import ( + "github.com/databricks/cli/cmd/labs/project" + "github.com/spf13/cobra" +) + +func newUpgradeCommand() *cobra.Command { + return &cobra.Command{ + Use: "upgrade NAME", + Args: cobra.ExactArgs(1), + Short: "Upgrades project", + RunE: func(cmd *cobra.Command, args []string) error { + inst, err := project.NewUpgrader(cmd, args[0]) + if err != nil { + return err + } + return inst.Upgrade(cmd.Context()) + }, + } +} diff --git a/cmd/workspace/clusters/uc.go b/cmd/workspace/clusters/uc.go new file mode 100644 index 0000000000..45abed65ed --- /dev/null +++ b/cmd/workspace/clusters/uc.go @@ -0,0 +1,147 @@ +package clusters + +import ( 
+ "context" + "errors" + "fmt" + "regexp" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/fatih/color" + "github.com/manifoldco/promptui" + "golang.org/x/mod/semver" +) + +var minUcRuntime = canonicalVersion("v12.0") + +var dbrVersionRegex = regexp.MustCompile(`^(\d+\.\d+)\.x-.*`) + +func canonicalVersion(v string) string { + return semver.Canonical("v" + strings.TrimPrefix(v, "v")) +} + +func GetRuntimeVersion(cluster *compute.ClusterDetails) (string, bool) { + match := dbrVersionRegex.FindStringSubmatch(cluster.SparkVersion) + if len(match) < 1 { + return "", false + } + return match[1], true +} + +func IsUnityCatalogCompatible(cluster *compute.ClusterDetails, minVersion string) bool { + minVersion = canonicalVersion(minVersion) + if semver.Compare(minUcRuntime, minVersion) >= 0 { + return false + } + runtimeVersion, ok := GetRuntimeVersion(cluster) + if !ok { + return false + } + clusterRuntime := canonicalVersion(runtimeVersion) + if semver.Compare(minVersion, clusterRuntime) >= 0 { + return false + } + switch cluster.DataSecurityMode { + case compute.DataSecurityModeUserIsolation, compute.DataSecurityModeSingleUser: + return true + default: + return false + } +} + +var ErrNoCompatibleClusters = errors.New("no compatible clusters found") + +type compatibleCluster struct { + compute.ClusterDetails + versionName string +} + +func (v compatibleCluster) Access() string { + switch v.DataSecurityMode { + case compute.DataSecurityModeUserIsolation: + return "Shared" + case compute.DataSecurityModeSingleUser: + return "Assigned" + default: + return "Unknown" + } +} + +func (v compatibleCluster) Runtime() string { + runtime, _, _ := strings.Cut(v.versionName, " (") + return runtime +} + +func (v compatibleCluster) State() string { + state := v.ClusterDetails.State + switch state { + case compute.StateRunning, compute.StateResizing: + return 
color.GreenString(state.String()) + case compute.StateError, compute.StateTerminated, compute.StateTerminating, compute.StateUnknown: + return color.RedString(state.String()) + default: + return color.BlueString(state.String()) + } +} + +func AskForCompatibleCluster(ctx context.Context, w *databricks.WorkspaceClient, minVersion string) (string, error) { + all, err := w.Clusters.ListAll(ctx, compute.ListClustersRequest{ + CanUseClient: "NOTEBOOKS", + }) + if err != nil { + return "", fmt.Errorf("list clusters: %w", err) + } + me, err := w.CurrentUser.Me(ctx) + if err != nil { + return "", fmt.Errorf("current user: %w", err) + } + versions := map[string]string{} + sv, err := w.Clusters.SparkVersions(ctx) + if err != nil { + return "", fmt.Errorf("list runtime versions: %w", err) + } + for _, v := range sv.Versions { + versions[v.Key] = v.Name + } + var compatible []compatibleCluster + for _, v := range all { + if !IsUnityCatalogCompatible(&v, minVersion) { + continue + } + if v.SingleUserName != "" && v.SingleUserName != me.UserName { + continue + } + compatible = append(compatible, compatibleCluster{ + ClusterDetails: v, + versionName: versions[v.SparkVersion], + }) + } + if len(compatible) == 0 { + return "", ErrNoCompatibleClusters + } + if len(compatible) == 1 { + return compatible[0].ClusterId, nil + } + i, _, err := cmdio.RunSelect(ctx, &promptui.Select{ + Label: "Choose compatible cluster", + Items: compatible, + Searcher: func(input string, idx int) bool { + lower := strings.ToLower(compatible[idx].ClusterName) + return strings.Contains(lower, input) + }, + StartInSearchMode: true, + Templates: &promptui.SelectTemplates{ + Label: "{{.ClusterName | faint}}", + Active: `{{.ClusterName | bold}} ({{.State}} {{.Access}} Runtime {{.Runtime}})`, + Inactive: `{{.ClusterName}} ({{.State}} {{.Access}} Runtime {{.Runtime}})`, + Selected: `{{ "Configured cluster" | faint }}: {{ .ClusterName | bold }} ({{.ClusterId | faint}})`, + }, + }) + if err != nil { + return "", err 
+ } + return compatible[i].ClusterId, nil +} diff --git a/cmd/workspace/clusters/uc_test.go b/cmd/workspace/clusters/uc_test.go new file mode 100644 index 0000000000..1a7e36c345 --- /dev/null +++ b/cmd/workspace/clusters/uc_test.go @@ -0,0 +1,31 @@ +package clusters + +import ( + "testing" + + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/stretchr/testify/assert" +) + +func TestIsCompatible(t *testing.T) { + assert.True(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ + SparkVersion: "13.2.x-aarch64-scala2.12", + DataSecurityMode: compute.DataSecurityModeUserIsolation, + }, "13.0")) + assert.False(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ + SparkVersion: "13.2.x-aarch64-scala2.12", + DataSecurityMode: compute.DataSecurityModeNone, + }, "13.0")) + assert.False(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ + SparkVersion: "9.1.x-photon-scala2.12", + DataSecurityMode: compute.DataSecurityModeNone, + }, "13.0")) + assert.False(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ + SparkVersion: "9.1.x-photon-scala2.12", + DataSecurityMode: compute.DataSecurityModeNone, + }, "10.0")) + assert.False(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ + SparkVersion: "custom-9.1.x-photon-scala2.12", + DataSecurityMode: compute.DataSecurityModeNone, + }, "14.0")) +} diff --git a/cmd/workspace/warehouses/ask.go b/cmd/workspace/warehouses/ask.go new file mode 100644 index 0000000000..72963187a2 --- /dev/null +++ b/cmd/workspace/warehouses/ask.go @@ -0,0 +1,51 @@ +package warehouses + +import ( + "context" + "errors" + "fmt" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/fatih/color" +) + +var ErrNoCompatibleWarehouses = errors.New("no compatible warehouses") + +func AskForCompatibleWarehouses(ctx context.Context, w *databricks.WorkspaceClient, types []sql.EndpointInfoWarehouseType) (string, error) { + all, err := 
w.Warehouses.ListAll(ctx, sql.ListWarehousesRequest{}) + if err != nil { + return "", fmt.Errorf("list warehouses: %w", err) + } + allowed := map[sql.EndpointInfoWarehouseType]bool{} + for _, v := range types { + allowed[v] = true + } + var lastWarehouseID string + names := map[string]string{} + for _, v := range all { + if !allowed[v.WarehouseType] { + continue + } + var state string + switch v.State { + case sql.StateRunning: + state = color.GreenString(v.State.String()) + case sql.StateStopped, sql.StateDeleted, sql.StateStopping, sql.StateDeleting: + state = color.RedString(v.State.String()) + default: + state = color.BlueString(v.State.String()) + } + visibleTouser := fmt.Sprintf("%s (%s %s)", v.Name, state, v.WarehouseType) + names[visibleTouser] = v.Id + lastWarehouseID = v.Id + } + if len(names) == 0 { + return "", ErrNoCompatibleWarehouses + } + if len(names) == 1 { + return lastWarehouseID, nil + } + return cmdio.Select(ctx, names, "Choose SQL Warehouse") +} From 5819644739e8083b41ae6578abf6cb95faada36d Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 9 Nov 2023 18:25:04 +0100 Subject: [PATCH 2/8] cfgpickers --- cmd/labs/project/installer.go | 4 +- cmd/labs/project/login.go | 9 +- cmd/workspace/clusters/uc.go | 147 ------------------------------ cmd/workspace/clusters/uc_test.go | 31 ------- cmd/workspace/warehouses/ask.go | 51 ----------- 5 files changed, 7 insertions(+), 235 deletions(-) delete mode 100644 cmd/workspace/clusters/uc.go delete mode 100644 cmd/workspace/clusters/uc_test.go delete mode 100644 cmd/workspace/warehouses/ask.go diff --git a/cmd/labs/project/installer.go b/cmd/labs/project/installer.go index 1c990890e5..2e09ed3707 100644 --- a/cmd/labs/project/installer.go +++ b/cmd/labs/project/installer.go @@ -10,9 +10,9 @@ import ( "github.com/databricks/cli/cmd/labs/github" "github.com/databricks/cli/cmd/labs/unpack" - "github.com/databricks/cli/cmd/workspace/clusters" "github.com/databricks/cli/libs/cmdio" 
"github.com/databricks/cli/libs/databrickscfg" + "github.com/databricks/cli/libs/databrickscfg/cfgpickers" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/process" "github.com/databricks/cli/libs/python" @@ -238,7 +238,7 @@ func (i *installer) setupPythonVirtualEnvironment(ctx context.Context, w *databr if err != nil { return fmt.Errorf("cluster: %w", err) } - runtimeVersion, ok := clusters.GetRuntimeVersion(cluster) + runtimeVersion, ok := cfgpickers.GetRuntimeVersion(*cluster) if !ok { return fmt.Errorf("unsupported runtime: %s", cluster.SparkVersion) } diff --git a/cmd/labs/project/login.go b/cmd/labs/project/login.go index 226d477538..dd2350642e 100644 --- a/cmd/labs/project/login.go +++ b/cmd/labs/project/login.go @@ -7,9 +7,8 @@ import ( "os" "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/cmd/workspace/clusters" - "github.com/databricks/cli/cmd/workspace/warehouses" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/databrickscfg/cfgpickers" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" @@ -70,7 +69,8 @@ func (lc *loginConfig) askCluster(ctx context.Context, w *databricks.WorkspaceCl if !cmdio.IsInteractive(ctx) { return ErrNotInTTY } - clusterID, err := clusters.AskForCompatibleCluster(ctx, w, lc.Installer.MinRuntimeVersion) + clusterID, err := cfgpickers.AskForCluster(ctx, w, + cfgpickers.WithDatabricksConnect(lc.Installer.MinRuntimeVersion)) if err != nil { return fmt.Errorf("select: %w", err) } @@ -90,7 +90,8 @@ func (lc *loginConfig) askWarehouse(ctx context.Context, w *databricks.Workspace if !cmdio.IsInteractive(ctx) { return ErrNotInTTY } - lc.WarehouseID, err = warehouses.AskForCompatibleWarehouses(ctx, w, lc.Installer.WarehouseTypes) + lc.WarehouseID, err = cfgpickers.AskForWarehouse(ctx, w, + cfgpickers.WithWarehouseTypes(lc.Installer.WarehouseTypes...)) return } diff --git a/cmd/workspace/clusters/uc.go 
b/cmd/workspace/clusters/uc.go deleted file mode 100644 index 45abed65ed..0000000000 --- a/cmd/workspace/clusters/uc.go +++ /dev/null @@ -1,147 +0,0 @@ -package clusters - -import ( - "context" - "errors" - "fmt" - "regexp" - "strings" - - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/fatih/color" - "github.com/manifoldco/promptui" - "golang.org/x/mod/semver" -) - -var minUcRuntime = canonicalVersion("v12.0") - -var dbrVersionRegex = regexp.MustCompile(`^(\d+\.\d+)\.x-.*`) - -func canonicalVersion(v string) string { - return semver.Canonical("v" + strings.TrimPrefix(v, "v")) -} - -func GetRuntimeVersion(cluster *compute.ClusterDetails) (string, bool) { - match := dbrVersionRegex.FindStringSubmatch(cluster.SparkVersion) - if len(match) < 1 { - return "", false - } - return match[1], true -} - -func IsUnityCatalogCompatible(cluster *compute.ClusterDetails, minVersion string) bool { - minVersion = canonicalVersion(minVersion) - if semver.Compare(minUcRuntime, minVersion) >= 0 { - return false - } - runtimeVersion, ok := GetRuntimeVersion(cluster) - if !ok { - return false - } - clusterRuntime := canonicalVersion(runtimeVersion) - if semver.Compare(minVersion, clusterRuntime) >= 0 { - return false - } - switch cluster.DataSecurityMode { - case compute.DataSecurityModeUserIsolation, compute.DataSecurityModeSingleUser: - return true - default: - return false - } -} - -var ErrNoCompatibleClusters = errors.New("no compatible clusters found") - -type compatibleCluster struct { - compute.ClusterDetails - versionName string -} - -func (v compatibleCluster) Access() string { - switch v.DataSecurityMode { - case compute.DataSecurityModeUserIsolation: - return "Shared" - case compute.DataSecurityModeSingleUser: - return "Assigned" - default: - return "Unknown" - } -} - -func (v compatibleCluster) Runtime() string { - runtime, _, _ := strings.Cut(v.versionName, " (") - 
return runtime -} - -func (v compatibleCluster) State() string { - state := v.ClusterDetails.State - switch state { - case compute.StateRunning, compute.StateResizing: - return color.GreenString(state.String()) - case compute.StateError, compute.StateTerminated, compute.StateTerminating, compute.StateUnknown: - return color.RedString(state.String()) - default: - return color.BlueString(state.String()) - } -} - -func AskForCompatibleCluster(ctx context.Context, w *databricks.WorkspaceClient, minVersion string) (string, error) { - all, err := w.Clusters.ListAll(ctx, compute.ListClustersRequest{ - CanUseClient: "NOTEBOOKS", - }) - if err != nil { - return "", fmt.Errorf("list clusters: %w", err) - } - me, err := w.CurrentUser.Me(ctx) - if err != nil { - return "", fmt.Errorf("current user: %w", err) - } - versions := map[string]string{} - sv, err := w.Clusters.SparkVersions(ctx) - if err != nil { - return "", fmt.Errorf("list runtime versions: %w", err) - } - for _, v := range sv.Versions { - versions[v.Key] = v.Name - } - var compatible []compatibleCluster - for _, v := range all { - if !IsUnityCatalogCompatible(&v, minVersion) { - continue - } - if v.SingleUserName != "" && v.SingleUserName != me.UserName { - continue - } - compatible = append(compatible, compatibleCluster{ - ClusterDetails: v, - versionName: versions[v.SparkVersion], - }) - } - if len(compatible) == 0 { - return "", ErrNoCompatibleClusters - } - if len(compatible) == 1 { - return compatible[0].ClusterId, nil - } - i, _, err := cmdio.RunSelect(ctx, &promptui.Select{ - Label: "Choose compatible cluster", - Items: compatible, - Searcher: func(input string, idx int) bool { - lower := strings.ToLower(compatible[idx].ClusterName) - return strings.Contains(lower, input) - }, - StartInSearchMode: true, - Templates: &promptui.SelectTemplates{ - Label: "{{.ClusterName | faint}}", - Active: `{{.ClusterName | bold}} ({{.State}} {{.Access}} Runtime {{.Runtime}})`, - Inactive: `{{.ClusterName}} ({{.State}} 
{{.Access}} Runtime {{.Runtime}})`, - Selected: `{{ "Configured cluster" | faint }}: {{ .ClusterName | bold }} ({{.ClusterId | faint}})`, - }, - }) - if err != nil { - return "", err - } - return compatible[i].ClusterId, nil -} diff --git a/cmd/workspace/clusters/uc_test.go b/cmd/workspace/clusters/uc_test.go deleted file mode 100644 index 1a7e36c345..0000000000 --- a/cmd/workspace/clusters/uc_test.go +++ /dev/null @@ -1,31 +0,0 @@ -package clusters - -import ( - "testing" - - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/stretchr/testify/assert" -) - -func TestIsCompatible(t *testing.T) { - assert.True(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ - SparkVersion: "13.2.x-aarch64-scala2.12", - DataSecurityMode: compute.DataSecurityModeUserIsolation, - }, "13.0")) - assert.False(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ - SparkVersion: "13.2.x-aarch64-scala2.12", - DataSecurityMode: compute.DataSecurityModeNone, - }, "13.0")) - assert.False(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ - SparkVersion: "9.1.x-photon-scala2.12", - DataSecurityMode: compute.DataSecurityModeNone, - }, "13.0")) - assert.False(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ - SparkVersion: "9.1.x-photon-scala2.12", - DataSecurityMode: compute.DataSecurityModeNone, - }, "10.0")) - assert.False(t, IsUnityCatalogCompatible(&compute.ClusterDetails{ - SparkVersion: "custom-9.1.x-photon-scala2.12", - DataSecurityMode: compute.DataSecurityModeNone, - }, "14.0")) -} diff --git a/cmd/workspace/warehouses/ask.go b/cmd/workspace/warehouses/ask.go deleted file mode 100644 index 72963187a2..0000000000 --- a/cmd/workspace/warehouses/ask.go +++ /dev/null @@ -1,51 +0,0 @@ -package warehouses - -import ( - "context" - "errors" - "fmt" - - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/service/sql" - "github.com/fatih/color" -) - -var ErrNoCompatibleWarehouses = 
errors.New("no compatible warehouses") - -func AskForCompatibleWarehouses(ctx context.Context, w *databricks.WorkspaceClient, types []sql.EndpointInfoWarehouseType) (string, error) { - all, err := w.Warehouses.ListAll(ctx, sql.ListWarehousesRequest{}) - if err != nil { - return "", fmt.Errorf("list warehouses: %w", err) - } - allowed := map[sql.EndpointInfoWarehouseType]bool{} - for _, v := range types { - allowed[v] = true - } - var lastWarehouseID string - names := map[string]string{} - for _, v := range all { - if !allowed[v.WarehouseType] { - continue - } - var state string - switch v.State { - case sql.StateRunning: - state = color.GreenString(v.State.String()) - case sql.StateStopped, sql.StateDeleted, sql.StateStopping, sql.StateDeleting: - state = color.RedString(v.State.String()) - default: - state = color.BlueString(v.State.String()) - } - visibleTouser := fmt.Sprintf("%s (%s %s)", v.Name, state, v.WarehouseType) - names[visibleTouser] = v.Id - lastWarehouseID = v.Id - } - if len(names) == 0 { - return "", ErrNoCompatibleWarehouses - } - if len(names) == 1 { - return lastWarehouseID, nil - } - return cmdio.Select(ctx, names, "Choose SQL Warehouse") -} From ef16326c84988136929f1d51aa120ef1ab3606c7 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 16 Nov 2023 14:33:08 +0100 Subject: [PATCH 3/8] eagerly create nested cache directories --- cmd/labs/localcache/jsonfile.go | 17 +++++++++++++++-- cmd/labs/localcache/jsonfile_test.go | 25 +++++++++++++++++++++++++ cmd/labs/project/fetcher.go | 4 ---- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/cmd/labs/localcache/jsonfile.go b/cmd/labs/localcache/jsonfile.go index 7c87d08e82..495743a576 100644 --- a/cmd/labs/localcache/jsonfile.go +++ b/cmd/labs/localcache/jsonfile.go @@ -15,6 +15,7 @@ import ( ) const userRW = 0o600 +const ownerRWXworldRX = 0o755 func NewLocalCache[T any](dir, name string, validity time.Duration) LocalCache[T] { return LocalCache[T]{ @@ -70,8 +71,20 @@ func (r 
*LocalCache[T]) writeCache(ctx context.Context, data T) (T, error) { if err != nil { return r.zero, fmt.Errorf("json marshal: %w", err) } - err = os.WriteFile(r.FileName(), raw, userRW) - if err != nil { + cacheFile := r.FileName() + err = os.WriteFile(cacheFile, raw, userRW) + if errors.Is(err, fs.ErrNotExist) { + cacheDir := filepath.Dir(cacheFile) + err := os.MkdirAll(cacheDir, ownerRWXworldRX) + if err != nil { + return r.zero, fmt.Errorf("create %s: %w", cacheDir, err) + } + err = os.WriteFile(cacheFile, raw, userRW) + if err != nil { + return r.zero, fmt.Errorf("retry save cache: %w", err) + } + return data, nil + } else if err != nil { return r.zero, fmt.Errorf("save cache: %w", err) } return data, nil diff --git a/cmd/labs/localcache/jsonfile_test.go b/cmd/labs/localcache/jsonfile_test.go index d457b25070..0fbb010064 100644 --- a/cmd/labs/localcache/jsonfile_test.go +++ b/cmd/labs/localcache/jsonfile_test.go @@ -11,6 +11,31 @@ import ( "github.com/stretchr/testify/assert" ) +func TestCreatesDirectoryIfNeeded(t *testing.T) { + ctx := context.Background() + c := NewLocalCache[int64](t.TempDir(), "some/nested/file", 1*time.Minute) + thing := []int64{0} + tick := func() (int64, error) { + thing[0] += 1 + return thing[0], nil + } + first, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.Equal(t, first, int64(1)) +} + +func TestImpossibleToCreateDir(t *testing.T) { + ctx := context.Background() + c := NewLocalCache[int64]("/dev/null", "some/nested/file", 1*time.Minute) + thing := []int64{0} + tick := func() (int64, error) { + thing[0] += 1 + return thing[0], nil + } + _, err := c.Load(ctx, tick) + assert.Error(t, err) +} + func TestRefreshes(t *testing.T) { ctx := context.Background() c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) diff --git a/cmd/labs/project/fetcher.go b/cmd/labs/project/fetcher.go index 179ba183a9..b677bcd991 100644 --- a/cmd/labs/project/fetcher.go +++ b/cmd/labs/project/fetcher.go @@ -112,10 +112,6 @@ type fetcher 
struct { func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (string, error) { ctx := cmd.Context() cacheDir := PathInLabs(ctx, f.name, "cache") - err := os.MkdirAll(cacheDir, ownerRWXworldRX) - if err != nil { - return "", fmt.Errorf("make cache dir: %w", err) - } // `databricks labs isntall X` doesn't know which exact version to fetch, so first // we fetch all versions and then pick the latest one dynamically. versions, err := github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx) From 33473138ce230ed3942ff7ee136c63b52d2c0ab2 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 16 Nov 2023 15:52:54 +0100 Subject: [PATCH 4/8] .. --- cmd/labs/localcache/jsonfile_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmd/labs/localcache/jsonfile_test.go b/cmd/labs/localcache/jsonfile_test.go index 0fbb010064..13e46d3d54 100644 --- a/cmd/labs/localcache/jsonfile_test.go +++ b/cmd/labs/localcache/jsonfile_test.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net/url" + "runtime" "testing" "time" @@ -25,6 +26,9 @@ func TestCreatesDirectoryIfNeeded(t *testing.T) { } func TestImpossibleToCreateDir(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("No /dev/null on windows") + } ctx := context.Background() c := NewLocalCache[int64]("/dev/null", "some/nested/file", 1*time.Minute) thing := []int64{0} @@ -91,6 +95,9 @@ func TestSupportOfflineSystem(t *testing.T) { } func TestFolderDisappears(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("No /dev/null on windows") + } c := NewLocalCache[int64]("/dev/null", "time", 1*time.Minute) tick := func() (int64, error) { now := time.Now().UnixNano() From 7c186cb0053a61700d9258bf7e35e407eda16010 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Fri, 17 Nov 2023 10:53:44 +0100 Subject: [PATCH 5/8] .. 
--- cmd/labs/clear_cache.go | 3 ++- cmd/labs/github/releases_test.go | 4 ++-- cmd/labs/labs.go | 2 +- cmd/labs/project/proxy.go | 2 ++ cmd/labs/uninstall.go | 1 - 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cmd/labs/clear_cache.go b/cmd/labs/clear_cache.go index 480e312248..e2f531cfd0 100644 --- a/cmd/labs/clear_cache.go +++ b/cmd/labs/clear_cache.go @@ -19,11 +19,12 @@ func newClearCacheCommand() *cobra.Command { if err != nil { return err } - _ = os.Remove(project.PathInLabs(ctx, "repositories.json")) + _ = os.Remove(project.PathInLabs(ctx, "databrickslabs-repositories.json")) logger := log.GetLogger(ctx) for _, prj := range projects { logger.Info("clearing labs project cache", slog.String("name", prj.Name)) _ = os.RemoveAll(prj.CacheDir(ctx)) + // recreating empty cache folder for downstream apps to work normally _ = prj.EnsureFoldersExist(ctx) } return nil diff --git a/cmd/labs/github/releases_test.go b/cmd/labs/github/releases_test.go index 75a5f96609..ea24a1e2ee 100644 --- a/cmd/labs/github/releases_test.go +++ b/cmd/labs/github/releases_test.go @@ -12,7 +12,7 @@ import ( func TestLoadsReleasesForCLI(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/repos/databricks/cli/releases" { - w.Write([]byte("[]")) + w.Write([]byte(`[{"tag_name": "v1.2.3"}, {"tag_name": "v1.2.2"}]`)) return } t.Logf("Requested: %s", r.URL.Path) @@ -26,7 +26,7 @@ func TestLoadsReleasesForCLI(t *testing.T) { r := NewReleaseCache("databricks", "cli", t.TempDir()) all, err := r.Load(ctx) assert.NoError(t, err) - assert.NotNil(t, all) + assert.Len(t, all, 2) // no call is made _, err = r.Load(ctx) diff --git a/cmd/labs/labs.go b/cmd/labs/labs.go index ee10be804f..cccf8ac44c 100644 --- a/cmd/labs/labs.go +++ b/cmd/labs/labs.go @@ -10,7 +10,7 @@ import ( func New(ctx context.Context) *cobra.Command { cmd := &cobra.Command{ Use: "labs", - Short: "Managge Databricks Labs installations", + Short: 
"Manage Databricks Labs installations", Long: `Manage experimental Databricks Labs apps`, } diff --git a/cmd/labs/project/proxy.go b/cmd/labs/project/proxy.go index 86a110f995..ae7df286c6 100644 --- a/cmd/labs/project/proxy.go +++ b/cmd/labs/project/proxy.go @@ -67,6 +67,8 @@ func (cp *proxy) runE(cmd *cobra.Command, _ []string) error { return err } +// [EXPERIMENTAL] this interface contract may change in the future. +// See https://github.com/databricks/cli/issues/994 func (cp *proxy) renderJsonAsTable(cmd *cobra.Command, args []string, envs map[string]string) error { var buf bytes.Buffer ctx := cmd.Context() diff --git a/cmd/labs/uninstall.go b/cmd/labs/uninstall.go index 981107311d..b2c83fff7f 100644 --- a/cmd/labs/uninstall.go +++ b/cmd/labs/uninstall.go @@ -13,7 +13,6 @@ func newUninstallCommand() *cobra.Command { Args: cobra.ExactArgs(1), Short: "Uninstalls project", ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - // cobra.ShellCompDirectiveNoFileComp var names []string installed, _ := project.Installed(cmd.Context()) for _, v := range installed { From 610919780dad22a208b724021c18d765283f7418 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Fri, 17 Nov 2023 11:02:52 +0100 Subject: [PATCH 6/8] .. --- cmd/labs/project/entrypoint.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/labs/project/entrypoint.go b/cmd/labs/project/entrypoint.go index ebf886be55..fedd70a4bd 100644 --- a/cmd/labs/project/entrypoint.go +++ b/cmd/labs/project/entrypoint.go @@ -113,6 +113,9 @@ func (e *Entrypoint) preparePython(ctx context.Context, environment map[string]s // // See https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH libDir := e.EffectiveLibDir(ctx) + // The intention for every install is to be sandboxed - not dependent on anything else than Python binary. + // Having ability to override PYTHONPATH in the mix will break this assumption. 
Need strong evidence that + // this is really needed. environment["PYTHONPATH"] = e.joinPaths(libDir, filepath.Join(libDir, "src")) } From fc09822ea8fe142fed382b98d1c69d30b2058e43 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Fri, 17 Nov 2023 11:03:31 +0100 Subject: [PATCH 7/8] .. --- cmd/labs/github/github.go | 2 +- cmd/labs/github/releases.go | 2 +- cmd/labs/github/repositories.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/labs/github/github.go b/cmd/labs/github/github.go index 98e11e75a2..1dd9fae5eb 100644 --- a/cmd/labs/github/github.go +++ b/cmd/labs/github/github.go @@ -57,7 +57,7 @@ func getBytes(ctx context.Context, method, url string, body io.Reader) ([]byte, return io.ReadAll(res.Body) } -func httpGetAndUnmarshall(ctx context.Context, url string, response any) error { +func httpGetAndUnmarshal(ctx context.Context, url string, response any) error { raw, err := getBytes(ctx, "GET", url, nil) if err != nil { return err diff --git a/cmd/labs/github/releases.go b/cmd/labs/github/releases.go index c5a8364caa..0dae0317d4 100644 --- a/cmd/labs/github/releases.go +++ b/cmd/labs/github/releases.go @@ -39,7 +39,7 @@ func getVersions(ctx context.Context, org, repo string) (Versions, error) { var releases Versions log.Debugf(ctx, "Fetching latest releases for %s/%s from GitHub API", org, repo) url := fmt.Sprintf("%s/repos/%s/%s/releases", gitHubAPI, org, repo) - err := httpGetAndUnmarshall(ctx, url, &releases) + err := httpGetAndUnmarshal(ctx, url, &releases) return releases, err } diff --git a/cmd/labs/github/repositories.go b/cmd/labs/github/repositories.go index 545c886fb2..850cdb1cb7 100644 --- a/cmd/labs/github/repositories.go +++ b/cmd/labs/github/repositories.go @@ -35,7 +35,7 @@ func getRepositories(ctx context.Context, org string) (Repositories, error) { var repos Repositories log.Debugf(ctx, "Loading repositories for %s from GitHub API", org) url := fmt.Sprintf("%s/users/%s/repos", gitHubAPI, org) - err := 
httpGetAndUnmarshall(ctx, url, &repos) + err := httpGetAndUnmarshal(ctx, url, &repos) return repos, err } From ebbba371b683f1644f85267590a32fc5831eca4c Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Fri, 17 Nov 2023 11:43:18 +0100 Subject: [PATCH 8/8] .. --- cmd/labs/localcache/jsonfile_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/labs/localcache/jsonfile_test.go b/cmd/labs/localcache/jsonfile_test.go index 13e46d3d54..0d852174c4 100644 --- a/cmd/labs/localcache/jsonfile_test.go +++ b/cmd/labs/localcache/jsonfile_test.go @@ -41,6 +41,9 @@ func TestImpossibleToCreateDir(t *testing.T) { } func TestRefreshes(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("No /dev/null on windows") + } ctx := context.Background() c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) thing := []int64{0}