From 067c06cd2ec0b469bc7a9a0f4fd5c6c2064ece00 Mon Sep 17 00:00:00 2001
From: Vladimir Iliakov
Date: Wed, 19 Nov 2025 11:12:48 +0100
Subject: [PATCH 1/7] STAC-23603: Restoring Settings

---
 README.md                          | 55 ++++++++++++++++++++++++++++--
 cmd/settings/check_and_finalize.go |  2 +-
 cmd/settings/restore.go            |  2 +-
 3 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 1cc244f..0de70bb 100644
--- a/README.md
+++ b/README.md
@@ -10,8 +10,9 @@ This CLI tool replaces the legacy Bash-based backup/restore scripts with a singl
 - Elasticsearch snapshots and restores
 - Stackgraph backups and restores
 - VictoriaMetrics backups and restores
+- Settings backups and restores
 
-**Planned:** ClickHouse, Configuration backups
+**Planned:** ClickHouse backups
 
 ## Installation
 
@@ -183,6 +184,52 @@ sts-backup victoriametrics check-and-finalize --namespace --job
+### settings
+
+Manage Settings backups and restores.
+
+#### list
+
+List available Settings backups.
+
+```bash
+sts-backup settings list --namespace
+```
+
+#### restore
+
+Restore Settings from a backup archive. Automatically scales down affected deployments before restore and scales them
+back up afterward.
+
+```bash
+sts-backup settings restore --namespace [--archive | --latest] [flags]
+```
+
+**Flags:**
+
+- `--archive` - Specific archive name to restore (e.g., sts-backup-20251117-1404.sty)
+- `--latest` - Restore from the most recent backup
+- `--background` - Run restore job in background without waiting for completion
+- `--yes, -y` - Skip confirmation prompt
+
+**Note**: Either `--archive` or `--latest` must be specified (mutually exclusive).
+
+#### check-and-finalize
+
+Check the status of a background Settings restore job and clean up resources.
+
+```bash
+sts-backup settings check-and-finalize --namespace --job [--wait]
+```
+
+**Flags:**
+
+- `--job, -j` - Settings restore job name (required)
+- `--wait, -w` - Wait for job to complete before cleanup
+
+**Use Case**: This command is useful when a restore job was started with `--background` flag or was interrupted (
+Ctrl+C).
+ ## Configuration The CLI uses configuration from Kubernetes ConfigMaps and Secrets with the following precedence: @@ -264,7 +311,11 @@ See [internal/foundation/config/testdata/validConfigMapConfig.yaml](internal/fou │ │ ├── list.go # List backups │ │ ├── restore.go # Restore backup │ │ └── check-and-finalize.go # Check and finalize restore job -│ └── victoriametrics/ # VictoriaMetrics subcommands +│ ├── victoriametrics/ # VictoriaMetrics subcommands +│ │ ├── list.go # List backups +│ │ ├── restore.go # Restore backup +│ │ └── check-and-finalize.go # Check and finalize restore job +│ └── settings/ # Settings subcommands │ ├── list.go # List backups │ ├── restore.go # Restore backup │ └── check-and-finalize.go # Check and finalize restore job diff --git a/cmd/settings/check_and_finalize.go b/cmd/settings/check_and_finalize.go index 4317693..ef357dc 100644 --- a/cmd/settings/check_and_finalize.go +++ b/cmd/settings/check_and_finalize.go @@ -58,7 +58,7 @@ func runCheckAndFinalize(appCtx *app.Context) error { JobName: checkJobName, ServiceName: "settings", ScaleSelector: appCtx.Config.Settings.Restore.ScaleDownLabelSelector, - CleanupPVC: true, + CleanupPVC: false, WaitForJob: waitForJob, Log: appCtx.Logger, }) diff --git a/cmd/settings/restore.go b/cmd/settings/restore.go index 71b15f8..5e553fd 100644 --- a/cmd/settings/restore.go +++ b/cmd/settings/restore.go @@ -78,7 +78,7 @@ func runRestore(appCtx *app.Context) error { // Warn user and ask for confirmation if !skipConfirmation { appCtx.Logger.Println() - appCtx.Logger.Warningf("WARNING: Restoring from backup will PURGE all existing Settings data!") + appCtx.Logger.Warningf("WARNING: Restoring from backup will PURGE all existing Stackgraph (Topology) data!") appCtx.Logger.Warningf("This operation cannot be undone.") appCtx.Logger.Println() appCtx.Logger.Infof("Backup to restore: %s", backupFile) From 0fc117d7c8e90100b5f73cb0090dda158d756679 Mon Sep 17 00:00:00 2001 From: Vladimir Iliakov Date: Thu, 20 Nov 2025 14:00:34 +0100 Subject: [PATCH 2/7] STAC-23600: Remove cross-dependency between orchestration packages --- cmd/settings/check_and_finalize.go | 3 +++ cmd/stackgraph/check_and_finalize.go | 3 +++ cmd/victoriametrics/check_and_finalize.go | 3 +++ internal/orchestration/restore/finalize.go | 15 ++++++++++++--- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/cmd/settings/check_and_finalize.go b/cmd/settings/check_and_finalize.go index ef357dc..9ceac4c 100644 --- a/cmd/settings/check_and_finalize.go +++ b/cmd/settings/check_and_finalize.go @@ -8,6 +8,7 @@ import ( "github.com/stackvista/stackstate-backup-cli/internal/app" "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" "github.com/stackvista/stackstate-backup-cli/internal/orchestration/restore" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" ) // Check and finalize command flags @@ -57,6 +58,8 @@ func runCheckAndFinalize(appCtx *app.Context) error { Namespace: appCtx.Namespace, JobName: checkJobName, ServiceName: "settings", + ScaleUpFn: scale.ScaleUpFromAnnotations, + ScaleDownFn: scale.ScaleDown, ScaleSelector: appCtx.Config.Settings.Restore.ScaleDownLabelSelector, CleanupPVC: false, WaitForJob: waitForJob, diff --git a/cmd/stackgraph/check_and_finalize.go b/cmd/stackgraph/check_and_finalize.go index fc47df9..8139270 100644 --- a/cmd/stackgraph/check_and_finalize.go +++ b/cmd/stackgraph/check_and_finalize.go @@ -8,6 +8,7 @@ import ( "github.com/stackvista/stackstate-backup-cli/internal/app" 
"github.com/stackvista/stackstate-backup-cli/internal/foundation/config" "github.com/stackvista/stackstate-backup-cli/internal/orchestration/restore" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" ) // Check and finalize command flags @@ -57,6 +58,8 @@ func runCheckAndFinalize(appCtx *app.Context) error { Namespace: appCtx.Namespace, JobName: checkJobName, ServiceName: "stackgraph", + ScaleUpFn: scale.ScaleUpFromAnnotations, + ScaleDownFn: scale.ScaleDown, ScaleSelector: appCtx.Config.Stackgraph.Restore.ScaleDownLabelSelector, CleanupPVC: true, WaitForJob: waitForJob, diff --git a/cmd/victoriametrics/check_and_finalize.go b/cmd/victoriametrics/check_and_finalize.go index 04924bb..768a5f8 100644 --- a/cmd/victoriametrics/check_and_finalize.go +++ b/cmd/victoriametrics/check_and_finalize.go @@ -8,6 +8,7 @@ import ( "github.com/stackvista/stackstate-backup-cli/internal/app" "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" "github.com/stackvista/stackstate-backup-cli/internal/orchestration/restore" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" ) // Check and finalize command flags @@ -57,6 +58,8 @@ func runCheckAndFinalize(appCtx *app.Context) error { Namespace: appCtx.Namespace, JobName: checkJobName, ServiceName: "victoria-metrics", + ScaleUpFn: scale.ScaleUpFromAnnotations, + ScaleDownFn: scale.ScaleDown, ScaleSelector: appCtx.Config.VictoriaMetrics.Restore.ScaleDownLabelSelector, CleanupPVC: false, WaitForJob: waitForJob, diff --git a/internal/orchestration/restore/finalize.go b/internal/orchestration/restore/finalize.go index 832791b..ae3ba1f 100644 --- a/internal/orchestration/restore/finalize.go +++ b/internal/orchestration/restore/finalize.go @@ -5,7 +5,6 @@ import ( "github.com/stackvista/stackstate-backup-cli/internal/clients/k8s" "github.com/stackvista/stackstate-backup-cli/internal/foundation/logger" - "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" batchv1 "k8s.io/api/batch/v1" ) @@ -29,6 +28,8 @@ type HandleCompletedJobParams struct { Namespace string JobName string ServiceName string + ScaleUpFn func(k8sClient *k8s.Client, namespace, labelSelector string, log *logger.Logger) error + ScaleDownFn func(k8sClient *k8s.Client, namespace, labelSelector string, log *logger.Logger) ([]k8s.AppsScale, error) ScaleSelector string CleanupPVC bool Log *logger.Logger @@ -44,7 +45,7 @@ func HandleCompletedJob(params HandleCompletedJobParams) error { params.Log.Println() // Scale up deployments that were scaled down before restore - if err := scale.ScaleUpFromAnnotations(params.K8sClient, params.Namespace, params.ScaleSelector, params.Log); err != nil { + if err := params.ScaleUpFn(params.K8sClient, params.Namespace, params.ScaleSelector, params.Log); err != nil { params.Log.Warningf("Failed to scale up workload: %v", err) } } else { @@ -68,6 +69,8 @@ type WaitAndFinalizeParams struct { Namespace string JobName string ServiceName string + ScaleUpFn func(k8sClient *k8s.Client, namespace, labelSelector string, log *logger.Logger) error + ScaleDownFn func(k8sClient *k8s.Client, namespace, labelSelector string, log *logger.Logger) ([]k8s.AppsScale, error) ScaleSelector string CleanupPVC bool Log *logger.Logger @@ -90,7 +93,7 @@ func WaitAndFinalize(params WaitAndFinalizeParams) error { params.Log.Println() // Scale up deployments that were scaled down before restore - if err := scale.ScaleUpFromAnnotations(params.K8sClient, params.Namespace, params.ScaleSelector, params.Log); err != nil { 
+ if err := params.ScaleUpFn(params.K8sClient, params.Namespace, params.ScaleSelector, params.Log); err != nil { params.Log.Warningf("Failed to scale up workload: %v", err) } @@ -104,6 +107,8 @@ type CheckAndFinalizeParams struct { Namespace string JobName string ServiceName string + ScaleUpFn func(k8sClient *k8s.Client, namespace, labelSelector string, log *logger.Logger) error + ScaleDownFn func(k8sClient *k8s.Client, namespace, labelSelector string, log *logger.Logger) ([]k8s.AppsScale, error) ScaleSelector string CleanupPVC bool WaitForJob bool @@ -130,6 +135,8 @@ func CheckAndFinalize(params CheckAndFinalizeParams) error { Namespace: params.Namespace, JobName: params.JobName, ServiceName: params.ServiceName, + ScaleUpFn: params.ScaleUpFn, + ScaleDownFn: params.ScaleDownFn, ScaleSelector: params.ScaleSelector, CleanupPVC: params.CleanupPVC, Log: params.Log, @@ -145,6 +152,8 @@ func CheckAndFinalize(params CheckAndFinalizeParams) error { Namespace: params.Namespace, JobName: params.JobName, ServiceName: params.ServiceName, + ScaleUpFn: params.ScaleUpFn, + ScaleDownFn: params.ScaleDownFn, ScaleSelector: params.ScaleSelector, CleanupPVC: params.CleanupPVC, Log: params.Log, From df324c2c285d1a6052a07b776567e1eafa2cc890 Mon Sep 17 00:00:00 2001 From: Vladimir Iliakov Date: Sun, 23 Nov 2025 17:30:56 +0100 Subject: [PATCH 3/7] STAC-23600: Restore Clickhouse --- ARCHITECTURE.md | 11 +- README.md | 82 ++++- cmd/clickhouse/check_and_finalize.go | 197 +++++++++++ cmd/clickhouse/clickhouse.go | 21 ++ cmd/clickhouse/list.go | 83 +++++ cmd/clickhouse/restore.go | 159 +++++++++ cmd/elasticsearch/check_and_finalize.go | 148 +++++++++ cmd/elasticsearch/elasticsearch.go | 3 +- .../{list-snapshots.go => list.go} | 10 +- .../{list_snapshots_test.go => list_test.go} | 26 +- cmd/elasticsearch/restore-snapshot.go | 233 ------------- cmd/elasticsearch/restore.go | 236 +++++++++++++ ...store_snapshot_test.go => restore_test.go} | 22 +- cmd/root.go | 5 + go.mod | 31 +- go.sum | 95 ++++-- internal/app/app.go | 17 + internal/clients/clickhouse/client.go | 314 ++++++++++++++++++ internal/clients/clickhouse/client_test.go | 123 +++++++ internal/clients/clickhouse/interface.go | 28 ++ internal/clients/clickhouse/sql.go | 33 ++ internal/clients/elasticsearch/client.go | 96 +++++- internal/clients/elasticsearch/client_test.go | 7 +- internal/clients/elasticsearch/interface.go | 4 +- internal/foundation/config/config.go | 16 + internal/foundation/config/config_test.go | 18 + .../config/testdata/validConfigMapConfig.yaml | 16 + .../config/testdata/validConfigMapOnly.yaml | 16 + internal/orchestration/restore/apirestore.go | 87 +++++ 29 files changed, 1832 insertions(+), 305 deletions(-) create mode 100644 cmd/clickhouse/check_and_finalize.go create mode 100644 cmd/clickhouse/clickhouse.go create mode 100644 cmd/clickhouse/list.go create mode 100644 cmd/clickhouse/restore.go create mode 100644 cmd/elasticsearch/check_and_finalize.go rename cmd/elasticsearch/{list-snapshots.go => list.go} (88%) rename cmd/elasticsearch/{list_snapshots_test.go => list_test.go} (92%) delete mode 100644 cmd/elasticsearch/restore-snapshot.go create mode 100644 cmd/elasticsearch/restore.go rename cmd/elasticsearch/{restore_snapshot_test.go => restore_test.go} (95%) create mode 100644 internal/clients/clickhouse/client.go create mode 100644 internal/clients/clickhouse/client_test.go create mode 100644 internal/clients/clickhouse/interface.go create mode 100644 internal/clients/clickhouse/sql.go create mode 100644 
internal/orchestration/restore/apirestore.go diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 56d3a21..d8fde2a 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -20,8 +20,10 @@ stackstate-backup-cli/ │ ├── root.go # Root command and global flags │ ├── version/ # Version information command │ ├── elasticsearch/ # Elasticsearch backup/restore commands +│ ├── clickhouse/ # ClickHouse backup/restore commands │ ├── stackgraph/ # Stackgraph backup/restore commands -│ └── victoriametrics/ # VictoriaMetrics backup/restore commands +│ ├── victoriametrics/ # VictoriaMetrics backup/restore commands +│ └── settings/ # Settings backup/restore commands │ ├── internal/ # Internal packages (Layers 0-3) │ ├── foundation/ # Layer 0: Core utilities @@ -63,9 +65,11 @@ stackstate-backup-cli/ - Formats output for end users **Key Packages**: -- `cmd/elasticsearch/`: Elasticsearch snapshot/restore commands (configure, list-snapshots, list-indices, restore-snapshot) +- `cmd/elasticsearch/`: Elasticsearch snapshot/restore commands (configure, list, list-indices, restore, check-and-finalize) +- `cmd/clickhouse/`: ClickHouse backup/restore commands (list, restore, check-and-finalize) - `cmd/stackgraph/`: Stackgraph backup/restore commands (list, restore, check-and-finalize) - `cmd/victoriametrics/`: VictoriaMetrics backup/restore commands (list, restore, check-and-finalize) +- `cmd/settings/`: Settings backup/restore commands (list, restore, check-and-finalize) - `cmd/version/`: Version information **Dependency Rules**: @@ -140,6 +144,7 @@ appCtx.Formatter **Key Packages**: - `k8s/`: Kubernetes API operations (Jobs, Pods, Deployments, ConfigMaps, Secrets, Logs) - `elasticsearch/`: Elasticsearch HTTP API (snapshots, indices, datastreams) +- `clickhouse/`: ClickHouse Backup API and SQL operations (backups, restore operations, status tracking) - `s3/`: S3/Minio operations (client creation, object filtering) **Dependency Rules**: @@ -407,7 +412,7 @@ endpoint := "http://localhost:9200" ### ❌ Don't: Create Clients Directly in Commands ```go -// BAD: cmd/elasticsearch/list-snapshots.go +// BAD: cmd/elasticsearch/list.go func runListSnapshots(globalFlags *config.CLIGlobalFlags) error { k8sClient, _ := k8s.NewClient(globalFlags.Kubeconfig, globalFlags.Debug) esClient, _ := elasticsearch.NewClient("http://localhost:9200") diff --git a/README.md b/README.md index 0de70bb..941c8b6 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,11 @@ This CLI tool replaces the legacy Bash-based backup/restore scripts with a singl **Current Support:** - Elasticsearch snapshots and restores +- ClickHouse backups and restores - Stackgraph backups and restores - VictoriaMetrics backups and restores - Settings backups and restores -**Planned:** ClickHouse backups - ## Installation Download pre-built binaries from the [releases page](https://github.com/stackvista/stackstate-backup-cli/releases). @@ -70,26 +69,43 @@ List Elasticsearch indices. sts-backup elasticsearch list-indices --namespace ``` -#### list-snapshots +#### list List available Elasticsearch snapshots. ```bash -sts-backup elasticsearch list-snapshots --namespace +sts-backup elasticsearch list --namespace ``` -#### restore-snapshot +#### restore Restore Elasticsearch snapshot. Automatically scales down affected deployments before restore and scales them back up afterward. 
 ```bash
-sts-backup elasticsearch restore-snapshot --namespace --snapshot-name [flags]
+sts-backup elasticsearch restore --namespace [--snapshot | --latest] [flags]
 ```
 
 **Flags:**
-- `--snapshot-name, -s` - Name of snapshot to restore (required)
-- `--drop-all-indices, -r` - Delete all existing STS indices before restore
-- `--yes` - Skip confirmation prompt
+- `--snapshot, -s` - Name of snapshot to restore (mutually exclusive with --latest)
+- `--latest` - Restore from the most recent snapshot (mutually exclusive with --snapshot)
+- `--background` - Run restore in background without waiting for completion
+- `--yes, -y` - Skip confirmation prompt
+
+**Note**: Either `--snapshot` or `--latest` must be specified (mutually exclusive).
+
+#### check-and-finalize
+
+Check the status of a restore operation and finalize if complete.
+
+```bash
+sts-backup elasticsearch check-and-finalize --namespace --operation-id [--wait]
+```
+
+**Flags:**
+- `--operation-id` - Operation ID of the restore operation (snapshot name) (required)
+- `--wait` - Wait for restore to complete if still running
+
+**Use Case**: This command is useful when a restore was started with `--background` flag or was interrupted (Ctrl+C).
 
 ### stackgraph
 
@@ -230,6 +246,46 @@ sts-backup settings check-and-finalize --namespace --job
 **Use Case**: This command is useful when a restore job was started with `--background` flag or was interrupted (
 Ctrl+C).
 
+### clickhouse
+
+Manage ClickHouse backups and restores.
+
+#### list
+
+List available ClickHouse backups from the backup API.
+
+```bash
+sts-backup clickhouse list --namespace
+```
+
+#### restore
+
+Restore ClickHouse from a backup. Automatically scales down affected StatefulSets before restore and scales them back up afterward.
+
+```bash
+sts-backup clickhouse restore --namespace [--snapshot | --latest] [flags]
+```
+
+**Flags:**
+- `--snapshot` - Specific backup name to restore (e.g., full_2025-11-18T11-45-04)
+- `--latest` - Restore from the most recent backup
+- `--background` - Run restore in background without waiting for completion
+- `--yes, -y` - Skip confirmation prompt
+
+#### check-and-finalize
+
+Check the status of a ClickHouse restore operation and finalize if complete.
+
+```bash
+sts-backup clickhouse check-and-finalize --namespace --operation-id [--wait]
+```
+
+**Flags:**
+- `--operation-id` - Operation ID of the restore operation (required)
+- `--wait` - Wait for restore to complete if still running
+
+**Use Case**: This command is useful when checking the status of a restore operation or finalizing after completion.
+ ## Configuration The CLI uses configuration from Kubernetes ConfigMaps and Secrets with the following precedence: @@ -305,8 +359,13 @@ See [internal/foundation/config/testdata/validConfigMapConfig.yaml](internal/fou │ ├── elasticsearch/ # Elasticsearch subcommands │ │ ├── configure.go # Configure snapshot repository │ │ ├── list-indices.go # List indices -│ │ ├── list-snapshots.go # List snapshots -│ │ └── restore-snapshot.go # Restore snapshot +│ │ ├── list.go # List snapshots +│ │ ├── restore.go # Restore snapshot +│ │ └── check-and-finalize.go # Check and finalize restore +│ ├── clickhouse/ # ClickHouse subcommands +│ │ ├── list.go # List backups +│ │ ├── restore.go # Restore backup +│ │ └── check-and-finalize.go # Check and finalize restore │ ├── stackgraph/ # Stackgraph subcommands │ │ ├── list.go # List backups │ │ ├── restore.go # Restore backup @@ -327,6 +386,7 @@ See [internal/foundation/config/testdata/validConfigMapConfig.yaml](internal/fou │ ├── clients/ # Layer 1: Service clients │ │ ├── k8s/ # Kubernetes client │ │ ├── elasticsearch/ # Elasticsearch client +│ │ ├── clickhouse/ # ClickHouse client │ │ └── s3/ # S3/Minio client │ ├── orchestration/ # Layer 2: Workflows │ │ ├── portforward/ # Port-forwarding lifecycle diff --git a/cmd/clickhouse/check_and_finalize.go b/cmd/clickhouse/check_and_finalize.go new file mode 100644 index 0000000..d0d9206 --- /dev/null +++ b/cmd/clickhouse/check_and_finalize.go @@ -0,0 +1,197 @@ +package clickhouse + +import ( + "fmt" + "os" + "time" + + "github.com/spf13/cobra" + "github.com/stackvista/stackstate-backup-cli/internal/app" + "github.com/stackvista/stackstate-backup-cli/internal/clients/clickhouse" + "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/portforward" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/restore" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" +) + +const ( + defaultRestoreTimeout = 30 * time.Minute + defaultPollInterval = 10 * time.Second +) + +// Check-and-finalize command flags +var ( + checkOperationID string + waitForRestore bool +) + +func checkAndFinalizeCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { + cmd := &cobra.Command{ + Use: "check-and-finalize", + Short: "Check and finalize a ClickHouse restore operation", + Long: `Check the status of a ClickHouse restore operation and finalize it. + +This command is useful when a restore was started without --wait flag or was interrupted. 
+It will check the restore status and if complete, execute post-restore tasks and scale up resources.`, + Run: func(_ *cobra.Command, _ []string) { + appCtx, err := app.NewContext(globalFlags) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + if err := runCheckAndFinalize(appCtx); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + }, + } + + cmd.Flags().StringVar(&checkOperationID, "operation-id", "", "Operation ID of the restore operation (required)") + cmd.Flags().BoolVar(&waitForRestore, "wait", false, "Wait for restore to complete before finalizing") + _ = cmd.MarkFlagRequired("operation-id") + + return cmd +} + +func runCheckAndFinalize(appCtx *app.Context) error { + return checkAndFinalize(appCtx, checkOperationID, waitForRestore) +} + +// waitAndFinalize waits for restore completion and finalizes +func waitAndFinalize(appCtx *app.Context, chClient clickhouse.Interface, operationID string) error { + restore.PrintAPIWaitingMessage("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) + + // Wait for restore using shared utility + checkStatusFn := func() (string, bool, error) { + status, err := chClient.GetRestoreStatus(operationID) + if err != nil { + return "", false, err + } + + switch status.Status { + case "success": + return "SUCCESS", true, nil + case "error": + return "FAILED", true, fmt.Errorf("%s", status.Error) + default: + return "IN_PROGRESS", false, nil + } + } + + if err := restore.WaitForAPIRestore(checkStatusFn, defaultPollInterval, defaultRestoreTimeout, appCtx.Logger); err != nil { + return err + } + + appCtx.Logger.Successf("Restore completed successfully") + + // Finalize + return finalizeRestore(appCtx) +} + +// checkAndFinalize checks restore status and finalizes if complete +func checkAndFinalize(appCtx *app.Context, operationID string, waitForComplete bool) error { + // Setup port-forward + pf, err := portforward.SetupPortForward( + appCtx.K8sClient, + appCtx.Namespace, + appCtx.Config.Clickhouse.BackupService.Name, + appCtx.Config.Clickhouse.BackupService.LocalPortForwardPort, + appCtx.Config.Clickhouse.BackupService.Port, + appCtx.Logger, + ) + if err != nil { + return err + } + defer close(pf.StopChan) + + // Check status + appCtx.Logger.Println() + appCtx.Logger.Infof("Checking restore status for operation: %s", operationID) + status, err := appCtx.CHClient.GetRestoreStatus(operationID) + if err != nil { + return err + } + + // Handle based on status + switch status.Status { + case "success": + // Already complete - finalize + appCtx.Logger.Successf("Restore completed successfully") + return finalizeRestore(appCtx) + case "error": + return fmt.Errorf("restore failed: %s", status.Error) + default: + // Status is "in progress" or other + if waitForComplete { + // Still running - wait + appCtx.Logger.Infof("Restore is in progress, waiting for completion...") + return waitAndFinalize(appCtx, appCtx.CHClient, operationID) + } + // Just print status + appCtx.Logger.Println() + appCtx.Logger.Infof("Restore is in progress (status: %s)", status.Status) + restore.PrintAPIRunningRestoreStatus("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) + return nil + } +} + +// finalizeRestore finalizes the restore by executing SQL and scaling up +func finalizeRestore(appCtx *app.Context) error { + if err := executePostRestoreSQL(appCtx); err != nil { + appCtx.Logger.Warningf("Post-restore SQL failed: %v", err) + } + + appCtx.Logger.Println() + scaleSelector := 
appCtx.Config.Clickhouse.Restore.ScaleDownLabelSelector + if err := scale.ScaleUpFromAnnotations( + appCtx.K8sClient, + appCtx.Namespace, + scaleSelector, + appCtx.Logger, + ); err != nil { + return fmt.Errorf("failed to scale up: %w", err) + } + + appCtx.Logger.Println() + appCtx.Logger.Successf("Restore finalized successfully") + return nil +} + +// executePostRestoreSQL executes post-restore SQL commands +func executePostRestoreSQL(appCtx *app.Context) error { + appCtx.Logger.Infof("Executing post-restore SQL commands...") + + // Setup port-forward to ClickHouse database service + pf, err := portforward.SetupPortForward( + appCtx.K8sClient, + appCtx.Namespace, + appCtx.Config.Clickhouse.Service.Name, + appCtx.Config.Clickhouse.Service.LocalPortForwardPort, + appCtx.Config.Clickhouse.Service.Port, + appCtx.Logger, + ) + if err != nil { + return fmt.Errorf("failed to setup port-forward for SQL: %w", err) + } + defer close(pf.StopChan) + + // Create ClickHouse SQL connection + conn, closeConn, err := appCtx.CHClient.Connect() + if err != nil { + return fmt.Errorf("failed to connect to ClickHouse: %w", err) + } + defer func() { + _ = closeConn() + }() + + // Execute post-restore SQL command + query := "SYSTEM DROP MARK CACHE" + + appCtx.Logger.Debugf("Executing SQL: %s", query) + if err := conn.Exec(appCtx.Context, query); err != nil { + return fmt.Errorf("failed to execute SQL: %w", err) + } + + appCtx.Logger.Successf("Post-restore SQL executed successfully") + return nil +} diff --git a/cmd/clickhouse/clickhouse.go b/cmd/clickhouse/clickhouse.go new file mode 100644 index 0000000..fce738a --- /dev/null +++ b/cmd/clickhouse/clickhouse.go @@ -0,0 +1,21 @@ +package clickhouse + +import ( + "github.com/spf13/cobra" + "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" +) + +// NewClickhouseCmd creates the clickhouse parent command +func NewClickhouseCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { + cmd := &cobra.Command{ + Use: "clickhouse", + Short: "Manage Clickhouse backups and restores", + Long: `Commands for listing, restoring, and managing Clickhouse backups.`, + } + + cmd.AddCommand(listCmd(globalFlags)) + cmd.AddCommand(restoreCmd(globalFlags)) + cmd.AddCommand(checkAndFinalizeCmd(globalFlags)) + + return cmd +} diff --git a/cmd/clickhouse/list.go b/cmd/clickhouse/list.go new file mode 100644 index 0000000..efb529b --- /dev/null +++ b/cmd/clickhouse/list.go @@ -0,0 +1,83 @@ +package clickhouse + +import ( + "fmt" + "os" + "sort" + + "github.com/spf13/cobra" + "github.com/stackvista/stackstate-backup-cli/internal/app" + "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" + "github.com/stackvista/stackstate-backup-cli/internal/foundation/output" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/portforward" +) + +func listCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List available Clickhouse backups", + Long: `List all Clickhouse backups from the ClickHouse Backup API.`, + Run: func(_ *cobra.Command, _ []string) { + appCtx, err := app.NewContext(globalFlags) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + if err := runList(appCtx); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + }, + } +} + +func runList(appCtx *app.Context) error { + // Setup port-forward to ClickHouse Backup API + pf, err := portforward.SetupPortForward( + appCtx.K8sClient, + appCtx.Namespace, + 
appCtx.Config.Clickhouse.BackupService.Name, + appCtx.Config.Clickhouse.BackupService.LocalPortForwardPort, + appCtx.Config.Clickhouse.BackupService.Port, + appCtx.Logger, + ) + if err != nil { + return err + } + defer close(pf.StopChan) + + // List backups + appCtx.Logger.Infof("Listing Clickhouse backups...") + appCtx.Logger.Println() + + backups, err := appCtx.CHClient.ListBackups() + if err != nil { + return fmt.Errorf("failed to list backups: %w", err) + } + + if len(backups) == 0 { + appCtx.Formatter.PrintMessage("No backups found") + return nil + } + + // Sort by created time (most recent first) + sort.Slice(backups, func(i, j int) bool { + return backups[i].Created > backups[j].Created + }) + + table := output.Table{ + Headers: []string{"NAME", "CREATED", "SIZE"}, + Rows: make([][]string, 0, len(backups)), + } + + for _, backup := range backups { + row := []string{ + backup.Name, + backup.Created, + output.FormatBytes(backup.Size), + } + table.Rows = append(table.Rows, row) + } + + return appCtx.Formatter.PrintTable(table) +} diff --git a/cmd/clickhouse/restore.go b/cmd/clickhouse/restore.go new file mode 100644 index 0000000..51e8465 --- /dev/null +++ b/cmd/clickhouse/restore.go @@ -0,0 +1,159 @@ +package clickhouse + +import ( + "fmt" + "os" + "sort" + + "github.com/spf13/cobra" + "github.com/stackvista/stackstate-backup-cli/internal/app" + "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/portforward" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/restore" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" +) + +// Restore command flags +var ( + restoreSnapshotName string + restoreUseLatest bool + restoreBackground bool + restoreSkipConfirmation bool +) + +func restoreCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { + cmd := &cobra.Command{ + Use: "restore", + Short: "Restore ClickHouse from a backup archive", + Long: `Restore ClickHouse data from a backup archive via ClickHouse Backup API. 
Waits for completion by default; use --background to run asynchronously.`, + Run: func(_ *cobra.Command, _ []string) { + appCtx, err := app.NewContext(globalFlags) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + if err := runRestore(appCtx); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + }, + } + + cmd.Flags().StringVar(&restoreSnapshotName, "snapshot", "", "Specific snapshot/archive name to restore (e.g., full_2025-11-18T11-45-04)") + cmd.Flags().BoolVar(&restoreUseLatest, "latest", false, "Restore from the most recent backup") + cmd.Flags().BoolVar(&restoreBackground, "background", false, "Run restore in background without waiting for completion") + cmd.Flags().BoolVarP(&restoreSkipConfirmation, "yes", "y", false, "Skip confirmation prompt") + cmd.MarkFlagsMutuallyExclusive("snapshot", "latest") + cmd.MarkFlagsOneRequired("snapshot", "latest") + + return cmd +} + +func runRestore(appCtx *app.Context) error { + // Determine which backup to restore + backupName := restoreSnapshotName + if restoreUseLatest { + appCtx.Logger.Infof("Finding latest backup...") + latest, err := getLatestBackupForRestore(appCtx) + if err != nil { + return err + } + backupName = latest + appCtx.Logger.Infof("Using latest backup: %s", backupName) + } + + // Warn user and ask for confirmation + if !restoreSkipConfirmation { + appCtx.Logger.Println() + appCtx.Logger.Warningf("WARNING: Restoring from backup will overwrite existing ClickHouse data!") + appCtx.Logger.Warningf("This operation cannot be undone.") + appCtx.Logger.Println() + appCtx.Logger.Infof("Backup to restore: %s", backupName) + appCtx.Logger.Infof("Namespace: %s", appCtx.Namespace) + appCtx.Logger.Println() + + if !restore.PromptForConfirmation() { + return fmt.Errorf("restore operation cancelled by user") + } + } + + // Scale down deployments/statefulsets before restore + appCtx.Logger.Println() + scaleDownLabelSelector := appCtx.Config.Clickhouse.Restore.ScaleDownLabelSelector + _, err := scale.ScaleDown(appCtx.K8sClient, appCtx.Namespace, scaleDownLabelSelector, appCtx.Logger) + if err != nil { + return err + } + + // Execute restore workflow + return executeRestore(appCtx, backupName, !restoreBackground) +} + +// executeRestore orchestrates the complete ClickHouse restore workflow +func executeRestore(appCtx *app.Context, backupName string, waitForComplete bool) error { + // Setup port-forward to ClickHouse Backup API + pf, err := portforward.SetupPortForward( + appCtx.K8sClient, + appCtx.Namespace, + appCtx.Config.Clickhouse.BackupService.Name, + appCtx.Config.Clickhouse.BackupService.LocalPortForwardPort, + appCtx.Config.Clickhouse.BackupService.Port, + appCtx.Logger, + ) + if err != nil { + return err + } + defer close(pf.StopChan) + + // Trigger restore + appCtx.Logger.Println() + appCtx.Logger.Infof("Triggering restore for backup: %s", backupName) + operationID, err := appCtx.CHClient.TriggerRestore(backupName) + if err != nil { + return fmt.Errorf("failed to trigger restore: %w", err) + } + appCtx.Logger.Successf("Restore triggered successfully (operation ID: %s)", operationID) + + // Wait for completion if requested + if waitForComplete { + return waitAndFinalize(appCtx, appCtx.CHClient, operationID) + } + + // Print background status + restore.PrintAPIRunningRestoreStatus("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) + return nil +} + +// getLatestBackupForRestore retrieves the most recent backup +func getLatestBackupForRestore(appCtx *app.Context) 
(string, error) { + // Setup port-forward to ClickHouse Backup API + pf, err := portforward.SetupPortForward( + appCtx.K8sClient, + appCtx.Namespace, + appCtx.Config.Clickhouse.BackupService.Name, + appCtx.Config.Clickhouse.BackupService.LocalPortForwardPort, + appCtx.Config.Clickhouse.BackupService.Port, + appCtx.Logger, + ) + if err != nil { + return "", err + } + defer close(pf.StopChan) + + // List backups + backups, err := appCtx.CHClient.ListBackups() + if err != nil { + return "", fmt.Errorf("failed to list backups: %w", err) + } + + if len(backups) == 0 { + return "", fmt.Errorf("no backups found") + } + + // Sort by created time (most recent first) + sort.Slice(backups, func(i, j int) bool { + return backups[i].Created > backups[j].Created + }) + + return backups[0].Name, nil +} diff --git a/cmd/elasticsearch/check_and_finalize.go b/cmd/elasticsearch/check_and_finalize.go new file mode 100644 index 0000000..7fcebe2 --- /dev/null +++ b/cmd/elasticsearch/check_and_finalize.go @@ -0,0 +1,148 @@ +package elasticsearch + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" + "github.com/stackvista/stackstate-backup-cli/internal/app" + "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/portforward" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/restore" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" +) + +// Check-and-finalize command flags +var ( + checkOperationID string + checkWait bool +) + +func checkAndFinalizeCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { + cmd := &cobra.Command{ + Use: "check-and-finalize", + Short: "Check restore status and finalize if complete", + Long: `Check the status of a restore operation and perform finalization (scale up deployments) if complete. 
+If the restore is still running and --wait is specified, wait for completion before finalizing.`, + Run: func(_ *cobra.Command, _ []string) { + appCtx, err := app.NewContext(globalFlags) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + if err := runCheckAndFinalize(appCtx); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + }, + } + + cmd.Flags().StringVar(&checkOperationID, "operation-id", "", "Operation ID of the restore operation (required)") + cmd.Flags().BoolVar(&checkWait, "wait", false, "Wait for restore to complete if still running") + _ = cmd.MarkFlagRequired("operation-id") + + return cmd +} + +func runCheckAndFinalize(appCtx *app.Context) error { + // Setup port-forward to Elasticsearch + serviceName := appCtx.Config.Elasticsearch.Service.Name + localPort := appCtx.Config.Elasticsearch.Service.LocalPortForwardPort + remotePort := appCtx.Config.Elasticsearch.Service.Port + + pf, err := portforward.SetupPortForward(appCtx.K8sClient, appCtx.Namespace, serviceName, localPort, remotePort, appCtx.Logger) + if err != nil { + return err + } + defer close(pf.StopChan) + + repository := appCtx.Config.Elasticsearch.Restore.Repository + + return checkAndFinalize(appCtx, repository, checkOperationID, checkWait) +} + +func checkAndFinalize(appCtx *app.Context, repository, snapshotName string, wait bool) error { + // Get restore status + appCtx.Logger.Infof("Checking restore status for snapshot: %s", snapshotName) + status, isComplete, err := appCtx.ESClient.GetRestoreStatus(repository, snapshotName) + if err != nil { + return fmt.Errorf("failed to get restore status: %w", err) + } + + appCtx.Logger.Debugf("Restore status: %s (complete: %v)", status, isComplete) + + // Handle different scenarios + if isComplete { + switch status { + case "SUCCESS": + appCtx.Logger.Successf("Restore completed successfully") + return finalizeRestore(appCtx) + case "NOT_FOUND": + appCtx.Logger.Infof("No restore operation found for snapshot: %s", snapshotName) + appCtx.Logger.Infof("The restore may have already been finalized") + appCtx.Logger.Println() + appCtx.Logger.Infof("Checking if deployments need to be scaled up...") + return attemptScaleUp(appCtx) + case "FAILED": + return fmt.Errorf("restore failed with status: %s", status) + default: + return fmt.Errorf("restore completed with unexpected status: %s", status) + } + } + + // Restore still running + appCtx.Logger.Infof("Restore is still in progress (status: %s)", status) + + if wait { + appCtx.Logger.Println() + return waitAndFinalize(appCtx, repository, snapshotName) + } + + // Not waiting - print status and exit + appCtx.Logger.Println() + restore.PrintAPIRunningRestoreStatus("elasticsearch", snapshotName, appCtx.Namespace, appCtx.Logger) + return nil +} + +// waitAndFinalize waits for restore to complete and finalizes (scale up) +func waitAndFinalize(appCtx *app.Context, repository, snapshotName string) error { + restore.PrintAPIWaitingMessage("elasticsearch", snapshotName, appCtx.Namespace, appCtx.Logger) + + // Wait for restore to complete + checkStatusFn := func() (string, bool, error) { + return appCtx.ESClient.GetRestoreStatus(repository, snapshotName) + } + + if err := restore.WaitForAPIRestore(checkStatusFn, 0, 0, appCtx.Logger); err != nil { + return err + } + + // Finalize restore (scale up) + return finalizeRestore(appCtx) +} + +// finalizeRestore performs post-restore finalization (scale up deployments) +func finalizeRestore(appCtx *app.Context) error { + 
appCtx.Logger.Println() + scaleUpFn := func() error { + return scale.ScaleUpFromAnnotations(appCtx.K8sClient, appCtx.Namespace, appCtx.Config.Elasticsearch.Restore.ScaleDownLabelSelector, appCtx.Logger) + } + + return restore.FinalizeRestore(scaleUpFn, appCtx.Logger) +} + +// attemptScaleUp tries to scale up deployments (used when restore is not found/already complete) +func attemptScaleUp(appCtx *app.Context) error { + scaleUpFn := func() error { + return scale.ScaleUpFromAnnotations(appCtx.K8sClient, appCtx.Namespace, appCtx.Config.Elasticsearch.Restore.ScaleDownLabelSelector, appCtx.Logger) + } + + if err := scaleUpFn(); err != nil { + // Don't fail if no deployments found to scale up + appCtx.Logger.Infof("No deployments found to scale up (this is normal if already finalized)") + return nil + } + + appCtx.Logger.Successf("Finalization completed successfully") + return nil +} diff --git a/cmd/elasticsearch/elasticsearch.go b/cmd/elasticsearch/elasticsearch.go index abd609c..5f5e397 100644 --- a/cmd/elasticsearch/elasticsearch.go +++ b/cmd/elasticsearch/elasticsearch.go @@ -11,9 +11,10 @@ func Cmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { Short: "Elasticsearch backup and restore operations", } - cmd.AddCommand(listSnapshotsCmd(globalFlags)) + cmd.AddCommand(listCmd(globalFlags)) cmd.AddCommand(listIndicesCmd(globalFlags)) cmd.AddCommand(restoreCmd(globalFlags)) + cmd.AddCommand(checkAndFinalizeCmd(globalFlags)) cmd.AddCommand(configureCmd(globalFlags)) return cmd diff --git a/cmd/elasticsearch/list-snapshots.go b/cmd/elasticsearch/list.go similarity index 88% rename from cmd/elasticsearch/list-snapshots.go rename to cmd/elasticsearch/list.go index 5db5c54..42c7864 100644 --- a/cmd/elasticsearch/list-snapshots.go +++ b/cmd/elasticsearch/list.go @@ -3,6 +3,7 @@ package elasticsearch import ( "fmt" "os" + "sort" "github.com/spf13/cobra" "github.com/stackvista/stackstate-backup-cli/internal/app" @@ -11,9 +12,9 @@ import ( "github.com/stackvista/stackstate-backup-cli/internal/orchestration/portforward" ) -func listSnapshotsCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { +func listCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { return &cobra.Command{ - Use: "list-snapshots", + Use: "list", Short: "List available Elasticsearch snapshots", Run: func(_ *cobra.Command, _ []string) { appCtx, err := app.NewContext(globalFlags) @@ -55,6 +56,11 @@ func runListSnapshots(appCtx *app.Context) error { return nil } + // Sort snapshots by start time in descending order (newest first) + sort.Slice(snapshots, func(i, j int) bool { + return snapshots[i].StartTimeMillis > snapshots[j].StartTimeMillis + }) + table := output.Table{ Headers: []string{"SNAPSHOT", "STATE", "START TIME", "DURATION (ms)", "FAILURES"}, Rows: make([][]string, 0, len(snapshots)), diff --git a/cmd/elasticsearch/list_snapshots_test.go b/cmd/elasticsearch/list_test.go similarity index 92% rename from cmd/elasticsearch/list_snapshots_test.go rename to cmd/elasticsearch/list_test.go index 50269c3..b09ad41 100644 --- a/cmd/elasticsearch/list_snapshots_test.go +++ b/cmd/elasticsearch/list_test.go @@ -89,6 +89,20 @@ settings: requests: cpu: "500m" memory: "1Gi" +clickhouse: + service: + name: "clickhouse" + port: 9000 + localPortForwardPort: 9000 + backupService: + name: "clickhouse" + port: 7171 + localPortForwardPort: 7171 + database: "default" + username: "default" + password: "password" + restore: + scaleDownLabelSelector: "app=clickhouse" ` // mockESClient is a simple mock for testing commands @@ -140,9 +154,9 
@@ func (m *mockESClient) RolloverDatastream(_ string) error { return fmt.Errorf("not implemented") } -// TestListSnapshotsCmd_Integration demonstrates an integration-style test +// TestListCmd_Integration demonstrates an integration-style test // This test uses real fake.Clientset to test the full command flow -func TestListSnapshotsCmd_Integration(t *testing.T) { +func TestListCmd_Integration(t *testing.T) { // Skip this test in short mode as it requires more setup if testing.Short() { t.Skip("skipping integration test in short mode") @@ -202,18 +216,18 @@ elasticsearch: assert.Equal(t, "elasticsearch-master", cfg.Elasticsearch.Service.Name) } -// TestListSnapshotsCmd_Unit demonstrates a unit-style test +// TestListCmd_Unit demonstrates a unit-style test // This test focuses on the command structure and basic behavior -func TestListSnapshotsCmd_Unit(t *testing.T) { +func TestListCmd_Unit(t *testing.T) { flags := config.NewCLIGlobalFlags() flags.Namespace = testNamespace flags.ConfigMapName = testConfigMapName flags.OutputFormat = "table" - cmd := listSnapshotsCmd(flags) + cmd := listCmd(flags) // Test command metadata - assert.Equal(t, "list-snapshots", cmd.Use) + assert.Equal(t, "list", cmd.Use) assert.Equal(t, "List available Elasticsearch snapshots", cmd.Short) assert.NotNil(t, cmd.Run) } diff --git a/cmd/elasticsearch/restore-snapshot.go b/cmd/elasticsearch/restore-snapshot.go deleted file mode 100644 index 30b8cb1..0000000 --- a/cmd/elasticsearch/restore-snapshot.go +++ /dev/null @@ -1,233 +0,0 @@ -package elasticsearch - -import ( - "bufio" - "fmt" - "os" - "strings" - "time" - - "github.com/spf13/cobra" - "github.com/stackvista/stackstate-backup-cli/internal/app" - "github.com/stackvista/stackstate-backup-cli/internal/clients/elasticsearch" - "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" - "github.com/stackvista/stackstate-backup-cli/internal/foundation/logger" - "github.com/stackvista/stackstate-backup-cli/internal/orchestration/portforward" - "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" -) - -const ( - // defaultMaxIndexDeleteAttempts is the maximum number of attempts to verify index deletion - defaultMaxIndexDeleteAttempts = 30 - // defaultIndexDeleteRetryInterval is the time to wait between index deletion verification attempts - defaultIndexDeleteRetryInterval = 1 * time.Second -) - -// Restore command flags -var ( - snapshotName string - dropAllIndices bool - skipConfirmation bool -) - -func restoreCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { - cmd := &cobra.Command{ - Use: "restore-snapshot", - Short: "Restore Elasticsearch from a snapshot", - Long: `Restore Elasticsearch indices from a snapshot. 
Can optionally delete existing indices before restore.`, - Run: func(_ *cobra.Command, _ []string) { - appCtx, err := app.NewContext(globalFlags) - if err != nil { - _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) - os.Exit(1) - } - if err := runRestore(appCtx); err != nil { - _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) - os.Exit(1) - } - }} - - cmd.Flags().StringVarP(&snapshotName, "snapshot-name", "s", "", "Snapshot name to restore (required)") - cmd.Flags().BoolVarP(&dropAllIndices, "drop-all-indices", "r", false, "Delete all existing STS indices before restore") - cmd.Flags().BoolVar(&skipConfirmation, "yes", false, "Skip confirmation prompt") - _ = cmd.MarkFlagRequired("snapshot-name") - return cmd -} - -func runRestore(appCtx *app.Context) error { - // Scale down deployments before restore - scaledDeployments, err := scale.ScaleDown(appCtx.K8sClient, appCtx.Namespace, appCtx.Config.Elasticsearch.Restore.ScaleDownLabelSelector, appCtx.Logger) - if err != nil { - return err - } - - // Ensure deployments are scaled back up on exit (even if restore fails) - defer func() { - if len(scaledDeployments) > 0 { - appCtx.Logger.Println() - if err := scale.ScaleUpFromAnnotations(appCtx.K8sClient, appCtx.Namespace, appCtx.Config.Elasticsearch.Restore.ScaleDownLabelSelector, appCtx.Logger); err != nil { - appCtx.Logger.Warningf("Failed to scale up deployments: %v", err) - } - } - }() - - // Setup port-forward to Elasticsearch - serviceName := appCtx.Config.Elasticsearch.Service.Name - localPort := appCtx.Config.Elasticsearch.Service.LocalPortForwardPort - remotePort := appCtx.Config.Elasticsearch.Service.Port - - pf, err := portforward.SetupPortForward(appCtx.K8sClient, appCtx.Namespace, serviceName, localPort, remotePort, appCtx.Logger) - if err != nil { - return err - } - defer close(pf.StopChan) - - repository := appCtx.Config.Elasticsearch.Restore.Repository - - // Get all indices and filter for STS indices - appCtx.Logger.Infof("Fetching current Elasticsearch indices...") - allIndices, err := appCtx.ESClient.ListIndices("*") - if err != nil { - return fmt.Errorf("failed to list indices: %w", err) - } - - stsIndices := filterSTSIndices(allIndices, appCtx.Config.Elasticsearch.Restore.IndexPrefix, appCtx.Config.Elasticsearch.Restore.DatastreamIndexPrefix) - - if dropAllIndices { - appCtx.Logger.Println() - if err := deleteIndices(appCtx.ESClient, stsIndices, appCtx.Config, appCtx.Logger, skipConfirmation); err != nil { - return err - } - } - - // Restore snapshot - appCtx.Logger.Println() - appCtx.Logger.Infof("Restoring snapshot '%s' from repository '%s'", snapshotName, repository) - - // Get snapshot details to show indices - snapshot, err := appCtx.ESClient.GetSnapshot(repository, snapshotName) - if err != nil { - return fmt.Errorf("failed to get snapshot details: %w", err) - } - - appCtx.Logger.Debugf("Indices pattern: %s", appCtx.Config.Elasticsearch.Restore.IndicesPattern) - - if len(snapshot.Indices) == 0 { - appCtx.Logger.Warningf("Snapshot contains no indices") - } else { - appCtx.Logger.Infof("Snapshot contains %d index(es)", len(snapshot.Indices)) - for _, index := range snapshot.Indices { - appCtx.Logger.Debugf(" - %s", index) - } - } - - appCtx.Logger.Infof("Starting restore - this may take several minutes...") - - if err := appCtx.ESClient.RestoreSnapshot(repository, snapshotName, appCtx.Config.Elasticsearch.Restore.IndicesPattern, true); err != nil { - return fmt.Errorf("failed to restore snapshot: %w", err) - } - - appCtx.Logger.Println() - appCtx.Logger.Successf("Restore 
completed successfully") - return nil -} - -// filterSTSIndices filters indices that match the configured STS prefixes -func filterSTSIndices(allIndices []string, indexPrefix, datastreamPrefix string) []string { - var stsIndices []string - for _, index := range allIndices { - if strings.HasPrefix(index, indexPrefix) || strings.HasPrefix(index, datastreamPrefix) { - stsIndices = append(stsIndices, index) - } - } - return stsIndices -} - -// confirmDeletion prompts the user to confirm index deletion -func confirmDeletion() error { - fmt.Print("\nAre you sure you want to delete these indices? (yes/no): ") - reader := bufio.NewReader(os.Stdin) - response, err := reader.ReadString('\n') - if err != nil { - return fmt.Errorf("failed to read confirmation: %w", err) - } - response = strings.TrimSpace(strings.ToLower(response)) - if response != "yes" && response != "y" { - return fmt.Errorf("restore cancelled by user") - } - return nil -} - -// hasDatastreamIndices checks if any indices belong to a datastream -func hasDatastreamIndices(indices []string, datastreamPrefix string) bool { - for _, index := range indices { - if strings.HasPrefix(index, datastreamPrefix+"-") { - return true - } - } - return false -} - -// deleteIndexWithVerification deletes an index and verifies it's gone -func deleteIndexWithVerification(esClient elasticsearch.Interface, index string, log *logger.Logger) error { - log.Infof(" Deleting index: %s", index) - if err := esClient.DeleteIndex(index); err != nil { - return fmt.Errorf("failed to delete index %s: %w", index, err) - } - - // Verify deletion with timeout - for attempt := 0; attempt < defaultMaxIndexDeleteAttempts; attempt++ { - exists, err := esClient.IndexExists(index) - if err != nil { - return fmt.Errorf("failed to check index existence: %w", err) - } - if !exists { - log.Debugf("Index successfully deleted: %s", index) - return nil - } - if attempt >= defaultMaxIndexDeleteAttempts-1 { - return fmt.Errorf("timeout waiting for index %s to be deleted", index) - } - time.Sleep(defaultIndexDeleteRetryInterval) - } - return nil -} - -// deleteIndices handles the deletion of all STS indices including datastream rollover -func deleteIndices(esClient elasticsearch.Interface, stsIndices []string, cfg *config.Config, log *logger.Logger, skipConfirm bool) error { - if len(stsIndices) == 0 { - log.Infof("No STS indices found to delete") - return nil - } - - log.Infof("Found %d STS index(es) to delete", len(stsIndices)) - for _, index := range stsIndices { - log.Debugf(" - %s", index) - } - - // Confirmation prompt - if !skipConfirm { - if err := confirmDeletion(); err != nil { - return err - } - } - - // Check for datastream and rollover if needed - if hasDatastreamIndices(stsIndices, cfg.Elasticsearch.Restore.DatastreamIndexPrefix) { - log.Infof("Rolling over datastream '%s'...", cfg.Elasticsearch.Restore.DatastreamName) - if err := esClient.RolloverDatastream(cfg.Elasticsearch.Restore.DatastreamName); err != nil { - return fmt.Errorf("failed to rollover datastream: %w", err) - } - log.Successf("Datastream rolled over successfully") - } - - // Delete all indices - log.Infof("Deleting %d index(es)...", len(stsIndices)) - for _, index := range stsIndices { - if err := deleteIndexWithVerification(esClient, index, log); err != nil { - return err - } - } - log.Successf("All indices deleted successfully") - return nil -} diff --git a/cmd/elasticsearch/restore.go b/cmd/elasticsearch/restore.go new file mode 100644 index 0000000..5a9f389 --- /dev/null +++ 
b/cmd/elasticsearch/restore.go @@ -0,0 +1,236 @@ +package elasticsearch + +import ( + "fmt" + "os" + "sort" + "strings" + "time" + + "github.com/spf13/cobra" + "github.com/stackvista/stackstate-backup-cli/internal/app" + es "github.com/stackvista/stackstate-backup-cli/internal/clients/elasticsearch" + "github.com/stackvista/stackstate-backup-cli/internal/foundation/config" + "github.com/stackvista/stackstate-backup-cli/internal/foundation/logger" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/portforward" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/restore" + "github.com/stackvista/stackstate-backup-cli/internal/orchestration/scale" +) + +const ( + // defaultMaxIndexDeleteAttempts is the maximum number of attempts to verify index deletion + defaultMaxIndexDeleteAttempts = 30 + // defaultIndexDeleteRetryInterval is the time to wait between index deletion verification attempts + defaultIndexDeleteRetryInterval = 1 * time.Second +) + +// Restore command flags +var ( + snapshotName string + useLatest bool + runBackground bool + skipConfirmation bool +) + +func restoreCmd(globalFlags *config.CLIGlobalFlags) *cobra.Command { + cmd := &cobra.Command{ + Use: "restore", + Short: "Restore Elasticsearch from a snapshot", + Long: `Restore Elasticsearch indices from a snapshot. Deletes existing STS indices before restore. Waits for completion by default; use --background to run asynchronously.`, + Run: func(_ *cobra.Command, _ []string) { + appCtx, err := app.NewContext(globalFlags) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + if err := runRestore(appCtx); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + }} + + cmd.Flags().StringVarP(&snapshotName, "snapshot", "s", "", "Snapshot name to restore (mutually exclusive with --latest)") + cmd.Flags().BoolVar(&useLatest, "latest", false, "Restore from the most recent snapshot (mutually exclusive with --snapshot)") + cmd.Flags().BoolVar(&runBackground, "background", false, "Run restore in background without waiting for completion") + cmd.Flags().BoolVarP(&skipConfirmation, "yes", "y", false, "Skip confirmation prompt") + cmd.MarkFlagsMutuallyExclusive("snapshot", "latest") + cmd.MarkFlagsOneRequired("snapshot", "latest") + return cmd +} + +func runRestore(appCtx *app.Context) error { + // Setup port-forward to Elasticsearch (needed for both snapshot selection and restore) + serviceName := appCtx.Config.Elasticsearch.Service.Name + localPort := appCtx.Config.Elasticsearch.Service.LocalPortForwardPort + remotePort := appCtx.Config.Elasticsearch.Service.Port + + pf, err := portforward.SetupPortForward(appCtx.K8sClient, appCtx.Namespace, serviceName, localPort, remotePort, appCtx.Logger) + if err != nil { + return err + } + defer close(pf.StopChan) + + repository := appCtx.Config.Elasticsearch.Restore.Repository + + // Determine snapshot name (either from flag or latest) + selectedSnapshot := snapshotName + if useLatest { + appCtx.Logger.Infof("Fetching latest snapshot from repository '%s'...", repository) + latestSnapshot, err := getLatestSnapshot(appCtx, repository) + if err != nil { + return err + } + selectedSnapshot = latestSnapshot + appCtx.Logger.Successf("Latest snapshot found: %s", selectedSnapshot) + } + + // Confirm with user before starting destructive operation + if !skipConfirmation { + appCtx.Logger.Println() + appCtx.Logger.Warningf("WARNING: Restoring from snapshot will DELETE all existing STS indices!") + 
appCtx.Logger.Warningf("This operation cannot be undone.") + appCtx.Logger.Println() + appCtx.Logger.Infof("Snapshot to restore: %s", selectedSnapshot) + appCtx.Logger.Infof("Namespace: %s", appCtx.Namespace) + appCtx.Logger.Println() + + if !restore.PromptForConfirmation() { + return fmt.Errorf("restore operation cancelled by user") + } + } + + // Scale down deployments before restore + appCtx.Logger.Println() + _, err = scale.ScaleDown(appCtx.K8sClient, appCtx.Namespace, appCtx.Config.Elasticsearch.Restore.ScaleDownLabelSelector, appCtx.Logger) + if err != nil { + return err + } + + // Delete all STS indices before restore + appCtx.Logger.Println() + if err := deleteAllSTSIndices(appCtx); err != nil { + return err + } + + // Trigger async restore + appCtx.Logger.Println() + appCtx.Logger.Infof("Triggering restore for snapshot: %s", selectedSnapshot) + if err := appCtx.ESClient.RestoreSnapshot(repository, selectedSnapshot, appCtx.Config.Elasticsearch.Restore.IndicesPattern); err != nil { + return fmt.Errorf("failed to trigger restore: %w", err) + } + appCtx.Logger.Successf("Restore triggered successfully") + + // Wait for completion unless background mode requested + if !runBackground { + return waitAndFinalize(appCtx, repository, selectedSnapshot) + } + + // Print background status + restore.PrintAPIRunningRestoreStatus("elasticsearch", selectedSnapshot, appCtx.Namespace, appCtx.Logger) + return nil +} + +// getLatestSnapshot retrieves the most recent snapshot from the repository +func getLatestSnapshot(appCtx *app.Context, repository string) (string, error) { + snapshots, err := appCtx.ESClient.ListSnapshots(repository) + if err != nil { + return "", fmt.Errorf("failed to list snapshots: %w", err) + } + + if len(snapshots) == 0 { + return "", fmt.Errorf("no snapshots found in repository '%s'", repository) + } + + // Sort snapshots by start time in descending order (newest first) + sort.Slice(snapshots, func(i, j int) bool { + return snapshots[i].StartTimeMillis > snapshots[j].StartTimeMillis + }) + + return snapshots[0].Snapshot, nil +} + +// deleteAllSTSIndices deletes all STS indices including datastream rollover if needed +func deleteAllSTSIndices(appCtx *app.Context) error { + appCtx.Logger.Infof("Fetching current Elasticsearch indices...") + allIndices, err := appCtx.ESClient.ListIndices("*") + if err != nil { + return fmt.Errorf("failed to list indices: %w", err) + } + + stsIndices := filterSTSIndices(allIndices, appCtx.Config.Elasticsearch.Restore.IndexPrefix, appCtx.Config.Elasticsearch.Restore.DatastreamIndexPrefix) + + if len(stsIndices) == 0 { + appCtx.Logger.Infof("No STS indices found to delete") + return nil + } + + appCtx.Logger.Infof("Found %d STS index(es) to delete", len(stsIndices)) + for _, index := range stsIndices { + appCtx.Logger.Debugf(" - %s", index) + } + + // Check for datastream and rollover if needed + if hasDatastreamIndices(stsIndices, appCtx.Config.Elasticsearch.Restore.DatastreamIndexPrefix) { + appCtx.Logger.Infof("Rolling over datastream '%s'...", appCtx.Config.Elasticsearch.Restore.DatastreamName) + if err := appCtx.ESClient.RolloverDatastream(appCtx.Config.Elasticsearch.Restore.DatastreamName); err != nil { + return fmt.Errorf("failed to rollover datastream: %w", err) + } + appCtx.Logger.Successf("Datastream rolled over successfully") + } + + // Delete all indices + appCtx.Logger.Infof("Deleting %d index(es)...", len(stsIndices)) + for _, index := range stsIndices { + if err := deleteIndexWithVerification(appCtx.ESClient, index, appCtx.Logger); err 
!= nil { + return err + } + } + appCtx.Logger.Successf("All indices deleted successfully") + return nil +} + +// filterSTSIndices filters indices that match the configured STS prefixes +func filterSTSIndices(allIndices []string, indexPrefix, datastreamPrefix string) []string { + var stsIndices []string + for _, index := range allIndices { + if strings.HasPrefix(index, indexPrefix) || strings.HasPrefix(index, datastreamPrefix) { + stsIndices = append(stsIndices, index) + } + } + return stsIndices +} + +// hasDatastreamIndices checks if any indices belong to a datastream +func hasDatastreamIndices(indices []string, datastreamPrefix string) bool { + for _, index := range indices { + if strings.HasPrefix(index, datastreamPrefix+"-") { + return true + } + } + return false +} + +// deleteIndexWithVerification deletes an index and verifies it's gone +func deleteIndexWithVerification(esClient es.Interface, index string, log *logger.Logger) error { + log.Infof(" Deleting index: %s", index) + if err := esClient.DeleteIndex(index); err != nil { + return fmt.Errorf("failed to delete index %s: %w", index, err) + } + + // Verify deletion with timeout + for attempt := 0; attempt < defaultMaxIndexDeleteAttempts; attempt++ { + exists, err := esClient.IndexExists(index) + if err != nil { + return fmt.Errorf("failed to check index existence: %w", err) + } + if !exists { + log.Debugf("Index successfully deleted: %s", index) + return nil + } + if attempt >= defaultMaxIndexDeleteAttempts-1 { + return fmt.Errorf("timeout waiting for index %s to be deleted", index) + } + time.Sleep(defaultIndexDeleteRetryInterval) + } + return nil +} diff --git a/cmd/elasticsearch/restore_snapshot_test.go b/cmd/elasticsearch/restore_test.go similarity index 95% rename from cmd/elasticsearch/restore_snapshot_test.go rename to cmd/elasticsearch/restore_test.go index 49b5018..c7f3055 100644 --- a/cmd/elasticsearch/restore_snapshot_test.go +++ b/cmd/elasticsearch/restore_test.go @@ -60,7 +60,7 @@ func (m *mockESClientForRestore) IndexExists(index string) (bool, error) { return exists, nil } -func (m *mockESClientForRestore) RestoreSnapshot(_, snapshotName, _ string, _ bool) error { +func (m *mockESClientForRestore) RestoreSnapshot(_, snapshotName, _ string) error { if m.restoreErr != nil { return m.restoreErr } @@ -92,6 +92,14 @@ func (m *mockESClientForRestore) ConfigureSLMPolicy(_, _, _, _, _, _ string, _, return fmt.Errorf("not implemented") } +func (m *mockESClientForRestore) GetRestoreStatus(_, _ string) (string, bool, error) { + return "NOT_FOUND", true, nil +} + +func (m *mockESClientForRestore) IsRestoreInProgress(_, _ string) (bool, error) { + return false, nil +} + // TestRestoreCmd_Unit tests the command structure func TestRestoreCmd_Unit(t *testing.T) { flags := config.NewCLIGlobalFlags() @@ -100,22 +108,22 @@ func TestRestoreCmd_Unit(t *testing.T) { cmd := restoreCmd(flags) // Test command metadata - assert.Equal(t, "restore-snapshot", cmd.Use) + assert.Equal(t, "restore", cmd.Use) assert.Equal(t, "Restore Elasticsearch from a snapshot", cmd.Short) assert.NotEmpty(t, cmd.Long) assert.NotNil(t, cmd.Run) // Test flags - snapshotFlag := cmd.Flags().Lookup("snapshot-name") + snapshotFlag := cmd.Flags().Lookup("snapshot") require.NotNil(t, snapshotFlag) assert.Equal(t, "s", snapshotFlag.Shorthand) - dropFlag := cmd.Flags().Lookup("drop-all-indices") - require.NotNil(t, dropFlag) - assert.Equal(t, "r", dropFlag.Shorthand) + backgroundFlag := cmd.Flags().Lookup("background") + require.NotNil(t, backgroundFlag) yesFlag := 
cmd.Flags().Lookup("yes") require.NotNil(t, yesFlag) + assert.Equal(t, "y", yesFlag.Shorthand) } // TestFilterSTSIndices tests the index filtering logic @@ -344,7 +352,7 @@ func TestMockESClientForRestore(t *testing.T) { } // Test restore - err := mockClient.RestoreSnapshot("backup-repo", "test-snapshot", "sts_*", true) + err := mockClient.RestoreSnapshot("backup-repo", "test-snapshot", "sts_*") if tt.expectRestoreOK { assert.NoError(t, err) assert.Equal(t, "test-snapshot", mockClient.restoredSnapshot) diff --git a/cmd/root.go b/cmd/root.go index 394da47..f2d7686 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -4,6 +4,7 @@ import ( "os" "github.com/spf13/cobra" + "github.com/stackvista/stackstate-backup-cli/cmd/clickhouse" "github.com/stackvista/stackstate-backup-cli/cmd/elasticsearch" "github.com/stackvista/stackstate-backup-cli/cmd/settings" "github.com/stackvista/stackstate-backup-cli/cmd/stackgraph" @@ -49,6 +50,10 @@ func init() { addBackupConfigFlags(victoriaMetricsCmd) rootCmd.AddCommand(victoriaMetricsCmd) + clickhouseCmd := clickhouse.NewClickhouseCmd(flags) + addBackupConfigFlags(clickhouseCmd) + rootCmd.AddCommand(clickhouseCmd) + // Add commands that don't need backup config flags rootCmd.AddCommand(version.Cmd()) } diff --git a/go.mod b/go.mod index 5f12358..fb69ce0 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,10 @@ module github.com/stackvista/stackstate-backup-cli -go 1.25.2 +go 1.25.3 require ( dario.cat/mergo v1.0.2 + github.com/ClickHouse/clickhouse-go/v2 v2.41.0 github.com/aws/aws-sdk-go-v2 v1.39.3 github.com/aws/aws-sdk-go-v2/config v1.31.13 github.com/aws/aws-sdk-go-v2/credentials v1.18.17 @@ -19,6 +20,8 @@ require ( ) require ( + github.com/ClickHouse/ch-go v0.69.0 // indirect + github.com/andybalholm/brotli v1.2.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.10 // indirect @@ -38,7 +41,9 @@ require ( github.com/emicklei/go-restful/v3 v3.12.2 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/gabriel-vasile/mimetype v1.4.10 // indirect - github.com/go-logr/logr v1.4.2 // indirect + github.com/go-faster/city v1.0.1 // indirect + github.com/go-faster/errors v0.7.1 // indirect + github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect @@ -52,6 +57,7 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.18.0 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/moby/spdystream v0.5.0 // indirect @@ -59,21 +65,26 @@ require ( github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect + github.com/paulmach/orb v0.12.0 // indirect + github.com/pierrec/lz4/v4 v4.1.22 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/segmentio/asm v1.2.1 // indirect + github.com/shopspring/decimal v1.4.0 // indirect github.com/spf13/pflag v1.0.9 // indirect github.com/x448/float16 v0.8.4 // indirect - go.opentelemetry.io/otel v1.28.0 // indirect - 
go.opentelemetry.io/otel/metric v1.28.0 // indirect - go.opentelemetry.io/otel/trace v1.28.0 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/crypto v0.42.0 // indirect - golang.org/x/net v0.43.0 // indirect + golang.org/x/crypto v0.44.0 // indirect + golang.org/x/net v0.47.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sys v0.36.0 // indirect - golang.org/x/term v0.35.0 // indirect - golang.org/x/text v0.29.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/term v0.37.0 // indirect + golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.9.0 // indirect google.golang.org/protobuf v1.36.5 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect diff --git a/go.sum b/go.sum index cfd0103..09151c5 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,11 @@ dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= +github.com/ClickHouse/ch-go v0.69.0 h1:nO0OJkpxOlN/eaXFj0KzjTz5p7vwP1/y3GN4qc5z/iM= +github.com/ClickHouse/ch-go v0.69.0/go.mod h1:9XeZpSAT4S0kVjOpaJ5186b7PY/NH/hhF8R6u0WIjwg= +github.com/ClickHouse/clickhouse-go/v2 v2.41.0 h1:JbLKMXLEkW0NMalMgI+GYb6FVZtpaMVEzQa/HC1ZMRE= +github.com/ClickHouse/clickhouse-go/v2 v2.41.0/go.mod h1:/RoTHh4aDA4FOCIQggwsiOwO7Zq1+HxQ0inef0Au/7k= +github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ= +github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/aws/aws-sdk-go-v2 v1.39.3 h1:h7xSsanJ4EQJXG5iuW4UqgP7qBopLpj84mpkNx3wPjM= @@ -54,9 +60,13 @@ github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sa github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0= github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= +github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw= +github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw= +github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg= +github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= @@ -79,8 +89,12 @@ github.com/go-task/slim-sprig/v3 v3.0.0 
h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -98,6 +112,10 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -117,6 +135,7 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= @@ -125,6 +144,11 @@ github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/paulmach/orb v0.12.0 h1:z+zOwjmG3MyEEqzv92UN49Lg1JFYx0L9GpGKNVDKk1s= +github.com/paulmach/orb v0.12.0/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU= +github.com/paulmach/protoscan v0.2.1/go.mod 
h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= +github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= +github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -132,6 +156,10 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/segmentio/asm v1.2.1 h1:DTNbBqs57ioxAD4PrArqftgypG4/qNpXoJx8TVXxPR0= +github.com/segmentio/asm v1.2.1/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs= +github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= +github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= @@ -142,23 +170,34 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= +github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= -go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= -go.opentelemetry.io/otel/metric v1.28.0 
h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= -go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= -go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= -go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= -go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= -go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= @@ -166,47 +205,59 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= -golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU= +golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= -golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod 
h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= -golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= +golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= -golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod 
h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= diff --git a/internal/app/app.go b/internal/app/app.go index 1d746ce..7cd1d94 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -1,9 +1,11 @@ package app import ( + "context" "fmt" "os" + "github.com/stackvista/stackstate-backup-cli/internal/clients/clickhouse" "github.com/stackvista/stackstate-backup-cli/internal/clients/elasticsearch" "github.com/stackvista/stackstate-backup-cli/internal/clients/k8s" "github.com/stackvista/stackstate-backup-cli/internal/clients/s3" @@ -18,9 +20,11 @@ type Context struct { Namespace string S3Client s3.Interface ESClient elasticsearch.Interface + CHClient clickhouse.Interface Config *config.Config Logger *logger.Logger Formatter *output.Formatter + Context context.Context } // NewContext creates production dependencies @@ -49,6 +53,17 @@ func NewContext(flags *config.CLIGlobalFlags) (*Context, error) { return nil, fmt.Errorf("failed to create Elasticsearch client: %w", err) } + // Create ClickHouse client + chClient, err := clickhouse.NewClient( + fmt.Sprintf("http://localhost:%d", cfg.Clickhouse.BackupService.LocalPortForwardPort), + fmt.Sprintf("localhost:%d", cfg.Clickhouse.Service.LocalPortForwardPort), + cfg.Clickhouse.Database, + cfg.Clickhouse.Username, + cfg.Clickhouse.Password) + if err != nil { + return nil, fmt.Errorf("failed to create ClickHouse client: %w", err) + } + // Format and print backups formatter := output.NewFormatter(os.Stdout, flags.OutputFormat) @@ -58,7 +73,9 @@ func NewContext(flags *config.CLIGlobalFlags) (*Context, error) { Config: cfg, S3Client: s3Client, ESClient: esClient, + CHClient: chClient, Logger: logger.New(flags.Quiet, flags.Debug), Formatter: formatter, + Context: context.Background(), }, nil } diff --git a/internal/clients/clickhouse/client.go b/internal/clients/clickhouse/client.go new file mode 100644 index 0000000..e2bf6e1 --- /dev/null +++ b/internal/clients/clickhouse/client.go @@ -0,0 +1,314 @@ +package clickhouse + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "time" + + clickhouseDriver "github.com/ClickHouse/clickhouse-go/v2" + "github.com/ClickHouse/clickhouse-go/v2/lib/driver" +) + +const ( + // 
defaultRestoreOperationTimeout is the timeout for waiting for restore operation to be created + defaultRestoreOperationTimeout = 30 * time.Second + // defaultRestoreOperationPollInterval is the interval between checks for restore operation + defaultRestoreOperationPollInterval = 2 * time.Second +) + +// Client represents a ClickHouse Backup API client with optional SQL support +type Client struct { + backupAPIURL string + backupAPIHTTPClient *http.Client + clickhouseAddr string + clickhouseDatabase string + clickhouseUsername string + clickhousePassword string +} + +// Backup represents a ClickHouse backup from the API +type Backup struct { + Name string `json:"name"` + Created string `json:"created"` + Size int64 `json:"size"` + DataSize int64 `json:"data_size"` + MetadataSize int64 `json:"metadata_size"` + CompressedSize int64 `json:"compressed_size"` + Location string `json:"location"` + Required string `json:"required"` + Desc string `json:"desc"` +} + +// RestoreAction represents a restore action from the backup API +type RestoreAction struct { + Command string `json:"command"` + Start string `json:"start"` + Finish string `json:"finish"` + Status string `json:"status"` // "in progress", "success", "error" + Error string `json:"error"` + OperationID string `json:"operation_id"` +} + +// NewClient creates a new ClickHouse client with both Backup API and SQL support +func NewClient(backupAPI, addr, db, username, password string) (*Client, error) { + if backupAPI == "" { + return nil, fmt.Errorf("backupAPIURL cannot be empty") + } + if addr == "" { + return nil, fmt.Errorf("clickhouseAddr cannot be empty") + } + if db == "" { + return nil, fmt.Errorf("clickhouseDatabase cannot be empty") + } + if username == "" { + return nil, fmt.Errorf("clickhouseUsername cannot be empty") + } + if password == "" { + return nil, fmt.Errorf("clickhousePassword cannot be empty") + } + + return &Client{ + backupAPIURL: backupAPI, + backupAPIHTTPClient: &http.Client{ + Timeout: 30 * time.Second, + }, + clickhouseAddr: addr, + clickhouseDatabase: db, + clickhouseUsername: username, + clickhousePassword: password, + }, nil +} + +// ListBackups retrieves all backups from ClickHouse Backup API +// The API returns newline-delimited JSON (NDJSON) format +func (c *Client) ListBackups() ([]Backup, error) { + url := fmt.Sprintf("%s/backup/list", c.backupAPIURL) + + ctx := context.Background() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.backupAPIHTTPClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to execute request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("backup API returned status %d", resp.StatusCode) + } + + // Parse NDJSON response (newline-delimited JSON) + var backups []Backup + dec := json.NewDecoder(resp.Body) + for { + var backup Backup + if err := dec.Decode(&backup); err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + backups = append(backups, backup) + } + + return backups, nil +} + +// TriggerRestore initiates a restore operation via HTTP POST and returns the restore operation ID +// POST /backup/download/${BACKUP_NAME}?callback=http://localhost:{port}/backup/restore/${BACKUP_NAME} +// Note: The initial response contains the download operation ID, but we need to poll for the restore operation ID +func (c *Client) 
TriggerRestore(backupName string) (string, error) { + callbackURL := fmt.Sprintf("%s/backup/restore/%s", c.backupAPIURL, backupName) + reqURL := fmt.Sprintf("%s/backup/download/%s?callback=%s", c.backupAPIURL, backupName, url.QueryEscape(callbackURL)) + + ctx := context.Background() + req, err := http.NewRequestWithContext(ctx, http.MethodPost, reqURL, nil) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.backupAPIHTTPClient.Do(req) + if err != nil { + return "", fmt.Errorf("failed to trigger restore: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("restore API returned status %d", resp.StatusCode) + } + + // Parse response to get download operation ID (not the restore operation ID) + var downloadAction RestoreAction + if err := json.NewDecoder(resp.Body).Decode(&downloadAction); err != nil { + return "", fmt.Errorf("failed to decode response: %w", err) + } + + // Poll for the restore operation (command contains "restore" not "download") + // The restore is triggered via callback after download completes, so we need to wait + return c.waitForRestoreOperationID(backupName, defaultRestoreOperationTimeout, defaultRestoreOperationPollInterval) +} + +// waitForRestoreOperationID polls for the restore operation ID with timeout and retry +func (c *Client) waitForRestoreOperationID(backupName string, timeout, pollInterval time.Duration) (string, error) { + deadline := time.After(timeout) + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + for { + select { + case <-deadline: + return "", fmt.Errorf("timeout waiting for restore operation to be created for backup: %s", backupName) + case <-ticker.C: + operationID, err := c.getRestoreOperationID(backupName) + if err == nil { + return operationID, nil + } + // Continue polling on error (restore might not be created yet) + } + } +} + +// getRestoreOperationID polls for the restore operation ID for a given backup +// It looks for the most recent restore action (not download) matching the backup name +func (c *Client) getRestoreOperationID(backupName string) (string, error) { + reqURL := fmt.Sprintf("%s/backup/actions?filter=restore", c.backupAPIURL) + + ctx := context.Background() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.backupAPIHTTPClient.Do(req) + if err != nil { + return "", fmt.Errorf("failed to get actions: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("actions API returned %d", resp.StatusCode) + } + + // Parse NDJSON response + var actions []RestoreAction + dec := json.NewDecoder(resp.Body) + for { + var action RestoreAction + if err := dec.Decode(&action); err == io.EOF { + break + } else if err != nil { + return "", fmt.Errorf("failed to decode: %w", err) + } + // Look for restore command (not download) matching the backup name + if action.Command == fmt.Sprintf("restore %s", backupName) { + actions = append(actions, action) + } + } + + if len(actions) == 0 { + return "", fmt.Errorf("no restore operation found for backup: %s", backupName) + } + + // Return most recent action's operation ID (last in list) + return actions[len(actions)-1].OperationID, nil +} + +// GetRestoreStatus retrieves the current restore status for a specific backup +// GET /backup/actions?filter=restore +// Returns the most recent restore action matching 
the operation id +func (c *Client) GetRestoreStatus(operationID string) (*RestoreAction, error) { + reqURL := fmt.Sprintf("%s/backup/actions?filter=restore", c.backupAPIURL) + + ctx := context.Background() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.backupAPIHTTPClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to get status: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("status API returned %d", resp.StatusCode) + } + + // Parse NDJSON response (newline-delimited JSON) + var actions []RestoreAction + dec := json.NewDecoder(resp.Body) + for { + var action RestoreAction + if err := dec.Decode(&action); err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("failed to decode: %w", err) + } + // Filter by operation id + if action.OperationID == operationID { + actions = append(actions, action) + } + } + + if len(actions) == 0 { + return nil, fmt.Errorf("no restore action found for operation id: %s", operationID) + } + + // Return most recent action (last in list) + return &actions[len(actions)-1], nil +} + +// WaitForRestoreCompletion polls until restore completes or times out +func (c *Client) WaitForRestoreCompletion(operationID string, timeout, pollInterval time.Duration) error { + deadline := time.After(timeout) + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + for { + select { + case <-deadline: + return fmt.Errorf("timeout waiting for restore to complete") + case <-ticker.C: + status, err := c.GetRestoreStatus(operationID) + if err != nil { + // Continue polling on error (might be transient) + continue + } + + if status.Status == "success" { + return nil + } + + if status.Status == "error" { + return fmt.Errorf("restore failed: %s", status.Error) + } + + // Status is "in progress" - continue polling + } + } +} + +// Connect opens a connection to ClickHouse instance +func (c *Client) Connect() (driver.Conn, func() error, error) { + // Create ClickHouse SQL connection + conn, err := clickhouseDriver.Open(&clickhouseDriver.Options{ + Addr: []string{c.clickhouseAddr}, + Auth: clickhouseDriver.Auth{ + Database: c.clickhouseDatabase, + Username: c.clickhouseUsername, + Password: c.clickhousePassword, + }, + }) + if err != nil { + return nil, nil, fmt.Errorf("failed to connect to ClickHouse: %w", err) + } + + return conn, conn.Close, nil +} diff --git a/internal/clients/clickhouse/client_test.go b/internal/clients/clickhouse/client_test.go new file mode 100644 index 0000000..3ecfe66 --- /dev/null +++ b/internal/clients/clickhouse/client_test.go @@ -0,0 +1,123 @@ +package clickhouse + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewClient(t *testing.T) { + tests := []struct { + name string + baseURL string + wantError bool + }{ + { + name: "valid backupAPIURL", + baseURL: "http://localhost:7171", + wantError: false, + }, + { + name: "empty backupAPIURL", + baseURL: "", + wantError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client, err := NewClient(tt.baseURL, "localhost:9000", "default", "default", "password") + if tt.wantError { + assert.Error(t, err) + assert.Nil(t, client) + } else { + assert.NoError(t, err) + assert.NotNil(t, client) + assert.Equal(t, tt.baseURL, client.backupAPIURL) + } + }) + } +} 
+ +func TestListBackups_Success(t *testing.T) { + // Create mock HTTP server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/backup/list", r.URL.Path) + assert.Equal(t, http.MethodGet, r.Method) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + // NDJSON format: newline-delimited JSON objects + _, _ = w.Write([]byte(`{"name":"full_2025-11-18T11-45-04","created":"2025-11-18 11:45:07","size":48915,"data_size":2649,"metadata_size":7955,"compressed_size":40960,"location":"remote","required":"","desc":"tar, regular"} +{"name":"incremental_2025-11-18T12-45-00","created":"2025-11-18 12:45:03","size":21827776,"data_size":21301225,"metadata_size":10944,"compressed_size":21816832,"location":"remote","required":"full_2025-11-18T11-45-04","desc":"tar, regular"} +`)) + })) + defer server.Close() + + client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password") + require.NoError(t, err) + + backups, err := client.ListBackups() + require.NoError(t, err) + assert.Len(t, backups, 2) + + assert.Equal(t, "full_2025-11-18T11-45-04", backups[0].Name) + assert.Equal(t, "2025-11-18 11:45:07", backups[0].Created) + assert.Equal(t, int64(48915), backups[0].Size) + + assert.Equal(t, "incremental_2025-11-18T12-45-00", backups[1].Name) + assert.Equal(t, "2025-11-18 12:45:03", backups[1].Created) + assert.Equal(t, int64(21827776), backups[1].Size) +} + +func TestListBackups_EmptyList(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + // Empty NDJSON response - no content + _, _ = w.Write([]byte(``)) + })) + defer server.Close() + + client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password") + require.NoError(t, err) + + backups, err := client.ListBackups() + require.NoError(t, err) + assert.Empty(t, backups) +} + +func TestListBackups_ServerError(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password") + require.NoError(t, err) + + backups, err := client.ListBackups() + assert.Error(t, err) + assert.Nil(t, backups) + assert.Contains(t, err.Error(), "backup API returned status 500") +} + +func TestListBackups_InvalidJSON(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{invalid json`)) + })) + defer server.Close() + + client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password") + require.NoError(t, err) + + backups, err := client.ListBackups() + assert.Error(t, err) + assert.Nil(t, backups) + assert.Contains(t, err.Error(), "failed to decode response") +} diff --git a/internal/clients/clickhouse/interface.go b/internal/clients/clickhouse/interface.go new file mode 100644 index 0000000..bf70737 --- /dev/null +++ b/internal/clients/clickhouse/interface.go @@ -0,0 +1,28 @@ +package clickhouse + +import ( + "time" + + "github.com/ClickHouse/clickhouse-go/v2/lib/driver" +) + +// Interface defines the contract for ClickHouse Backup API client operations +type Interface interface { + // ListBackups retrieves 
all backups from ClickHouse Backup API + ListBackups() ([]Backup, error) + + // TriggerRestore initiates a restore operation and returns the operation ID + TriggerRestore(backupName string) (string, error) + + // GetRestoreStatus retrieves the current restore status + GetRestoreStatus(operationID string) (*RestoreAction, error) + + // WaitForRestoreCompletion polls until restore completes or times out + WaitForRestoreCompletion(operationID string, timeout, pollInterval time.Duration) error + + // Connect opens connection to a ClickHouse database + Connect() (driver.Conn, func() error, error) +} + +// Ensure *Client implements Interface +var _ Interface = (*Client)(nil) diff --git a/internal/clients/clickhouse/sql.go b/internal/clients/clickhouse/sql.go new file mode 100644 index 0000000..f6838e5 --- /dev/null +++ b/internal/clients/clickhouse/sql.go @@ -0,0 +1,33 @@ +package clickhouse + +import "github.com/stackvista/stackstate-backup-cli/internal/foundation/logger" + +// ExecutePostRestoreCommand is a stub for executing ClickHouse SQL commands after restore +// TODO: Implement actual ClickHouse SQL client and commands +// +// Future implementation should include: +// - Connect to ClickHouse via native protocol or HTTP interface +// - Execute necessary post-restore SQL commands such as: +// - ATTACH TABLE commands to reattach restored tables +// - SYSTEM RELOAD DICTIONARIES to reload dictionary data +// - Refresh materialized views if needed +// - Data validation queries to verify restore integrity +// - Any other ClickHouse-specific post-restore operations +// +// Example commands that might be needed: +// +// ATTACH TABLE IF NOT EXISTS database.table; +// SYSTEM RELOAD DICTIONARIES; +// OPTIMIZE TABLE database.table FINAL; +func ExecutePostRestoreCommand(endpoint string, log *logger.Logger) error { + log.Debugf("Post-restore SQL command (stub) - endpoint: %s", endpoint) + log.Debugf("TODO: Implement actual ClickHouse SQL command execution") + + // Future implementation will: + // 1. Create ClickHouse client connection + // 2. Execute required SQL commands + // 3. Handle errors and retries + // 4. 
Log results + + return nil +} diff --git a/internal/clients/elasticsearch/client.go b/internal/clients/elasticsearch/client.go index 74b20f3..ce99cec 100644 --- a/internal/clients/elasticsearch/client.go +++ b/internal/clients/elasticsearch/client.go @@ -12,6 +12,17 @@ import ( "github.com/elastic/go-elasticsearch/v8" ) +// Restore status constants +const ( + StatusSuccess = "SUCCESS" + StatusFailed = "FAILED" + StatusInProgress = "IN_PROGRESS" + StatusNotFound = "NOT_FOUND" + StatusPartial = "PARTIAL" + StatusStarted = "STARTED" + StatusInit = "INIT" +) + // Client represents an Elasticsearch client type Client struct { es *elasticsearch.Client @@ -319,8 +330,10 @@ func (c *Client) ConfigureSLMPolicy(name, schedule, snapshotName, repository, in return nil } -// RestoreSnapshot restores a snapshot from a repository -func (c *Client) RestoreSnapshot(repository, snapshotName, indicesPattern string, waitForCompletion bool) error { +// RestoreSnapshot restores a snapshot from a repository asynchronously +// The restore is triggered and returns immediately (waitForCompletion=false) +// Use GetRestoreStatus to check the progress of the restore operation +func (c *Client) RestoreSnapshot(repository, snapshotName, indicesPattern string) error { body := map[string]interface{}{ "indices": indicesPattern, } @@ -335,7 +348,7 @@ func (c *Client) RestoreSnapshot(repository, snapshotName, indicesPattern string snapshotName, c.es.Snapshot.Restore.WithContext(context.Background()), c.es.Snapshot.Restore.WithBody(strings.NewReader(string(bodyJSON))), - c.es.Snapshot.Restore.WithWaitForCompletion(waitForCompletion), + c.es.Snapshot.Restore.WithWaitForCompletion(false), ) if err != nil { return fmt.Errorf("failed to restore snapshot: %w", err) @@ -348,3 +361,80 @@ func (c *Client) RestoreSnapshot(repository, snapshotName, indicesPattern string return nil } + +// RestoreStatusResponse represents the response from Elasticsearch restore status API +type RestoreStatusResponse struct { + Snapshots []struct { + Snapshot string `json:"snapshot"` + State string `json:"state"` + Shards struct { + Total int `json:"total"` + Failed int `json:"failed"` + Successful int `json:"successful"` + } `json:"shards_stats"` + } `json:"snapshots"` +} + +// GetRestoreStatus checks the status of a restore operation +// Returns: (statusMessage, isComplete, error) +// Status can be: "IN_PROGRESS", "SUCCESS", "FAILED", "NOT_FOUND" +func (c *Client) GetRestoreStatus(repository, snapshotName string) (string, bool, error) { + res, err := c.es.Snapshot.Status( + c.es.Snapshot.Status.WithContext(context.Background()), + c.es.Snapshot.Status.WithRepository(repository), + c.es.Snapshot.Status.WithSnapshot(snapshotName), + ) + if err != nil { + return "", false, fmt.Errorf("failed to get restore status: %w", err) + } + defer res.Body.Close() + + // 404 means no restore is in progress + if res.StatusCode == http.StatusNotFound { + return StatusNotFound, true, nil + } + + if res.IsError() { + return "", false, fmt.Errorf("elasticsearch returned error: %s", res.String()) + } + + var statusResp RestoreStatusResponse + if err := json.NewDecoder(res.Body).Decode(&statusResp); err != nil { + return "", false, fmt.Errorf("failed to decode response: %w", err) + } + + // If no snapshots are being restored, it's complete + if len(statusResp.Snapshots) == 0 { + return StatusSuccess, true, nil + } + + // Check the state of the snapshot + snapshotStatus := statusResp.Snapshots[0] + state := snapshotStatus.State + + switch state { + case StatusSuccess, 
StatusPartial: + return StatusSuccess, true, nil + case StatusFailed: + return StatusFailed, true, nil + case StatusInProgress, StatusStarted, StatusInit: + return StatusInProgress, false, nil + default: + return state, false, nil + } +} + +// IsRestoreInProgress checks if a restore operation is currently in progress +func (c *Client) IsRestoreInProgress(repository, snapshotName string) (bool, error) { + status, isComplete, err := c.GetRestoreStatus(repository, snapshotName) + if err != nil { + return false, err + } + + // If status is NOT_FOUND or complete, no restore in progress + if status == "NOT_FOUND" || isComplete { + return false, nil + } + + return true, nil +} diff --git a/internal/clients/elasticsearch/client_test.go b/internal/clients/elasticsearch/client_test.go index a347721..85910a1 100644 --- a/internal/clients/elasticsearch/client_test.go +++ b/internal/clients/elasticsearch/client_test.go @@ -391,11 +391,6 @@ func TestClient_RestoreSnapshot(t *testing.T) { expectedPath := "/_snapshot/" + tt.repository + "/" + tt.snapshotName + "/_restore" assert.Equal(t, expectedPath, r.URL.Path) assert.Equal(t, http.MethodPost, r.Method) - - if tt.waitForCompletion { - assert.Equal(t, "true", r.URL.Query().Get("wait_for_completion")) - } - w.WriteHeader(tt.responseStatus) })) defer server.Close() @@ -405,7 +400,7 @@ func TestClient_RestoreSnapshot(t *testing.T) { require.NoError(t, err) // Execute test - err = client.RestoreSnapshot(tt.repository, tt.snapshotName, tt.indicesPattern, tt.waitForCompletion) + err = client.RestoreSnapshot(tt.repository, tt.snapshotName, tt.indicesPattern) // Assertions if tt.expectError { diff --git a/internal/clients/elasticsearch/interface.go b/internal/clients/elasticsearch/interface.go index 40ecdea..669d6f0 100644 --- a/internal/clients/elasticsearch/interface.go +++ b/internal/clients/elasticsearch/interface.go @@ -6,7 +6,9 @@ type Interface interface { // Snapshot operations ListSnapshots(repository string) ([]Snapshot, error) GetSnapshot(repository, snapshotName string) (*Snapshot, error) - RestoreSnapshot(repository, snapshotName, indicesPattern string, waitForCompletion bool) error + RestoreSnapshot(repository, snapshotName, indicesPattern string) error + GetRestoreStatus(repository, snapshotName string) (string, bool, error) + IsRestoreInProgress(repository, snapshotName string) (bool, error) // Index operations ListIndices(pattern string) ([]string, error) diff --git a/internal/foundation/config/config.go b/internal/foundation/config/config.go index 68383f9..3814a24 100644 --- a/internal/foundation/config/config.go +++ b/internal/foundation/config/config.go @@ -22,6 +22,7 @@ type Config struct { Stackgraph StackgraphConfig `yaml:"stackgraph" validate:"required"` Settings SettingsConfig `yaml:"settings" validate:"required"` VictoriaMetrics VictoriaMetricsConfig `yaml:"victoriaMetrics" validate:"required"` + Clickhouse ClickhouseConfig `yaml:"clickhouse" validate:"required"` } // KubernetesConfig holds Kubernetes-wide configuration @@ -134,6 +135,21 @@ type SettingsRestoreConfig struct { Job JobConfig `yaml:"job" validate:"required"` } +// ClickhouseConfig holds Clickhouse-specific configuration +type ClickhouseConfig struct { + Service ServiceConfig `yaml:"service" validate:"required"` + BackupService ServiceConfig `yaml:"backupService" validate:"required"` + Database string `yaml:"database" validate:"required"` + Username string `yaml:"username" validate:"required"` + Password string `yaml:"password" validate:"required"` + Restore 
ClickhouseRestoreConfig `yaml:"restore" validate:"required"` +} + +// ClickhouseRestoreConfig holds Clickhouse restore-specific configuration +type ClickhouseRestoreConfig struct { + ScaleDownLabelSelector string `yaml:"scaleDownLabelSelector" validate:"required"` +} + // PVCConfig holds PersistentVolumeClaim configuration type PVCConfig struct { Size string `yaml:"size" validate:"required"` diff --git a/internal/foundation/config/config_test.go b/internal/foundation/config/config_test.go index bac1352..c2201eb 100644 --- a/internal/foundation/config/config_test.go +++ b/internal/foundation/config/config_test.go @@ -451,6 +451,24 @@ func TestConfig_StructValidation(t *testing.T) { }, }, }, + Clickhouse: ClickhouseConfig{ + Service: ServiceConfig{ + Name: "clickhouse", + Port: 9000, + LocalPortForwardPort: 9000, + }, + BackupService: ServiceConfig{ + Name: "clickhouse", + Port: 7171, + LocalPortForwardPort: 7171, + }, + Database: "default", + Username: "default", + Password: "password", + Restore: ClickhouseRestoreConfig{ + ScaleDownLabelSelector: "app=clickhouse", + }, + }, }, expectError: false, }, diff --git a/internal/foundation/config/testdata/validConfigMapConfig.yaml b/internal/foundation/config/testdata/validConfigMapConfig.yaml index 58368b8..af41ced 100644 --- a/internal/foundation/config/testdata/validConfigMapConfig.yaml +++ b/internal/foundation/config/testdata/validConfigMapConfig.yaml @@ -160,3 +160,19 @@ settings: requests: cpu: "500m" memory: "1Gi" + +# ClickHouse backup configuration +clickhouse: + service: + name: "suse-observability-clickhouse-shard0-0" + port: 9000 + localPortForwardPort: 9000 + backupService: + name: "suse-observability-clickhouse-shard0-0" + port: 7171 + localPortForwardPort: 7171 + database: "default" + username: "default" + password: "password" + restore: + scaleDownLabelSelector: "observability.suse.com/scalable-during-clickhouse-restore=true" diff --git a/internal/foundation/config/testdata/validConfigMapOnly.yaml b/internal/foundation/config/testdata/validConfigMapOnly.yaml index 6af0c75..f7eeb1b 100644 --- a/internal/foundation/config/testdata/validConfigMapOnly.yaml +++ b/internal/foundation/config/testdata/validConfigMapOnly.yaml @@ -150,3 +150,19 @@ settings: requests: cpu: "500m" memory: "1Gi" + +# ClickHouse backup configuration +clickhouse: + service: + name: "suse-observability-clickhouse-shard0-0" + port: 9000 + localPortForwardPort: 9000 + backupService: + name: "suse-observability-clickhouse-shard0-0" + port: 7171 + localPortForwardPort: 7171 + database: "default" + username: "default" + password: "password" + restore: + scaleDownLabelSelector: "observability.suse.com/scalable-during-clickhouse-restore=true" diff --git a/internal/orchestration/restore/apirestore.go b/internal/orchestration/restore/apirestore.go new file mode 100644 index 0000000..8882774 --- /dev/null +++ b/internal/orchestration/restore/apirestore.go @@ -0,0 +1,87 @@ +package restore + +import ( + "fmt" + "time" + + "github.com/stackvista/stackstate-backup-cli/internal/foundation/logger" +) + +const ( + defaultAPIRestoreTimeout = 30 * time.Minute + defaultAPIStatusCheckInterval = 10 * time.Second +) + +// WaitForAPIRestore waits for an API-based restore operation to complete by polling status +// checkStatusFn should return (statusMessage, isComplete, error) +// where statusMessage describes current state, isComplete indicates if operation finished +func WaitForAPIRestore( + checkStatusFn func() (string, bool, error), + interval time.Duration, + timeout time.Duration, 
+ log *logger.Logger, +) error { + if interval == 0 { + interval = defaultAPIStatusCheckInterval + } + if timeout == 0 { + timeout = defaultAPIRestoreTimeout + } + + timeoutChan := time.After(timeout) + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-timeoutChan: + return fmt.Errorf("timeout waiting for restore to complete") + case <-ticker.C: + statusMsg, isComplete, err := checkStatusFn() + if err != nil { + return fmt.Errorf("failed to check restore status: %w", err) + } + + log.Debugf("Restore status: %s (complete: %v)", statusMsg, isComplete) + + if isComplete { + if statusMsg == "SUCCESS" || statusMsg == "PARTIAL" { + log.Debugf("Restore completed successfully") + return nil + } + return fmt.Errorf("restore failed with status: %s", statusMsg) + } + } + } +} + +// PrintAPIWaitingMessage prints waiting message with instructions for interruption +// Adapted from job.go:PrintWaitingMessage for API-based restores +func PrintAPIWaitingMessage(serviceName, identifier, namespace string, log *logger.Logger) { + log.Println() + log.Infof("Waiting for restore to complete (this may take significant amount of time depending on the data size)...") + log.Println() + log.Infof("You can safely interrupt this command with Ctrl+C.") + log.Infof("To check status and finalize later, run:") + log.Infof(" sts-backup %s check-and-finalize --operation-id %s -n %s", serviceName, identifier, namespace) +} + +// PrintAPIRunningRestoreStatus prints status and instructions for a running restore +// Adapted from job.go:PrintRunningJobStatus for API-based restores +func PrintAPIRunningRestoreStatus(serviceName, identifier, namespace string, log *logger.Logger) { + log.Println() + log.Infof("Restore is running for %s: %s", serviceName, identifier) + log.Println() + log.Infof("To check status and finalize, run:") + log.Infof(" sts-backup %s check-and-finalize --operation-id %s --wait -n %s", serviceName, identifier, namespace) +} + +// FinalizeRestore executes post-restore finalization steps +func FinalizeRestore(scaleUpFn func() error, log *logger.Logger) error { + log.Infof("Finalizing restore...") + if err := scaleUpFn(); err != nil { + return fmt.Errorf("failed to scale up deployments: %w", err) + } + log.Successf("Finalization completed successfully") + return nil +} From f1671a9bbb72b94c5af265a8d0e07ee5d102b0f3 Mon Sep 17 00:00:00 2001 From: Vladimir Iliakov Date: Mon, 24 Nov 2025 11:43:14 +0100 Subject: [PATCH 4/7] STAC-23600: Remove unused method --- internal/clients/clickhouse/sql.go | 33 ------------------------------ 1 file changed, 33 deletions(-) delete mode 100644 internal/clients/clickhouse/sql.go diff --git a/internal/clients/clickhouse/sql.go b/internal/clients/clickhouse/sql.go deleted file mode 100644 index f6838e5..0000000 --- a/internal/clients/clickhouse/sql.go +++ /dev/null @@ -1,33 +0,0 @@ -package clickhouse - -import "github.com/stackvista/stackstate-backup-cli/internal/foundation/logger" - -// ExecutePostRestoreCommand is a stub for executing ClickHouse SQL commands after restore -// TODO: Implement actual ClickHouse SQL client and commands -// -// Future implementation should include: -// - Connect to ClickHouse via native protocol or HTTP interface -// - Execute necessary post-restore SQL commands such as: -// - ATTACH TABLE commands to reattach restored tables -// - SYSTEM RELOAD DICTIONARIES to reload dictionary data -// - Refresh materialized views if needed -// - Data validation queries to verify restore integrity -// - Any other 
ClickHouse-specific post-restore operations -// -// Example commands that might be needed: -// -// ATTACH TABLE IF NOT EXISTS database.table; -// SYSTEM RELOAD DICTIONARIES; -// OPTIMIZE TABLE database.table FINAL; -func ExecutePostRestoreCommand(endpoint string, log *logger.Logger) error { - log.Debugf("Post-restore SQL command (stub) - endpoint: %s", endpoint) - log.Debugf("TODO: Implement actual ClickHouse SQL command execution") - - // Future implementation will: - // 1. Create ClickHouse client connection - // 2. Execute required SQL commands - // 3. Handle errors and retries - // 4. Log results - - return nil -} From b08113b97294e4b5bd91d30db8980573836f939d Mon Sep 17 00:00:00 2001 From: Vladimir Iliakov Date: Mon, 24 Nov 2025 11:53:08 +0100 Subject: [PATCH 5/7] STAC-23600: Clickhouse client uses appCtx.Context --- cmd/clickhouse/check_and_finalize.go | 4 ++-- cmd/clickhouse/list.go | 2 +- cmd/clickhouse/restore.go | 4 ++-- internal/clients/clickhouse/client.go | 22 +++++++++------------- internal/clients/clickhouse/client_test.go | 13 +++++++++---- internal/clients/clickhouse/interface.go | 9 +++++---- 6 files changed, 28 insertions(+), 26 deletions(-) diff --git a/cmd/clickhouse/check_and_finalize.go b/cmd/clickhouse/check_and_finalize.go index d0d9206..6b188c0 100644 --- a/cmd/clickhouse/check_and_finalize.go +++ b/cmd/clickhouse/check_and_finalize.go @@ -63,7 +63,7 @@ func waitAndFinalize(appCtx *app.Context, chClient clickhouse.Interface, operati // Wait for restore using shared utility checkStatusFn := func() (string, bool, error) { - status, err := chClient.GetRestoreStatus(operationID) + status, err := chClient.GetRestoreStatus(appCtx.Context, operationID) if err != nil { return "", false, err } @@ -107,7 +107,7 @@ func checkAndFinalize(appCtx *app.Context, operationID string, waitForComplete b // Check status appCtx.Logger.Println() appCtx.Logger.Infof("Checking restore status for operation: %s", operationID) - status, err := appCtx.CHClient.GetRestoreStatus(operationID) + status, err := appCtx.CHClient.GetRestoreStatus(appCtx.Context, operationID) if err != nil { return err } diff --git a/cmd/clickhouse/list.go b/cmd/clickhouse/list.go index efb529b..a2d0352 100644 --- a/cmd/clickhouse/list.go +++ b/cmd/clickhouse/list.go @@ -50,7 +50,7 @@ func runList(appCtx *app.Context) error { appCtx.Logger.Infof("Listing Clickhouse backups...") appCtx.Logger.Println() - backups, err := appCtx.CHClient.ListBackups() + backups, err := appCtx.CHClient.ListBackups(appCtx.Context) if err != nil { return fmt.Errorf("failed to list backups: %w", err) } diff --git a/cmd/clickhouse/restore.go b/cmd/clickhouse/restore.go index 51e8465..eaa727e 100644 --- a/cmd/clickhouse/restore.go +++ b/cmd/clickhouse/restore.go @@ -108,7 +108,7 @@ func executeRestore(appCtx *app.Context, backupName string, waitForComplete bool // Trigger restore appCtx.Logger.Println() appCtx.Logger.Infof("Triggering restore for backup: %s", backupName) - operationID, err := appCtx.CHClient.TriggerRestore(backupName) + operationID, err := appCtx.CHClient.TriggerRestore(appCtx.Context, backupName) if err != nil { return fmt.Errorf("failed to trigger restore: %w", err) } @@ -141,7 +141,7 @@ func getLatestBackupForRestore(appCtx *app.Context) (string, error) { defer close(pf.StopChan) // List backups - backups, err := appCtx.CHClient.ListBackups() + backups, err := appCtx.CHClient.ListBackups(appCtx.Context) if err != nil { return "", fmt.Errorf("failed to list backups: %w", err) } diff --git 
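
Threading `appCtx.Context` through the client methods means that cancelling the parent context (presumably wired to the CLI's signal handling) now aborts in-flight HTTP calls, and a command can also bound a single call more tightly than the whole run. A small sketch under that assumption; the two-minute budget is an illustrative value, not one taken from the repository:

```go
package example

import (
	"context"
	"fmt"
	"time"

	"github.com/stackvista/stackstate-backup-cli/internal/clients/clickhouse"
)

// countBackups bounds a single ListBackups call with its own timeout while
// still inheriting cancellation from the parent (command-level) context.
func countBackups(parent context.Context, ch clickhouse.Interface) (int, error) {
	ctx, cancel := context.WithTimeout(parent, 2*time.Minute) // arbitrary example budget
	defer cancel()

	backups, err := ch.ListBackups(ctx)
	if err != nil {
		return 0, fmt.Errorf("failed to list backups: %w", err)
	}
	return len(backups), nil
}
```
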
a/internal/clients/clickhouse/client.go b/internal/clients/clickhouse/client.go index e2bf6e1..4e7def2 100644 --- a/internal/clients/clickhouse/client.go +++ b/internal/clients/clickhouse/client.go @@ -85,10 +85,9 @@ func NewClient(backupAPI, addr, db, username, password string) (*Client, error) // ListBackups retrieves all backups from ClickHouse Backup API // The API returns newline-delimited JSON (NDJSON) format -func (c *Client) ListBackups() ([]Backup, error) { +func (c *Client) ListBackups(ctx context.Context) ([]Backup, error) { url := fmt.Sprintf("%s/backup/list", c.backupAPIURL) - ctx := context.Background() req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) @@ -123,11 +122,10 @@ func (c *Client) ListBackups() ([]Backup, error) { // TriggerRestore initiates a restore operation via HTTP POST and returns the restore operation ID // POST /backup/download/${BACKUP_NAME}?callback=http://localhost:{port}/backup/restore/${BACKUP_NAME} // Note: The initial response contains the download operation ID, but we need to poll for the restore operation ID -func (c *Client) TriggerRestore(backupName string) (string, error) { +func (c *Client) TriggerRestore(ctx context.Context, backupName string) (string, error) { callbackURL := fmt.Sprintf("%s/backup/restore/%s", c.backupAPIURL, backupName) reqURL := fmt.Sprintf("%s/backup/download/%s?callback=%s", c.backupAPIURL, backupName, url.QueryEscape(callbackURL)) - ctx := context.Background() req, err := http.NewRequestWithContext(ctx, http.MethodPost, reqURL, nil) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) @@ -151,11 +149,11 @@ func (c *Client) TriggerRestore(backupName string) (string, error) { // Poll for the restore operation (command contains "restore" not "download") // The restore is triggered via callback after download completes, so we need to wait - return c.waitForRestoreOperationID(backupName, defaultRestoreOperationTimeout, defaultRestoreOperationPollInterval) + return c.waitForRestoreOperationID(ctx, backupName, defaultRestoreOperationTimeout, defaultRestoreOperationPollInterval) } // waitForRestoreOperationID polls for the restore operation ID with timeout and retry -func (c *Client) waitForRestoreOperationID(backupName string, timeout, pollInterval time.Duration) (string, error) { +func (c *Client) waitForRestoreOperationID(ctx context.Context, backupName string, timeout, pollInterval time.Duration) (string, error) { deadline := time.After(timeout) ticker := time.NewTicker(pollInterval) defer ticker.Stop() @@ -165,7 +163,7 @@ func (c *Client) waitForRestoreOperationID(backupName string, timeout, pollInter case <-deadline: return "", fmt.Errorf("timeout waiting for restore operation to be created for backup: %s", backupName) case <-ticker.C: - operationID, err := c.getRestoreOperationID(backupName) + operationID, err := c.getRestoreOperationID(ctx, backupName) if err == nil { return operationID, nil } @@ -176,10 +174,9 @@ func (c *Client) waitForRestoreOperationID(backupName string, timeout, pollInter // getRestoreOperationID polls for the restore operation ID for a given backup // It looks for the most recent restore action (not download) matching the backup name -func (c *Client) getRestoreOperationID(backupName string) (string, error) { +func (c *Client) getRestoreOperationID(ctx context.Context, backupName string) (string, error) { reqURL := fmt.Sprintf("%s/backup/actions?filter=restore", c.backupAPIURL) - ctx 
:= context.Background() req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) @@ -222,10 +219,9 @@ func (c *Client) getRestoreOperationID(backupName string) (string, error) { // GetRestoreStatus retrieves the current restore status for a specific backup // GET /backup/actions?filter=restore // Returns the most recent restore action matching the operation id -func (c *Client) GetRestoreStatus(operationID string) (*RestoreAction, error) { +func (c *Client) GetRestoreStatus(ctx context.Context, operationID string) (*RestoreAction, error) { reqURL := fmt.Sprintf("%s/backup/actions?filter=restore", c.backupAPIURL) - ctx := context.Background() req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) @@ -266,7 +262,7 @@ func (c *Client) GetRestoreStatus(operationID string) (*RestoreAction, error) { } // WaitForRestoreCompletion polls until restore completes or times out -func (c *Client) WaitForRestoreCompletion(operationID string, timeout, pollInterval time.Duration) error { +func (c *Client) WaitForRestoreCompletion(ctx context.Context, operationID string, timeout, pollInterval time.Duration) error { deadline := time.After(timeout) ticker := time.NewTicker(pollInterval) defer ticker.Stop() @@ -276,7 +272,7 @@ func (c *Client) WaitForRestoreCompletion(operationID string, timeout, pollInter case <-deadline: return fmt.Errorf("timeout waiting for restore to complete") case <-ticker.C: - status, err := c.GetRestoreStatus(operationID) + status, err := c.GetRestoreStatus(ctx, operationID) if err != nil { // Continue polling on error (might be transient) continue diff --git a/internal/clients/clickhouse/client_test.go b/internal/clients/clickhouse/client_test.go index 3ecfe66..b6f8b36 100644 --- a/internal/clients/clickhouse/client_test.go +++ b/internal/clients/clickhouse/client_test.go @@ -1,6 +1,7 @@ package clickhouse import ( + "context" "net/http" "net/http/httptest" "testing" @@ -60,7 +61,8 @@ func TestListBackups_Success(t *testing.T) { client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password") require.NoError(t, err) - backups, err := client.ListBackups() + ctx := context.Background() + backups, err := client.ListBackups(ctx) require.NoError(t, err) assert.Len(t, backups, 2) @@ -85,7 +87,8 @@ func TestListBackups_EmptyList(t *testing.T) { client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password") require.NoError(t, err) - backups, err := client.ListBackups() + ctx := context.Background() + backups, err := client.ListBackups(ctx) require.NoError(t, err) assert.Empty(t, backups) } @@ -99,7 +102,8 @@ func TestListBackups_ServerError(t *testing.T) { client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password") require.NoError(t, err) - backups, err := client.ListBackups() + ctx := context.Background() + backups, err := client.ListBackups(ctx) assert.Error(t, err) assert.Nil(t, backups) assert.Contains(t, err.Error(), "backup API returned status 500") @@ -116,7 +120,8 @@ func TestListBackups_InvalidJSON(t *testing.T) { client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password") require.NoError(t, err) - backups, err := client.ListBackups() + ctx := context.Background() + backups, err := client.ListBackups(ctx) assert.Error(t, err) assert.Nil(t, backups) assert.Contains(t, err.Error(), "failed to 
decode response") diff --git a/internal/clients/clickhouse/interface.go b/internal/clients/clickhouse/interface.go index bf70737..059d9bd 100644 --- a/internal/clients/clickhouse/interface.go +++ b/internal/clients/clickhouse/interface.go @@ -1,6 +1,7 @@ package clickhouse import ( + "context" "time" "github.com/ClickHouse/clickhouse-go/v2/lib/driver" @@ -9,16 +10,16 @@ import ( // Interface defines the contract for ClickHouse Backup API client operations type Interface interface { // ListBackups retrieves all backups from ClickHouse Backup API - ListBackups() ([]Backup, error) + ListBackups(ctx context.Context) ([]Backup, error) // TriggerRestore initiates a restore operation and returns the operation ID - TriggerRestore(backupName string) (string, error) + TriggerRestore(ctx context.Context, backupName string) (string, error) // GetRestoreStatus retrieves the current restore status - GetRestoreStatus(operationID string) (*RestoreAction, error) + GetRestoreStatus(ctx context.Context, operationID string) (*RestoreAction, error) // WaitForRestoreCompletion polls until restore completes or times out - WaitForRestoreCompletion(operationID string, timeout, pollInterval time.Duration) error + WaitForRestoreCompletion(ctx context.Context, operationID string, timeout, pollInterval time.Duration) error // Connect opens connection to a ClickHouse database Connect() (driver.Conn, func() error, error) From fcb90096f75aca56bce21712355e12064f33fee4 Mon Sep 17 00:00:00 2001 From: Vladimir Iliakov Date: Mon, 24 Nov 2025 20:13:26 +0100 Subject: [PATCH 6/7] STAC-23600: Fix Clickhouse restore --- cmd/clickhouse/check_and_finalize.go | 92 ++++--- cmd/clickhouse/restore.go | 13 +- internal/clients/clickhouse/client.go | 266 +++++++++------------ internal/clients/clickhouse/client_test.go | 2 +- internal/clients/clickhouse/interface.go | 8 +- 5 files changed, 167 insertions(+), 214 deletions(-) diff --git a/cmd/clickhouse/check_and_finalize.go b/cmd/clickhouse/check_and_finalize.go index 6b188c0..31042dd 100644 --- a/cmd/clickhouse/check_and_finalize.go +++ b/cmd/clickhouse/check_and_finalize.go @@ -54,9 +54,54 @@ It will check the restore status and if complete, execute post-restore tasks and } func runCheckAndFinalize(appCtx *app.Context) error { + // Setup port-forward + pf, err := portforward.SetupPortForward( + appCtx.K8sClient, + appCtx.Namespace, + appCtx.Config.Clickhouse.BackupService.Name, + appCtx.Config.Clickhouse.BackupService.LocalPortForwardPort, + appCtx.Config.Clickhouse.BackupService.Port, + appCtx.Logger, + ) + if err != nil { + return err + } + defer close(pf.StopChan) return checkAndFinalize(appCtx, checkOperationID, waitForRestore) } +// checkAndFinalize checks restore status and finalizes if complete +func checkAndFinalize(appCtx *app.Context, operationID string, waitForComplete bool) error { + // Check status + appCtx.Logger.Println() + appCtx.Logger.Infof("Checking restore status for operation: %s", operationID) + status, err := appCtx.CHClient.GetRestoreStatus(appCtx.Context, operationID) + if err != nil { + return err + } + + if status.Status == "error" { + return fmt.Errorf("restore failed: %s", status.Error) + } + + if status.Status == "success" { + appCtx.Logger.Successf("Restore completed successfully") + return finalizeRestore(appCtx) + } + + // Status is "in progress" or other + if waitForComplete { + // Still running - wait + appCtx.Logger.Infof("Restore is in progress, waiting for completion...") + return waitAndFinalize(appCtx, appCtx.CHClient, operationID) + } + // Just 
print status + appCtx.Logger.Println() + appCtx.Logger.Infof("Restore is in progress (status: %s)", status.Status) + restore.PrintAPIRunningRestoreStatus("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) + return nil +} + // waitAndFinalize waits for restore completion and finalizes func waitAndFinalize(appCtx *app.Context, chClient clickhouse.Interface, operationID string) error { restore.PrintAPIWaitingMessage("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) @@ -88,53 +133,6 @@ func waitAndFinalize(appCtx *app.Context, chClient clickhouse.Interface, operati return finalizeRestore(appCtx) } -// checkAndFinalize checks restore status and finalizes if complete -func checkAndFinalize(appCtx *app.Context, operationID string, waitForComplete bool) error { - // Setup port-forward - pf, err := portforward.SetupPortForward( - appCtx.K8sClient, - appCtx.Namespace, - appCtx.Config.Clickhouse.BackupService.Name, - appCtx.Config.Clickhouse.BackupService.LocalPortForwardPort, - appCtx.Config.Clickhouse.BackupService.Port, - appCtx.Logger, - ) - if err != nil { - return err - } - defer close(pf.StopChan) - - // Check status - appCtx.Logger.Println() - appCtx.Logger.Infof("Checking restore status for operation: %s", operationID) - status, err := appCtx.CHClient.GetRestoreStatus(appCtx.Context, operationID) - if err != nil { - return err - } - - // Handle based on status - switch status.Status { - case "success": - // Already complete - finalize - appCtx.Logger.Successf("Restore completed successfully") - return finalizeRestore(appCtx) - case "error": - return fmt.Errorf("restore failed: %s", status.Error) - default: - // Status is "in progress" or other - if waitForComplete { - // Still running - wait - appCtx.Logger.Infof("Restore is in progress, waiting for completion...") - return waitAndFinalize(appCtx, appCtx.CHClient, operationID) - } - // Just print status - appCtx.Logger.Println() - appCtx.Logger.Infof("Restore is in progress (status: %s)", status.Status) - restore.PrintAPIRunningRestoreStatus("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) - return nil - } -} - // finalizeRestore finalizes the restore by executing SQL and scaling up func finalizeRestore(appCtx *app.Context) error { if err := executePostRestoreSQL(appCtx); err != nil { diff --git a/cmd/clickhouse/restore.go b/cmd/clickhouse/restore.go index eaa727e..5fb4d27 100644 --- a/cmd/clickhouse/restore.go +++ b/cmd/clickhouse/restore.go @@ -86,7 +86,8 @@ func runRestore(appCtx *app.Context) error { } // Execute restore workflow - return executeRestore(appCtx, backupName, !restoreBackground) + waitForComplete := !restoreBackground + return executeRestore(appCtx, backupName, waitForComplete) } // executeRestore orchestrates the complete ClickHouse restore workflow @@ -114,14 +115,12 @@ func executeRestore(appCtx *app.Context, backupName string, waitForComplete bool } appCtx.Logger.Successf("Restore triggered successfully (operation ID: %s)", operationID) - // Wait for completion if requested - if waitForComplete { - return waitAndFinalize(appCtx, appCtx.CHClient, operationID) + if !waitForComplete { + restore.PrintAPIRunningRestoreStatus("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) + return nil } - // Print background status - restore.PrintAPIRunningRestoreStatus("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) - return nil + return checkAndFinalize(appCtx, operationID, waitForComplete) } // getLatestBackupForRestore retrieves the most recent backup diff --git 
a/internal/clients/clickhouse/client.go b/internal/clients/clickhouse/client.go index 4e7def2..8a1d6b9 100644 --- a/internal/clients/clickhouse/client.go +++ b/internal/clients/clickhouse/client.go @@ -3,10 +3,10 @@ package clickhouse import ( "context" "encoding/json" + "errors" "fmt" "io" "net/http" - "net/url" "time" clickhouseDriver "github.com/ClickHouse/clickhouse-go/v2" @@ -14,10 +14,9 @@ import ( ) const ( - // defaultRestoreOperationTimeout is the timeout for waiting for restore operation to be created - defaultRestoreOperationTimeout = 30 * time.Second - // defaultRestoreOperationPollInterval is the interval between checks for restore operation - defaultRestoreOperationPollInterval = 2 * time.Second + defaultHTTPClientTimeout = 30 * time.Second + defaultOperationTimeout = 30 * time.Second + defaultOperationPollInterval = 2 * time.Second ) // Client represents a ClickHouse Backup API client with optional SQL support @@ -30,8 +29,8 @@ type Client struct { clickhousePassword string } -// Backup represents a ClickHouse backup from the API -type Backup struct { +// ListBackupResponse represents a ClickHouse backup from the API +type ListBackupResponse struct { Name string `json:"name"` Created string `json:"created"` Size int64 `json:"size"` @@ -43,12 +42,12 @@ type Backup struct { Desc string `json:"desc"` } -// RestoreAction represents a restore action from the backup API -type RestoreAction struct { +// ActionResponse represents a action from the backup API +type ActionResponse struct { Command string `json:"command"` Start string `json:"start"` Finish string `json:"finish"` - Status string `json:"status"` // "in progress", "success", "error" + Status string `json:"status"` Error string `json:"error"` OperationID string `json:"operation_id"` } @@ -74,7 +73,7 @@ func NewClient(backupAPI, addr, db, username, password string) (*Client, error) return &Client{ backupAPIURL: backupAPI, backupAPIHTTPClient: &http.Client{ - Timeout: 30 * time.Second, + Timeout: defaultHTTPClientTimeout, }, clickhouseAddr: addr, clickhouseDatabase: db, @@ -85,10 +84,10 @@ func NewClient(backupAPI, addr, db, username, password string) (*Client, error) // ListBackups retrieves all backups from ClickHouse Backup API // The API returns newline-delimited JSON (NDJSON) format -func (c *Client) ListBackups(ctx context.Context) ([]Backup, error) { - url := fmt.Sprintf("%s/backup/list", c.backupAPIURL) +func (c *Client) ListBackups(ctx context.Context) ([]ListBackupResponse, error) { + listURL := fmt.Sprintf("%s/backup/list", c.backupAPIURL) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, listURL, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } @@ -97,36 +96,27 @@ func (c *Client) ListBackups(ctx context.Context) ([]Backup, error) { if err != nil { return nil, fmt.Errorf("failed to execute request: %w", err) } - defer resp.Body.Close() + defer func() { + _ = resp.Body.Close() + }() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("backup API returned status %d", resp.StatusCode) } // Parse NDJSON response (newline-delimited JSON) - var backups []Backup - dec := json.NewDecoder(resp.Body) - for { - var backup Backup - if err := dec.Decode(&backup); err == io.EOF { - break - } else if err != nil { - return nil, fmt.Errorf("failed to decode response: %w", err) - } - backups = append(backups, backup) + backups, err := parseNDJSONBackups(resp.Body) + if err != nil { + return nil, 
fmt.Errorf("failed to parse backups: %w", err) } return backups, nil } // TriggerRestore initiates a restore operation via HTTP POST and returns the restore operation ID -// POST /backup/download/${BACKUP_NAME}?callback=http://localhost:{port}/backup/restore/${BACKUP_NAME} -// Note: The initial response contains the download operation ID, but we need to poll for the restore operation ID func (c *Client) TriggerRestore(ctx context.Context, backupName string) (string, error) { - callbackURL := fmt.Sprintf("%s/backup/restore/%s", c.backupAPIURL, backupName) - reqURL := fmt.Sprintf("%s/backup/download/%s?callback=%s", c.backupAPIURL, backupName, url.QueryEscape(callbackURL)) - - req, err := http.NewRequestWithContext(ctx, http.MethodPost, reqURL, nil) + downloadURL := fmt.Sprintf("%s/backup/download/%s", c.backupAPIURL, backupName) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, downloadURL, nil) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) } @@ -135,160 +125,98 @@ func (c *Client) TriggerRestore(ctx context.Context, backupName string) (string, if err != nil { return "", fmt.Errorf("failed to trigger restore: %w", err) } - defer resp.Body.Close() + var downloadAction ActionResponse + if err := json.NewDecoder(resp.Body).Decode(&downloadAction); err != nil { + return "", fmt.Errorf("failed to decode %s response: %w", downloadURL, err) + } + if err = resp.Body.Close(); err != nil { + return "", fmt.Errorf("failed to close response body: %w", err) + } + + _, err = c.waitForAction(ctx, downloadAction.OperationID, defaultOperationTimeout, defaultOperationPollInterval) + if err != nil { + return "", fmt.Errorf("failed to download backup: %w", err) + } + restoreURL := fmt.Sprintf("%s/backup/restore/%s", c.backupAPIURL, backupName) + req, err = http.NewRequestWithContext(ctx, http.MethodPost, restoreURL, nil) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + + resp, err = c.backupAPIHTTPClient.Do(req) + if err != nil { + return "", fmt.Errorf("failed to trigger restore: %w", err) + } if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("restore API returned status %d", resp.StatusCode) } - - // Parse response to get download operation ID (not the restore operation ID) - var downloadAction RestoreAction - if err := json.NewDecoder(resp.Body).Decode(&downloadAction); err != nil { - return "", fmt.Errorf("failed to decode response: %w", err) + var restoreAction ActionResponse + if err := json.NewDecoder(resp.Body).Decode(&restoreAction); err != nil { + return "", fmt.Errorf("failed to decode %s response: %w", restoreURL, err) + } + if err = resp.Body.Close(); err != nil { + return "", fmt.Errorf("failed to close response body: %w", err) } - // Poll for the restore operation (command contains "restore" not "download") - // The restore is triggered via callback after download completes, so we need to wait - return c.waitForRestoreOperationID(ctx, backupName, defaultRestoreOperationTimeout, defaultRestoreOperationPollInterval) + return restoreAction.OperationID, nil } -// waitForRestoreOperationID polls for the restore operation ID with timeout and retry -func (c *Client) waitForRestoreOperationID(ctx context.Context, backupName string, timeout, pollInterval time.Duration) (string, error) { - deadline := time.After(timeout) +// waitForAction polls for the restore operation ID with timeout and retry +func (c *Client) waitForAction(ctx context.Context, operationID string, timeout, pollInterval time.Duration) 
(*ActionResponse, error) { + // Create a child context with timeout + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + ticker := time.NewTicker(pollInterval) defer ticker.Stop() for { select { - case <-deadline: - return "", fmt.Errorf("timeout waiting for restore operation to be created for backup: %s", backupName) + case <-ctx.Done(): + // Could be timeout OR cancellation + if errors.Is(ctx.Err(), context.DeadlineExceeded) { + return nil, fmt.Errorf("timeout waiting for operation to finish: %s", operationID) + } + return nil, fmt.Errorf("operation cancelled: %w", ctx.Err()) case <-ticker.C: - operationID, err := c.getRestoreOperationID(ctx, backupName) - if err == nil { - return operationID, nil + action, err := c.GetRestoreStatus(ctx, operationID) + if err != nil { + return nil, fmt.Errorf("fail to get action with operation id: %s", operationID) + } + if action.Status == "success" || action.Status == "error" { + return action, nil } - // Continue polling on error (restore might not be created yet) + // Status is "in progress" - continue polling } } } -// getRestoreOperationID polls for the restore operation ID for a given backup -// It looks for the most recent restore action (not download) matching the backup name -func (c *Client) getRestoreOperationID(ctx context.Context, backupName string) (string, error) { - reqURL := fmt.Sprintf("%s/backup/actions?filter=restore", c.backupAPIURL) - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) +// GetRestoreStatus polls for the restore operation ID with timeout and retry +func (c *Client) GetRestoreStatus(ctx context.Context, operationID string) (*ActionResponse, error) { + actionURL := fmt.Sprintf("%s/backup/status?operation_id=%s", c.backupAPIURL, operationID) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, actionURL, nil) if err != nil { - return "", fmt.Errorf("failed to create request: %w", err) + return nil, fmt.Errorf("failed to create request: %w", err) } resp, err := c.backupAPIHTTPClient.Do(req) if err != nil { - return "", fmt.Errorf("failed to get actions: %w", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("actions API returned %d", resp.StatusCode) + return nil, fmt.Errorf("failed to request operation status: %w", err) } // Parse NDJSON response - var actions []RestoreAction - dec := json.NewDecoder(resp.Body) - for { - var action RestoreAction - if err := dec.Decode(&action); err == io.EOF { - break - } else if err != nil { - return "", fmt.Errorf("failed to decode: %w", err) - } - // Look for restore command (not download) matching the backup name - if action.Command == fmt.Sprintf("restore %s", backupName) { - actions = append(actions, action) - } - } - - if len(actions) == 0 { - return "", fmt.Errorf("no restore operation found for backup: %s", backupName) - } - - // Return most recent action's operation ID (last in list) - return actions[len(actions)-1].OperationID, nil -} - -// GetRestoreStatus retrieves the current restore status for a specific backup -// GET /backup/actions?filter=restore -// Returns the most recent restore action matching the operation id -func (c *Client) GetRestoreStatus(ctx context.Context, operationID string) (*RestoreAction, error) { - reqURL := fmt.Sprintf("%s/backup/actions?filter=restore", c.backupAPIURL) - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + allActions, err := parseNDJSONActions(resp.Body) if err != nil { - return nil, fmt.Errorf("failed to 
create request: %w", err) + return nil, fmt.Errorf("failed to parse operation status response: %w", err) } - resp, err := c.backupAPIHTTPClient.Do(req) - if err != nil { - return nil, fmt.Errorf("failed to get status: %w", err) + // It should be exactly one action + if lenActions := len(allActions); lenActions != 1 { + return nil, fmt.Errorf("incorrect operation status response, expected one record for operation, got: %d", lenActions) } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("status API returned %d", resp.StatusCode) - } - - // Parse NDJSON response (newline-delimited JSON) - var actions []RestoreAction - dec := json.NewDecoder(resp.Body) - for { - var action RestoreAction - if err := dec.Decode(&action); err == io.EOF { - break - } else if err != nil { - return nil, fmt.Errorf("failed to decode: %w", err) - } - // Filter by operation id - if action.OperationID == operationID { - actions = append(actions, action) - } - } - - if len(actions) == 0 { - return nil, fmt.Errorf("no restore action found for operation id: %s", operationID) - } - - // Return most recent action (last in list) - return &actions[len(actions)-1], nil -} - -// WaitForRestoreCompletion polls until restore completes or times out -func (c *Client) WaitForRestoreCompletion(ctx context.Context, operationID string, timeout, pollInterval time.Duration) error { - deadline := time.After(timeout) - ticker := time.NewTicker(pollInterval) - defer ticker.Stop() - - for { - select { - case <-deadline: - return fmt.Errorf("timeout waiting for restore to complete") - case <-ticker.C: - status, err := c.GetRestoreStatus(ctx, operationID) - if err != nil { - // Continue polling on error (might be transient) - continue - } - - if status.Status == "success" { - return nil - } - - if status.Status == "error" { - return fmt.Errorf("restore failed: %s", status.Error) - } - - // Status is "in progress" - continue polling - } - } + return &allActions[0], nil } // Connect opens a connection to ClickHouse instance @@ -308,3 +236,35 @@ func (c *Client) Connect() (driver.Conn, func() error, error) { return conn, conn.Close, nil } + +// parseNDJSONBackups parses newline-delimited JSON into ListBackupResponse slice +func parseNDJSONBackups(body io.Reader) ([]ListBackupResponse, error) { + var backups []ListBackupResponse + dec := json.NewDecoder(body) + for { + var backup ListBackupResponse + if err := dec.Decode(&backup); err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("failed to decode NDJSON: %w", err) + } + backups = append(backups, backup) + } + return backups, nil +} + +// parseNDJSONActions parses newline-delimited JSON into ActionResponse slice +func parseNDJSONActions(body io.Reader) ([]ActionResponse, error) { + var actions []ActionResponse + dec := json.NewDecoder(body) + for { + var action ActionResponse + if err := dec.Decode(&action); err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("failed to decode NDJSON: %w", err) + } + actions = append(actions, action) + } + return actions, nil +} diff --git a/internal/clients/clickhouse/client_test.go b/internal/clients/clickhouse/client_test.go index b6f8b36..431ea21 100644 --- a/internal/clients/clickhouse/client_test.go +++ b/internal/clients/clickhouse/client_test.go @@ -124,5 +124,5 @@ func TestListBackups_InvalidJSON(t *testing.T) { backups, err := client.ListBackups(ctx) assert.Error(t, err) assert.Nil(t, backups) - assert.Contains(t, err.Error(), "failed to decode response") + 
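
Since the download and restore steps are now two explicit POSTs with a status poll in between, the whole flow can be exercised against a stub server in the same style as the existing tests. A sketch intended to sit alongside `client_test.go` and reuse its imports; the payloads and operation IDs are illustrative:

```go
func TestTriggerRestore_DownloadThenRestore(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.Method == http.MethodPost && r.URL.Path == "/backup/download/backup-1":
			_, _ = w.Write([]byte(`{"command":"download backup-1","status":"in progress","operation_id":"dl-1"}` + "\n"))
		case r.Method == http.MethodGet && r.URL.Path == "/backup/status":
			// Polled by waitForAction until the download action completes.
			_, _ = w.Write([]byte(`{"command":"download backup-1","status":"success","operation_id":"dl-1"}` + "\n"))
		case r.Method == http.MethodPost && r.URL.Path == "/backup/restore/backup-1":
			_, _ = w.Write([]byte(`{"command":"restore backup-1","status":"in progress","operation_id":"rst-1"}` + "\n"))
		default:
			w.WriteHeader(http.StatusNotFound)
		}
	}))
	defer server.Close()

	client, err := NewClient(server.URL, "localhost:9000", "default", "default", "password")
	require.NoError(t, err)

	// Note: the 2s poll interval means this test takes a couple of seconds.
	operationID, err := client.TriggerRestore(context.Background(), "backup-1")
	require.NoError(t, err)
	assert.Equal(t, "rst-1", operationID)
}
```
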
assert.Contains(t, err.Error(), "failed to decode NDJSON") } diff --git a/internal/clients/clickhouse/interface.go b/internal/clients/clickhouse/interface.go index 059d9bd..f455fc5 100644 --- a/internal/clients/clickhouse/interface.go +++ b/internal/clients/clickhouse/interface.go @@ -2,7 +2,6 @@ package clickhouse import ( "context" - "time" "github.com/ClickHouse/clickhouse-go/v2/lib/driver" ) @@ -10,16 +9,13 @@ import ( // Interface defines the contract for ClickHouse Backup API client operations type Interface interface { // ListBackups retrieves all backups from ClickHouse Backup API - ListBackups(ctx context.Context) ([]Backup, error) + ListBackups(ctx context.Context) ([]ListBackupResponse, error) // TriggerRestore initiates a restore operation and returns the operation ID TriggerRestore(ctx context.Context, backupName string) (string, error) // GetRestoreStatus retrieves the current restore status - GetRestoreStatus(ctx context.Context, operationID string) (*RestoreAction, error) - - // WaitForRestoreCompletion polls until restore completes or times out - WaitForRestoreCompletion(ctx context.Context, operationID string, timeout, pollInterval time.Duration) error + GetRestoreStatus(ctx context.Context, operationID string) (*ActionResponse, error) // Connect opens connection to a ClickHouse database Connect() (driver.Conn, func() error, error) From 07dfb23e1e0684e6cc0f7cd9fabf1184ff7d7b42 Mon Sep 17 00:00:00 2001 From: Vladimir Iliakov Date: Mon, 24 Nov 2025 21:20:49 +0100 Subject: [PATCH 7/7] STAC-23600: Minor fixes --- cmd/clickhouse/check_and_finalize.go | 4 +++- cmd/elasticsearch/check_and_finalize.go | 4 ++-- cmd/elasticsearch/restore.go | 10 ++++------ internal/clients/clickhouse/client.go | 3 +++ 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cmd/clickhouse/check_and_finalize.go b/cmd/clickhouse/check_and_finalize.go index 31042dd..e86365e 100644 --- a/cmd/clickhouse/check_and_finalize.go +++ b/cmd/clickhouse/check_and_finalize.go @@ -89,6 +89,9 @@ func checkAndFinalize(appCtx *app.Context, operationID string, waitForComplete b return finalizeRestore(appCtx) } + // Restore still running + appCtx.Logger.Infof("Restore is still in progress (status: %s)", status) + // Status is "in progress" or other if waitForComplete { // Still running - wait @@ -97,7 +100,6 @@ func checkAndFinalize(appCtx *app.Context, operationID string, waitForComplete b } // Just print status appCtx.Logger.Println() - appCtx.Logger.Infof("Restore is in progress (status: %s)", status.Status) restore.PrintAPIRunningRestoreStatus("clickhouse", operationID, appCtx.Namespace, appCtx.Logger) return nil } diff --git a/cmd/elasticsearch/check_and_finalize.go b/cmd/elasticsearch/check_and_finalize.go index 7fcebe2..08863a7 100644 --- a/cmd/elasticsearch/check_and_finalize.go +++ b/cmd/elasticsearch/check_and_finalize.go @@ -61,7 +61,7 @@ func runCheckAndFinalize(appCtx *app.Context) error { return checkAndFinalize(appCtx, repository, checkOperationID, checkWait) } -func checkAndFinalize(appCtx *app.Context, repository, snapshotName string, wait bool) error { +func checkAndFinalize(appCtx *app.Context, repository, snapshotName string, waitForComplete bool) error { // Get restore status appCtx.Logger.Infof("Checking restore status for snapshot: %s", snapshotName) status, isComplete, err := appCtx.ESClient.GetRestoreStatus(repository, snapshotName) @@ -93,7 +93,7 @@ func checkAndFinalize(appCtx *app.Context, repository, snapshotName string, wait // Restore still running appCtx.Logger.Infof("Restore is 
still in progress (status: %s)", status) - if wait { + if waitForComplete { appCtx.Logger.Println() return waitAndFinalize(appCtx, repository, snapshotName) } diff --git a/cmd/elasticsearch/restore.go b/cmd/elasticsearch/restore.go index 5a9f389..ef5a31f 100644 --- a/cmd/elasticsearch/restore.go +++ b/cmd/elasticsearch/restore.go @@ -120,14 +120,12 @@ func runRestore(appCtx *app.Context) error { } appCtx.Logger.Successf("Restore triggered successfully") - // Wait for completion unless background mode requested - if !runBackground { - return waitAndFinalize(appCtx, repository, selectedSnapshot) + if runBackground { + restore.PrintAPIRunningRestoreStatus("elasticsearch", selectedSnapshot, appCtx.Namespace, appCtx.Logger) + return nil } - // Print background status - restore.PrintAPIRunningRestoreStatus("elasticsearch", selectedSnapshot, appCtx.Namespace, appCtx.Logger) - return nil + return checkAndFinalize(appCtx, repository, selectedSnapshot, !runBackground) } // getLatestSnapshot retrieves the most recent snapshot from the repository diff --git a/internal/clients/clickhouse/client.go b/internal/clients/clickhouse/client.go index 8a1d6b9..a91adcc 100644 --- a/internal/clients/clickhouse/client.go +++ b/internal/clients/clickhouse/client.go @@ -210,6 +210,9 @@ func (c *Client) GetRestoreStatus(ctx context.Context, operationID string) (*Act if err != nil { return nil, fmt.Errorf("failed to parse operation status response: %w", err) } + if err = resp.Body.Close(); err != nil { + return nil, fmt.Errorf("failed to close response body: %w", err) + } // It should be exactly one action if lenActions := len(allActions); lenActions != 1 {
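
With `WaitForRestoreCompletion` dropped from the interface, a test double for the remaining four methods stays small. A sketch of such a fake for cmd-level tests; the type and its canned values are assumptions for illustration, not code that exists in the repository:

```go
package example

import (
	"context"

	"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
	"github.com/stackvista/stackstate-backup-cli/internal/clients/clickhouse"
)

// fakeCHClient satisfies clickhouse.Interface with canned responses so that
// command flows can be exercised without a live clickhouse-backup API.
type fakeCHClient struct {
	backups []clickhouse.ListBackupResponse
	action  clickhouse.ActionResponse
}

func (f *fakeCHClient) ListBackups(_ context.Context) ([]clickhouse.ListBackupResponse, error) {
	return f.backups, nil
}

func (f *fakeCHClient) TriggerRestore(_ context.Context, _ string) (string, error) {
	return f.action.OperationID, nil
}

func (f *fakeCHClient) GetRestoreStatus(_ context.Context, _ string) (*clickhouse.ActionResponse, error) {
	return &f.action, nil
}

// Connect is part of the interface but irrelevant for API-level tests.
func (f *fakeCHClient) Connect() (driver.Conn, func() error, error) {
	return nil, func() error { return nil }, nil
}

// Compile-time check that the fake really implements the interface.
var _ clickhouse.Interface = (*fakeCHClient)(nil)
```
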