From c65ab7a406181de761533d8348c5c99c49a9800e Mon Sep 17 00:00:00 2001 From: Nico Bistolfi Date: Tue, 5 Aug 2025 14:42:53 -0700 Subject: [PATCH 1/2] no-comments on pg_dump --- cmd/lambda/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/lambda/main.go b/cmd/lambda/main.go index dc9c99a..0b9dab6 100644 --- a/cmd/lambda/main.go +++ b/cmd/lambda/main.go @@ -206,6 +206,7 @@ func (h *BackupHandler) createBackup(ctx context.Context) ([]byte, error) { "--clean", "--if-exists", "--exclude-schema=supabase_migrations", + "--no-comments", ) // Capture output From 057901be8151804f5db02c4ba113f53106794df9 Mon Sep 17 00:00:00 2001 From: Nico Bistolfi Date: Tue, 5 Aug 2025 15:03:33 -0700 Subject: [PATCH 2/2] dont upload a new backup if the database hasnt changed --- cmd/lambda/main.go | 201 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 179 insertions(+), 22 deletions(-) diff --git a/cmd/lambda/main.go b/cmd/lambda/main.go index 0b9dab6..0ec56e0 100644 --- a/cmd/lambda/main.go +++ b/cmd/lambda/main.go @@ -3,7 +3,10 @@ package main import ( "bytes" "context" + "crypto/sha256" + "encoding/hex" "fmt" + "io" "log" "net/url" "os" @@ -15,6 +18,7 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/joho/godotenv" ) @@ -110,33 +114,61 @@ func (h *BackupHandler) HandleRequest(ctx context.Context) error { month := now.Format("2006-01") day := now.Format("2006-01-02") - // Upload daily backup + // Calculate checksum of the new backup + newChecksum := h.calculateChecksum(backupData) + + // Find the most recent daily backup to compare against + mostRecentBackup, err := h.findMostRecentBackup(ctx, "daily/") + if err != nil { + log.Printf("Warning: couldn't find most recent backup: %v", err) + } + + // Check if content has changed from the most recent backup + contentChanged := true + if mostRecentBackup != "" { + existingChecksum, err := h.getObjectChecksum(ctx, mostRecentBackup) + if err == nil && existingChecksum == newChecksum { + contentChanged = false + log.Printf("Backup content unchanged from %s, skipping all uploads", mostRecentBackup) + } + } + + // Upload daily backup only if content changed dailyKey := fmt.Sprintf("daily/%s-backup.sql", day) - if err := h.uploadToS3(ctx, dailyKey, backupData); err != nil { - return fmt.Errorf("failed to upload daily backup: %w", err) - } - log.Printf("Daily backup uploaded: %s", dailyKey) - - // Check and create monthly backup if needed - monthlyKey := fmt.Sprintf("monthly/%s-backup.sql", month) - if exists, err := h.objectExists(ctx, monthlyKey); err != nil { - return fmt.Errorf("failed to check monthly backup: %w", err) - } else if !exists { - if err := h.uploadToS3(ctx, monthlyKey, backupData); err != nil { - return fmt.Errorf("failed to upload monthly backup: %w", err) + if contentChanged { + if err := h.uploadToS3WithChecksum(ctx, dailyKey, backupData, newChecksum); err != nil { + return fmt.Errorf("failed to upload daily backup: %w", err) } - log.Printf("Monthly backup created: %s", monthlyKey) + log.Printf("Daily backup uploaded: %s", dailyKey) + } else { + // Even though content hasn't changed, we might want to update the timestamp + // by creating a new file with today's date pointing to the same content + return nil // Skip all uploads if content hasn't changed } - // Check and create yearly backup if needed - yearlyKey := fmt.Sprintf("yearly/%s-backup.sql", year) - if exists, err := h.objectExists(ctx, yearlyKey); err != nil { - return fmt.Errorf("failed to check yearly backup: %w", err) - } else if !exists { - if err := h.uploadToS3(ctx, yearlyKey, backupData); err != nil { - return fmt.Errorf("failed to upload yearly backup: %w", err) + // Only create monthly/yearly backups if content changed + if contentChanged { + // Check and create monthly backup if needed + monthlyKey := fmt.Sprintf("monthly/%s-backup.sql", month) + if exists, err := h.objectExists(ctx, monthlyKey); err != nil { + return fmt.Errorf("failed to check monthly backup: %w", err) + } else if !exists { + if err := h.uploadToS3WithChecksum(ctx, monthlyKey, backupData, newChecksum); err != nil { + return fmt.Errorf("failed to upload monthly backup: %w", err) + } + log.Printf("Monthly backup created: %s", monthlyKey) + } + + // Check and create yearly backup if needed + yearlyKey := fmt.Sprintf("yearly/%s-backup.sql", year) + if exists, err := h.objectExists(ctx, yearlyKey); err != nil { + return fmt.Errorf("failed to check yearly backup: %w", err) + } else if !exists { + if err := h.uploadToS3WithChecksum(ctx, yearlyKey, backupData, newChecksum); err != nil { + return fmt.Errorf("failed to upload yearly backup: %w", err) + } + log.Printf("Yearly backup created: %s", yearlyKey) } - log.Printf("Yearly backup created: %s", yearlyKey) } // Clean up old daily backups (keep only last 7 days) @@ -228,11 +260,136 @@ func (h *BackupHandler) createBackup(ctx context.Context) ([]byte, error) { } backupData := stdout.Bytes() + + // Remove timestamp comments that cause unnecessary duplicates + backupData = h.removeTimestampComments(backupData) + log.Printf("Backup created successfully, size: %d bytes", len(backupData)) return backupData, nil } +func (h *BackupHandler) removeTimestampComments(data []byte) []byte { + lines := bytes.Split(data, []byte("\n")) + var filtered [][]byte + + for _, line := range lines { + // Skip lines that start with "-- Started on" or "-- Completed on" + if bytes.HasPrefix(line, []byte("-- Started on ")) || + bytes.HasPrefix(line, []byte("-- Completed on ")) { + continue + } + filtered = append(filtered, line) + } + + return bytes.Join(filtered, []byte("\n")) +} + +func (h *BackupHandler) findMostRecentBackup(ctx context.Context, prefix string) (string, error) { + resp, err := h.s3Client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{ + Bucket: aws.String(h.bucket), + Prefix: aws.String(prefix), + }) + if err != nil { + return "", err + } + + if len(resp.Contents) == 0 { + return "", nil + } + + // Find the most recent backup by LastModified time + var mostRecent types.Object + var found bool + for _, obj := range resp.Contents { + if !found || obj.LastModified.After(*mostRecent.LastModified) { + mostRecent = obj + found = true + } + } + + if found { + return *mostRecent.Key, nil + } + + return "", nil +} + +func (h *BackupHandler) calculateChecksum(data []byte) string { + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]) +} + +func (h *BackupHandler) getObjectChecksum(ctx context.Context, key string) (string, error) { + // Try to get checksum from object metadata + resp, err := h.s3Client.HeadObject(ctx, &s3.HeadObjectInput{ + Bucket: aws.String(h.bucket), + Key: aws.String(key), + }) + if err != nil { + return "", err + } + + // Check if we stored the checksum in metadata + if resp.Metadata != nil { + if checksum, ok := resp.Metadata["sha256"]; ok { + return checksum, nil + } + } + + // If no checksum in metadata, we need to download and calculate + // This is for backwards compatibility with existing backups + getResp, err := h.s3Client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(h.bucket), + Key: aws.String(key), + }) + if err != nil { + return "", err + } + defer getResp.Body.Close() + + hash := sha256.New() + if _, err := io.Copy(hash, getResp.Body); err != nil { + return "", err + } + + return hex.EncodeToString(hash.Sum(nil)), nil +} + +func (h *BackupHandler) uploadIfChanged(ctx context.Context, key string, data []byte, newChecksum string) (bool, error) { + // Try to get existing checksum + existingChecksum, err := h.getObjectChecksum(ctx, key) + if err != nil { + // If object doesn't exist, upload it + if strings.Contains(err.Error(), "NotFound") { + return true, h.uploadToS3WithChecksum(ctx, key, data, newChecksum) + } + // For other errors, still try to upload + log.Printf("Warning: couldn't get checksum for %s: %v", key, err) + } + + // Compare checksums + if existingChecksum == newChecksum { + return false, nil // No upload needed + } + + // Upload with checksum + return true, h.uploadToS3WithChecksum(ctx, key, data, newChecksum) +} + +func (h *BackupHandler) uploadToS3WithChecksum(ctx context.Context, key string, data []byte, checksum string) error { + _, err := h.s3Client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: aws.String(h.bucket), + Key: aws.String(key), + Body: bytes.NewReader(data), + ContentType: aws.String("application/sql"), + Metadata: map[string]string{ + "sha256": checksum, + }, + }) + return err +} + func (h *BackupHandler) uploadToS3(ctx context.Context, key string, data []byte) error { _, err := h.s3Client.PutObject(ctx, &s3.PutObjectInput{ Bucket: aws.String(h.bucket),