From 87a391f7a9fcd2fc9bf80d70fc99fce879a8a3f2 Mon Sep 17 00:00:00 2001 From: Priyanka Chatterjee Date: Wed, 17 Sep 2025 00:38:05 +0530 Subject: [PATCH 1/4] Add backup functionality for DuckLake metadata database in collect and compact commands --- cmd/collect.go | 7 ++ cmd/compact.go | 6 ++ internal/database/backup.go | 130 ++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 internal/database/backup.go diff --git a/cmd/collect.go b/cmd/collect.go index 332847fc..80f228af 100644 --- a/cmd/collect.go +++ b/cmd/collect.go @@ -124,6 +124,13 @@ func doCollect(ctx context.Context, cancel context.CancelFunc, args []string) er partitionNames = append(partitionNames, partition.FullName) } slog.Info("Starting collection", "partition(s)", partitionNames, "from", fromTime, "to", toTime) + + // Create backup of metadata database before starting collection + if err := database.BackupDucklakeMetadata(); err != nil { + slog.Warn("Failed to backup metadata database", "error", err) + // Continue with collection - backup failure shouldn't block the operation + } + // now we have the partitions, we can start collecting // start the plugin manager diff --git a/cmd/compact.go b/cmd/compact.go index a02605c0..d95d8e4e 100644 --- a/cmd/compact.go +++ b/cmd/compact.go @@ -77,6 +77,12 @@ func runCompactCmd(cmd *cobra.Command, args []string) { patterns, err := database.GetPartitionPatternsForArgs(maps.Keys(config.GlobalConfig.Partitions), args...) error_helpers.FailOnErrorWithMessage(err, "failed to get partition patterns") + // Create backup of metadata database before starting compaction + if err := database.BackupDucklakeMetadata(); err != nil { + slog.Warn("Failed to backup metadata database", "error", err) + // Continue with compaction - backup failure shouldn't block the operation + } + // do the compaction status, err := doCompaction(ctx, db, patterns) diff --git a/internal/database/backup.go b/internal/database/backup.go new file mode 100644 index 00000000..7a534bd9 --- /dev/null +++ b/internal/database/backup.go @@ -0,0 +1,130 @@ +package database + +import ( + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + "strings" + "time" + + "github.com/turbot/tailpipe/internal/config" +) + +// BackupDucklakeMetadata creates a timestamped backup of the DuckLake metadata database. +// It creates a backup file with format: tailpipe_ducklake.db.backup.YYYYMMDDHHMMSS +// and removes any existing backup files to maintain only the most recent backup. +// +// The backup is created in the same directory as the original database file. +// If the database file doesn't exist, no backup is created and no error is returned. +// +// Returns an error if the backup operation fails. +func BackupDucklakeMetadata() error { + // Get the path to the DuckLake metadata database + dbPath := config.GlobalWorkspaceProfile.GetDucklakeDbPath() + + // Check if the database file exists + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + slog.Debug("DuckLake metadata database does not exist, skipping backup", "path", dbPath) + return nil + } else if err != nil { + return fmt.Errorf("failed to check if database exists: %w", err) + } + + // Generate timestamp for backup filename + timestamp := time.Now().Format("20060102150405") // YYYYMMDDHHMMSS format + + // Create backup filename + dbDir := filepath.Dir(dbPath) + backupFilename := fmt.Sprintf("tailpipe_ducklake.db.backup.%s", timestamp) + backupPath := filepath.Join(dbDir, backupFilename) + + slog.Info("Creating backup of DuckLake metadata database", "source", dbPath, "backup", backupPath) + + // Clean up any existing backup files before creating new one + if err := cleanupOldBackups(dbDir); err != nil { + slog.Warn("Failed to clean up old backup files", "error", err) + // Continue with backup creation even if cleanup fails + } + + // Create the backup by copying the database file + if err := copyFile(dbPath, backupPath); err != nil { + return fmt.Errorf("failed to create backup: %w", err) + } + + slog.Info("Successfully created backup of DuckLake metadata database", "backup", backupPath) + return nil +} + +// cleanupOldBackups removes all existing backup files in the specified directory. +// Backup files are identified by the pattern: tailpipe_ducklake.db.backup.* +func cleanupOldBackups(dir string) error { + entries, err := os.ReadDir(dir) + if err != nil { + return fmt.Errorf("failed to read directory: %w", err) + } + + backupPrefix := "tailpipe_ducklake.db.backup." + var deletedCount int + + for _, entry := range entries { + if entry.IsDir() { + continue + } + + filename := entry.Name() + if strings.HasPrefix(filename, backupPrefix) { + backupPath := filepath.Join(dir, filename) + if err := os.Remove(backupPath); err != nil { + slog.Warn("Failed to remove old backup file", "file", backupPath, "error", err) + // Continue removing other files even if one fails + } else { + slog.Debug("Removed old backup file", "file", backupPath) + deletedCount++ + } + } + } + + if deletedCount > 0 { + slog.Debug("Cleaned up old backup files", "count", deletedCount) + } + + return nil +} + +// copyFile copies a file from src to dst, preserving file permissions. +// It creates the destination file and copies the content using io.Copy. +func copyFile(src, dst string) error { + // Open source file + srcFile, err := os.Open(src) + if err != nil { + return fmt.Errorf("failed to open source file %s: %w", src, err) + } + defer srcFile.Close() + + // Get source file info for permissions + srcInfo, err := srcFile.Stat() + if err != nil { + return fmt.Errorf("failed to get source file info: %w", err) + } + + // Create destination file + dstFile, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, srcInfo.Mode()) + if err != nil { + return fmt.Errorf("failed to create destination file %s: %w", dst, err) + } + defer dstFile.Close() + + // Copy content + if _, err := io.Copy(dstFile, srcFile); err != nil { + return fmt.Errorf("failed to copy file content: %w", err) + } + + // Ensure data is written to disk + if err := dstFile.Sync(); err != nil { + return fmt.Errorf("failed to sync destination file: %w", err) + } + + return nil +} From ab3b884f385cfa3b33d1eede0cd93afebd7ec06d Mon Sep 17 00:00:00 2001 From: Priyanka Chatterjee Date: Wed, 17 Sep 2025 16:50:27 +0530 Subject: [PATCH 2/4] update metadata backup functionality to include WAL and SHM files, update backup file naming conventions, and improve old backup cleanup logic. --- internal/database/backup.go | 154 +++++++++++++++++++++--------------- 1 file changed, 89 insertions(+), 65 deletions(-) diff --git a/internal/database/backup.go b/internal/database/backup.go index 7a534bd9..1232fc52 100644 --- a/internal/database/backup.go +++ b/internal/database/backup.go @@ -2,19 +2,22 @@ package database import ( "fmt" - "io" "log/slog" "os" "path/filepath" "strings" "time" + "github.com/turbot/pipe-fittings/v2/utils" "github.com/turbot/tailpipe/internal/config" ) // BackupDucklakeMetadata creates a timestamped backup of the DuckLake metadata database. -// It creates a backup file with format: tailpipe_ducklake.db.backup.YYYYMMDDHHMMSS -// and removes any existing backup files to maintain only the most recent backup. +// It creates backup files with format: metadata.sqlite.backup.YYYYMMDDHHMMSS +// and also backs up the WAL and SHM files if they exist: +// - metadata.sqlite-wal.backup.YYYYMMDDHHMMSS +// - metadata.sqlite-shm.backup.YYYYMMDDHHMMSS +// It removes any existing backup files to maintain only the most recent backup. // // The backup is created in the same directory as the original database file. // If the database file doesn't exist, no backup is created and no error is returned. @@ -35,54 +38,111 @@ func BackupDucklakeMetadata() error { // Generate timestamp for backup filename timestamp := time.Now().Format("20060102150405") // YYYYMMDDHHMMSS format - // Create backup filename + // Create backup filenames dbDir := filepath.Dir(dbPath) - backupFilename := fmt.Sprintf("tailpipe_ducklake.db.backup.%s", timestamp) - backupPath := filepath.Join(dbDir, backupFilename) + mainBackupFilename := fmt.Sprintf("metadata.sqlite.backup.%s", timestamp) + mainBackupPath := filepath.Join(dbDir, mainBackupFilename) + + // Also prepare paths for WAL and SHM files + walPath := dbPath + "-wal" + shmPath := dbPath + "-shm" + walBackupFilename := fmt.Sprintf("metadata.sqlite-wal.backup.%s", timestamp) + shmBackupFilename := fmt.Sprintf("metadata.sqlite-shm.backup.%s", timestamp) + walBackupPath := filepath.Join(dbDir, walBackupFilename) + shmBackupPath := filepath.Join(dbDir, shmBackupFilename) + + slog.Info("Creating backup of DuckLake metadata database", "source", dbPath, "backup", mainBackupPath) + + // Create the main database backup first + if err := utils.CopyFile(dbPath, mainBackupPath); err != nil { + return fmt.Errorf("failed to create main database backup: %w", err) + } - slog.Info("Creating backup of DuckLake metadata database", "source", dbPath, "backup", backupPath) + // Backup WAL file if it exists + if _, err := os.Stat(walPath); err == nil { + if err := utils.CopyFile(walPath, walBackupPath); err != nil { + slog.Warn("Failed to backup WAL file", "source", walPath, "error", err) + // Continue - WAL backup failure is not critical + } else { + slog.Debug("Successfully backed up WAL file", "backup", walBackupPath) + } + } - // Clean up any existing backup files before creating new one - if err := cleanupOldBackups(dbDir); err != nil { + // Backup SHM file if it exists + if _, err := os.Stat(shmPath); err == nil { + if err := utils.CopyFile(shmPath, shmBackupPath); err != nil { + slog.Warn("Failed to backup SHM file", "source", shmPath, "error", err) + // Continue - SHM backup failure is not critical + } else { + slog.Debug("Successfully backed up SHM file", "backup", shmBackupPath) + } + } + + slog.Info("Successfully created backup of DuckLake metadata database", "backup", mainBackupPath) + + // Clean up old backup files after successfully creating the new one + if err := cleanupOldBackups(dbDir, timestamp); err != nil { slog.Warn("Failed to clean up old backup files", "error", err) - // Continue with backup creation even if cleanup fails + // Don't return error - the backup was successful, cleanup is just housekeeping + } + return nil +} + +// isBackupFile checks if a filename matches any of the backup patterns +func isBackupFile(filename string) bool { + backupPrefixes := []string{ + "metadata.sqlite.backup.", + "metadata.sqlite-wal.backup.", + "metadata.sqlite-shm.backup.", } - // Create the backup by copying the database file - if err := copyFile(dbPath, backupPath); err != nil { - return fmt.Errorf("failed to create backup: %w", err) + for _, prefix := range backupPrefixes { + if strings.HasPrefix(filename, prefix) { + return true + } } + return false +} - slog.Info("Successfully created backup of DuckLake metadata database", "backup", backupPath) - return nil +// shouldRemoveBackup determines if a backup file should be removed +func shouldRemoveBackup(filename, excludeTimestamp string) bool { + if !isBackupFile(filename) { + return false + } + // Don't remove files with the current timestamp + return !strings.HasSuffix(filename, "."+excludeTimestamp) } -// cleanupOldBackups removes all existing backup files in the specified directory. -// Backup files are identified by the pattern: tailpipe_ducklake.db.backup.* -func cleanupOldBackups(dir string) error { +// cleanupOldBackups removes all existing backup files in the specified directory, +// except for the newly created backup files with the given timestamp. +// Backup files are identified by the patterns: +// - metadata.sqlite.backup.* +// - metadata.sqlite-wal.backup.* +// - metadata.sqlite-shm.backup.* +func cleanupOldBackups(dir, excludeTimestamp string) error { entries, err := os.ReadDir(dir) if err != nil { return fmt.Errorf("failed to read directory: %w", err) } - backupPrefix := "tailpipe_ducklake.db.backup." var deletedCount int - for _, entry := range entries { if entry.IsDir() { continue } filename := entry.Name() - if strings.HasPrefix(filename, backupPrefix) { - backupPath := filepath.Join(dir, filename) - if err := os.Remove(backupPath); err != nil { - slog.Warn("Failed to remove old backup file", "file", backupPath, "error", err) - // Continue removing other files even if one fails - } else { - slog.Debug("Removed old backup file", "file", backupPath) - deletedCount++ - } + if !shouldRemoveBackup(filename, excludeTimestamp) { + continue + } + + backupPath := filepath.Join(dir, filename) + if err := os.Remove(backupPath); err != nil { + slog.Warn("Failed to remove old backup file", "file", backupPath, "error", err) + // Continue removing other files even if one fails + } else { + slog.Debug("Removed old backup file", "file", backupPath) + deletedCount++ } } @@ -92,39 +152,3 @@ func cleanupOldBackups(dir string) error { return nil } - -// copyFile copies a file from src to dst, preserving file permissions. -// It creates the destination file and copies the content using io.Copy. -func copyFile(src, dst string) error { - // Open source file - srcFile, err := os.Open(src) - if err != nil { - return fmt.Errorf("failed to open source file %s: %w", src, err) - } - defer srcFile.Close() - - // Get source file info for permissions - srcInfo, err := srcFile.Stat() - if err != nil { - return fmt.Errorf("failed to get source file info: %w", err) - } - - // Create destination file - dstFile, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, srcInfo.Mode()) - if err != nil { - return fmt.Errorf("failed to create destination file %s: %w", dst, err) - } - defer dstFile.Close() - - // Copy content - if _, err := io.Copy(dstFile, srcFile); err != nil { - return fmt.Errorf("failed to copy file content: %w", err) - } - - // Ensure data is written to disk - if err := dstFile.Sync(); err != nil { - return fmt.Errorf("failed to sync destination file: %w", err) - } - - return nil -} From ea6127c08cb95ac05c81acbe09ea51bc4399eb1e Mon Sep 17 00:00:00 2001 From: Priyanka Chatterjee Date: Wed, 17 Sep 2025 17:46:57 +0530 Subject: [PATCH 3/4] Refactor DuckLake metadata backup to remove SHM file handling. Update comments and cleanup logic to reflect changes in backup file management. --- internal/database/backup.go | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/internal/database/backup.go b/internal/database/backup.go index 1232fc52..05d106c4 100644 --- a/internal/database/backup.go +++ b/internal/database/backup.go @@ -14,9 +14,8 @@ import ( // BackupDucklakeMetadata creates a timestamped backup of the DuckLake metadata database. // It creates backup files with format: metadata.sqlite.backup.YYYYMMDDHHMMSS -// and also backs up the WAL and SHM files if they exist: +// and also backs up the WAL file if it exists: // - metadata.sqlite-wal.backup.YYYYMMDDHHMMSS -// - metadata.sqlite-shm.backup.YYYYMMDDHHMMSS // It removes any existing backup files to maintain only the most recent backup. // // The backup is created in the same directory as the original database file. @@ -43,13 +42,10 @@ func BackupDucklakeMetadata() error { mainBackupFilename := fmt.Sprintf("metadata.sqlite.backup.%s", timestamp) mainBackupPath := filepath.Join(dbDir, mainBackupFilename) - // Also prepare paths for WAL and SHM files + // Also prepare paths for WAL file walPath := dbPath + "-wal" - shmPath := dbPath + "-shm" walBackupFilename := fmt.Sprintf("metadata.sqlite-wal.backup.%s", timestamp) - shmBackupFilename := fmt.Sprintf("metadata.sqlite-shm.backup.%s", timestamp) walBackupPath := filepath.Join(dbDir, walBackupFilename) - shmBackupPath := filepath.Join(dbDir, shmBackupFilename) slog.Info("Creating backup of DuckLake metadata database", "source", dbPath, "backup", mainBackupPath) @@ -68,16 +64,6 @@ func BackupDucklakeMetadata() error { } } - // Backup SHM file if it exists - if _, err := os.Stat(shmPath); err == nil { - if err := utils.CopyFile(shmPath, shmBackupPath); err != nil { - slog.Warn("Failed to backup SHM file", "source", shmPath, "error", err) - // Continue - SHM backup failure is not critical - } else { - slog.Debug("Successfully backed up SHM file", "backup", shmBackupPath) - } - } - slog.Info("Successfully created backup of DuckLake metadata database", "backup", mainBackupPath) // Clean up old backup files after successfully creating the new one @@ -93,7 +79,6 @@ func isBackupFile(filename string) bool { backupPrefixes := []string{ "metadata.sqlite.backup.", "metadata.sqlite-wal.backup.", - "metadata.sqlite-shm.backup.", } for _, prefix := range backupPrefixes { @@ -118,7 +103,6 @@ func shouldRemoveBackup(filename, excludeTimestamp string) bool { // Backup files are identified by the patterns: // - metadata.sqlite.backup.* // - metadata.sqlite-wal.backup.* -// - metadata.sqlite-shm.backup.* func cleanupOldBackups(dir, excludeTimestamp string) error { entries, err := os.ReadDir(dir) if err != nil { From aea1b878725bca614850d21c1d4e4b3a1abc0f7b Mon Sep 17 00:00:00 2001 From: Priyanka Chatterjee Date: Wed, 17 Sep 2025 17:56:11 +0530 Subject: [PATCH 4/4] Add backup creation before partition deletion in runPartitionDeleteCmd --- cmd/partition.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmd/partition.go b/cmd/partition.go index 3a72c583..7faf8be2 100644 --- a/cmd/partition.go +++ b/cmd/partition.go @@ -301,6 +301,13 @@ func runPartitionDeleteCmd(cmd *cobra.Command, args []string) { error_helpers.FailOnError(err) defer db.Close() + // Create backup before deletion + slog.Info("Creating backup before partition deletion", "partition", partitionName) + if err := database.BackupDucklakeMetadata(); err != nil { + slog.Warn("Failed to create backup before partition deletion", "error", err) + // Continue with deletion - backup failure should not prevent deletion + } + // show spinner while deleting the partition spinner := statushooks.NewStatusSpinnerHook() spinner.SetStatus(fmt.Sprintf("Deleting partition %s", partition.TableName))