diff --git a/pkg/backup/backuper.go b/pkg/backup/backuper.go index f78bad77..cae6667c 100644 --- a/pkg/backup/backuper.go +++ b/pkg/backup/backuper.go @@ -11,6 +11,7 @@ import ( "regexp" "strings" "sync" + "time" "github.com/Altinity/clickhouse-backup/v2/pkg/common" "github.com/Altinity/clickhouse-backup/v2/pkg/metadata" @@ -51,6 +52,8 @@ type Backuper struct { resumableState *resumable.State shadowBackupUUIDs []string shadowBackupUUIDsMutex sync.Mutex + fileManifest *storage.BackupManifest + fileManifestMu sync.Mutex } func NewBackuper(cfg *config.Config, opts ...BackuperOpt) *Backuper { @@ -569,6 +572,72 @@ func (b *Backuper) addShadowBackupUUID(uuid string) { b.shadowBackupUUIDsMutex.Unlock() }
+// recordUploadedFile records one uploaded object in the backup manifest.
+// remotePath is the full remote path including the backupName prefix; the
+// prefix is stripped so the manifest keeps paths relative to the backup root.
+// It is a no-op when no manifest is being collected and is safe to call from
+// concurrent upload goroutines.
+func (b *Backuper) recordUploadedFile(backupName, remotePath string, size int64) {
+	relPath := strings.TrimPrefix(remotePath, backupName+"/")
+	recordedAt := time.Now().UTC()
+	b.fileManifestMu.Lock()
+	defer b.fileManifestMu.Unlock()
+	// Checked under the mutex: recordUploadedLocalFiles may discard the manifest
+	// from another goroutine.
+	if b.fileManifest == nil {
+		return
+	}
+	b.fileManifest.AddFile(relPath, size, recordedAt)
+}
+
+// recordUploadedLocalFiles records a batch of uploaded files in the backup
+// manifest, taking each size and modification time from the local source file.
+// remotePath is the remote directory (including the backupName prefix) and
+// localBasePath is the local directory that the entries of files are relative to.
+//
+// If any local file cannot be stat'ed the manifest is discarded for the rest of
+// the upload: a manifest that lists only some files of a part would make a
+// manifest-driven download fetch an incomplete part, whereas a missing manifest
+// makes the download fall back to listing the remote storage.
+func (b *Backuper) recordUploadedLocalFiles(backupName, remotePath, localBasePath string, files []string) {
+	b.fileManifestMu.Lock()
+	collecting := b.fileManifest != nil
+	b.fileManifestMu.Unlock()
+	if !collecting {
+		return
+	}
+
+	// Stat outside the critical section so concurrent uploads are not serialized
+	// on filesystem I/O.
+	entries := make([]storage.ManifestEntry, 0, len(files))
+	complete := true
+	for _, f := range files {
+		info, err := os.Stat(path.Join(localBasePath, f))
+		if err != nil {
+			complete = false
+			break
+		}
+		entries = append(entries, storage.ManifestEntry{
+			Path:         strings.TrimPrefix(path.Join(remotePath, f), backupName+"/"),
+			Size:         info.Size(),
+			LastModified: info.ModTime().UTC(),
+		})
+	}
+
+	b.fileManifestMu.Lock()
+	defer b.fileManifestMu.Unlock()
+	if b.fileManifest == nil {
+		return
+	}
+	if !complete {
+		b.fileManifest = nil
+		return
+	}
+	for _, e := range entries {
+		b.fileManifest.AddFile(e.Path, e.Size, e.LastModified)
+	}
+}
+
// CheckDisksUsage - https://github.com/Altinity/clickhouse-backup/issues/878 func (b *Backuper) CheckDisksUsage(backup storage.Backup, disks []clickhouse.Disk, isResumeExists bool, tablePattern string) error { if tablePattern != "" && tablePattern != "*.*" && tablePattern != "*" { diff --git a/pkg/backup/download.go b/pkg/backup/download.go index 28a8a8ee..1524033d 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -165,6 +165,10 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ if !found { return errors.Errorf("'%s' is not found on remote storage", backupName) } + // Download file manifest for Walk-free restore (falls back gracefully if not present) + var backupManifest *storage.BackupManifest + backupManifest, _ = b.dst.DownloadManifest(ctx, backupName) + if len(remoteBackup.Tables) == 0 && remoteBackup.RBACSize == 0 && remoteBackup.ConfigSize == 0 && remoteBackup.NamedCollectionsSize == 0 && !b.cfg.General.AllowEmptyBackups { return errors.Errorf("'%s' is empty backup", backupName) } @@ -266,7 +270,7 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ }).Msg("download table start") var downloadDataErr error var downloadDataSize uint64 - downloadDataSize, downloadDataErr = b.downloadTableData(dataCtx, remoteBackup.BackupMetadata, *tableMetadataAfterDownload[idx], disks, hardlinkExistsFiles) + downloadDataSize, downloadDataErr = b.downloadTableData(dataCtx, remoteBackup.BackupMetadata,
*tableMetadataAfterDownload[idx], disks, hardlinkExistsFiles, backupManifest) if downloadDataErr != nil { return errors.WithMessage(downloadDataErr, "downloadTableData") } @@ -720,7 +724,7 @@ func (b *Backuper) downloadBackupRelatedDir(ctx context.Context, remoteBackup st return uint64(remoteFileInfo.Size()), nil } -func (b *Backuper) downloadTableData(ctx context.Context, remoteBackup metadata.BackupMetadata, table metadata.TableMetadata, disks []clickhouse.Disk, hardlinkExistsFiles bool) (uint64, error) { +func (b *Backuper) downloadTableData(ctx context.Context, remoteBackup metadata.BackupMetadata, table metadata.TableMetadata, disks []clickhouse.Disk, hardlinkExistsFiles bool, manifest *storage.BackupManifest) (uint64, error) { dbAndTableDir := path.Join(common.TablePathEncode(table.Database), common.TablePathEncode(table.Table)) dataGroup, dataCtx := errgroup.WithContext(ctx) dataGroup.SetLimit(int(b.cfg.General.DownloadConcurrency)) @@ -882,6 +886,24 @@ func (b *Backuper) downloadTableData(ctx context.Context, remoteBackup metadata. 
} } + // Try manifest-based download to avoid Walk (ListObjectsV2) + if manifest != nil { + manifestPrefix := path.Join("shadow", dbAndTableDir, capturedDisk, capturedPart.Name) + manifestFiles := manifest.FilesUnderPrefix(manifestPrefix) + if len(manifestFiles) > 0 { + pathSize, downloadErr := b.dst.DownloadPathWithManifest(dataCtx, partRemotePath, partLocalPath, manifestFiles, manifestPrefix, b.cfg.General.RetriesOnFailure, b.cfg.General.RetriesDuration, b.cfg.General.RetriesJitter, b, b.cfg.General.DownloadMaxBytesPerSecond) + if downloadErr != nil { + return errors.WithMessage(downloadErr, "DownloadPathWithManifest") + } + atomic.AddUint64(&downloadedSize, uint64(pathSize)) + if b.resume { + b.resumableState.AppendToState(partRemotePath, pathSize) + } + log.Debug().Msgf("finish %s -> %s (manifest, %d files)", partRemotePath, partLocalPath, len(manifestFiles)) + return nil + } + } + // Fall back to Walk (ListObjectsV2) when no manifest is available pathSize, downloadErr := b.dst.DownloadPath(dataCtx, partRemotePath, partLocalPath, b.cfg.General.RetriesOnFailure, b.cfg.General.RetriesDuration, b.cfg.General.RetriesJitter, b, b.cfg.General.DownloadMaxBytesPerSecond) if downloadErr != nil { return errors.WithMessage(downloadErr, "DownloadPath") diff --git a/pkg/backup/upload.go b/pkg/backup/upload.go index bf85f931..e0e48d51 100644 --- a/pkg/backup/upload.go +++ b/pkg/backup/upload.go @@ -148,6 +148,9 @@ func (b *Backuper) Upload(backupName string, deleteSource bool, diffFrom, diffFr defer b.resumableState.Close() } + // Initialize file manifest to record all uploaded files for Walk-free restore + b.fileManifest = storage.NewBackupManifest(backupName) + compressedDataSize := int64(0) metadataSize := int64(0) @@ -272,6 +275,17 @@ func (b *Backuper) Upload(backupName string, deleteSource bool, diffFrom, diffFr return errors.Wrapf(err, "can't upload %s", remoteBackupMetaFile) } } + // Record metadata.json in the manifest, then upload the manifest itself + 
b.recordUploadedFile(backupName, remoteBackupMetaFile, int64(len(newBackupMetadataBody))) + if b.fileManifest != nil { + if manifestErr := b.dst.UploadManifest(ctx, backupName, b.fileManifest); manifestErr != nil { + log.Warn().Err(manifestErr).Msg("failed to upload manifest.json, restore will fall back to Walk") + } else { + log.Info().Int("total_files", b.fileManifest.TotalFiles). + Int64("total_size", b.fileManifest.TotalSize). + Msg("uploaded backup manifest") + } + } log.Info().Fields(map[string]interface{}{ "backup": backupName, "operation": "upload", @@ -604,6 +618,7 @@ func (b *Backuper) uploadTableData(ctx context.Context, backupName string, delet } atomic.AddInt64(&uploadedBytes, uploadPathBytes) + b.recordUploadedLocalFiles(backupName, remotePath, backupPath, partFiles) if b.resume { b.resumableState.AppendToState(remotePathFull, uploadPathBytes) } @@ -649,6 +664,7 @@ func (b *Backuper) uploadTableData(ctx context.Context, backupName string, delet return errors.Wrapf(err, "can't check uploaded remoteDataFile: %s, error", remoteDataFile) } atomic.AddInt64(&uploadedBytes, remoteFile.Size()) + b.recordUploadedFile(backupName, remoteDataFile, remoteFile.Size()) if b.resume { b.resumableState.AppendToState(remoteDataFile, remoteFile.Size()) } @@ -707,6 +723,7 @@ func (b *Backuper) uploadTableMetadataRegular(ctx context.Context, backupName st if err != nil { return 0, errors.Wrap(err, "can't upload") } + b.recordUploadedFile(backupName, remoteTableMetaFile, int64(len(content))) if b.resume { b.resumableState.AppendToState(remoteTableMetaFile, int64(len(content))) } diff --git a/pkg/storage/manifest.go b/pkg/storage/manifest.go new file mode 100644 index 00000000..125c9915 --- /dev/null +++ b/pkg/storage/manifest.go @@ -0,0 +1,243 @@ +package storage + +import ( + "bytes" + "context" + "encoding/json" + "io" + "os" + "path" + "strings" + "time" + + "github.com/Altinity/clickhouse-backup/v2/pkg/common" + "github.com/eapache/go-resiliency/retrier" + 
"github.com/pkg/errors"
+	"github.com/rs/zerolog/log"
+)
+
+const (
+	// ManifestFileName is the name of the file manifest stored alongside metadata.json.
+	// It lists every file in the backup with its relative path, size, and last-modified time,
+	// allowing restore operations to skip the expensive S3 ListObjects Walk.
+	ManifestFileName = "manifest.json"
+	// ManifestVersion is the current manifest format version; it is the value
+	// written into BackupManifest.Version by the constructors in this file.
+	ManifestVersion = 1
+)
+
+// BackupManifest is a listing of all files within a single backup.
+// It is written during upload and read during download to avoid
+// the expensive recursive Walk (ListObjects) on object storage.
+//
+// Version is the manifest format version and BackupName the backup the listing
+// belongs to. CreatedAt is the creation time of the in-memory manifest.
+// Files holds one entry per recorded file; TotalFiles and TotalSize are the
+// entry count and the sum of entry sizes, both maintained by AddFile.
+// The field order below is also the key order of the serialized JSON.
+type BackupManifest struct {
+	Version    int             `json:"version"`
+	BackupName string          `json:"backup_name"`
+	CreatedAt  time.Time       `json:"created_at"`
+	TotalSize  int64           `json:"total_size"`
+	TotalFiles int             `json:"total_files"`
+	Files      []ManifestEntry `json:"files"`
+}
+
+// ManifestEntry represents a single file in the backup manifest.
+// Path is expected to be relative to the backup root (it does not start with
+// the backup name), Size is the file size in bytes and LastModified the
+// timestamp supplied by whoever recorded the entry.
+type ManifestEntry struct {
+	Path         string    `json:"path"`
+	Size         int64     `json:"size"`
+	LastModified time.Time `json:"last_modified"`
+}
+
+// manifestFile implements RemoteFile interface so manifest entries
+// can be used directly in download paths that expect RemoteFile.
+// NOTE(review): RemoteFile is declared outside this file; the three accessors
+// below are presumably its complete method set — confirm against the interface.
+type manifestFile struct {
+	name         string
+	size         int64
+	lastModified time.Time
+}
+
+// Name returns the file name stored when the value was built.
+func (mf *manifestFile) Name() string            { return mf.name }
+
+// Size returns the file size in bytes.
+func (mf *manifestFile) Size() int64             { return mf.size }
+
+// LastModified returns the last-modified timestamp of the file.
+func (mf *manifestFile) LastModified() time.Time { return mf.lastModified }
+
+// ManifestEntryToRemoteFile converts a ManifestEntry to a RemoteFile,
+// adjusting the path relative to the given prefix.
+func ManifestEntryToRemoteFile(entry ManifestEntry, prefix string) RemoteFile {
+	name := entry.Path
+	if prefix != "" {
+		// Strip the prefix only on a path-segment boundary. A plain TrimPrefix
+		// would turn "a/part10/x.bin" into "0/x.bin" for the prefix "a/part1".
+		dir := strings.TrimSuffix(prefix, "/")
+		switch {
+		case name == prefix:
+			name = ""
+		case strings.HasPrefix(name, dir+"/"):
+			name = name[len(dir)+1:]
+		}
+	}
+	return &manifestFile{
+		name:         name,
+		size:         entry.Size,
+		lastModified: entry.LastModified,
+	}
+}
+
+// defaultManifestCapacity is the minimum number of entries pre-allocated for
+// the file list of a new manifest.
+const defaultManifestCapacity = 256
+
+// NewBackupManifest creates a new empty manifest for the given backup with the
+// default pre-allocated capacity.
+func NewBackupManifest(backupName string) *BackupManifest {
+	return NewBackupManifestWithCapacity(backupName, defaultManifestCapacity)
+}
+
+// NewBackupManifestWithCapacity creates a new empty manifest with a pre-allocated
+// file slice capacity, reducing GC pressure for large backups by avoiding repeated
+// slice growth. Use this when the expected file count is known or can be estimated.
+// Values of expectedFiles below the default capacity (including zero and
+// negative values) are raised to it.
+func NewBackupManifestWithCapacity(backupName string, expectedFiles int) *BackupManifest {
+	if expectedFiles < defaultManifestCapacity {
+		expectedFiles = defaultManifestCapacity
+	}
+	return &BackupManifest{
+		Version:    ManifestVersion,
+		BackupName: backupName,
+		CreatedAt:  time.Now().UTC(),
+		Files:      make([]ManifestEntry, 0, expectedFiles),
+	}
+}
+
+// AddFile appends a file entry to the manifest and updates the TotalFiles and
+// TotalSize counters. It performs no locking of its own, so concurrent callers
+// must serialize access themselves.
+func (m *BackupManifest) AddFile(relativePath string, size int64, lastModified time.Time) {
+	m.Files = append(m.Files, ManifestEntry{
+		Path:         relativePath,
+		Size:         size,
+		LastModified: lastModified,
+	})
+	m.TotalFiles = len(m.Files)
+	m.TotalSize += size
+}
+
+// HasFile returns true if the manifest contains an entry with the exact given path.
+// The lookup is a linear scan over all entries.
+func (m *BackupManifest) HasFile(relativePath string) bool {
+	for i := range m.Files {
+		if m.Files[i].Path == relativePath {
+			return true
+		}
+	}
+	return false
+}
+
+// FilesUnderPrefix returns all manifest entries whose path starts with the given prefix.
+// The prefix should NOT include the backup name (e.g., "shadow/default/my_table/default/part1").
+// Matching is done on whole path segments: "part1" does not match "part10/...".
+func (m *BackupManifest) FilesUnderPrefix(prefix string) []ManifestEntry {
+	if prefix == "" {
+		// Every path starts with the empty prefix. Without this case the
+		// normalisation below would turn "" into "/" and match nothing.
+		return append([]ManifestEntry(nil), m.Files...)
+	}
+	// Force exactly one trailing slash so that "part1" cannot match "part10/...".
+	dirPrefix := strings.TrimSuffix(prefix, "/") + "/"
+	var result []ManifestEntry
+	for _, f := range m.Files {
+		if strings.HasPrefix(f.Path, dirPrefix) {
+			result = append(result, f)
+		}
+	}
+	return result
+}
+
+// Marshal serializes the manifest to tab-indented JSON.
+func (m *BackupManifest) Marshal() ([]byte, error) {
+	return json.MarshalIndent(m, "", "\t")
+}
+
+// UnmarshalManifest deserializes a manifest from JSON.
+// It returns an error for malformed JSON and for a manifest whose version is
+// newer than ManifestVersion, because the layout of such a manifest is unknown
+// to this code.
+func UnmarshalManifest(data []byte) (*BackupManifest, error) {
+	var m BackupManifest
+	if err := json.Unmarshal(data, &m); err != nil {
+		return nil, err
+	}
+	if m.Version > ManifestVersion {
+		return nil, errors.Errorf("unsupported manifest version %d, newest supported is %d", m.Version, ManifestVersion)
+	}
+	return &m, nil
+}
+
+// UploadManifest uploads the manifest to remote storage alongside metadata.json,
+// under "<backupName>/manifest.json". A nil manifest is rejected instead of
+// being serialized as the JSON literal null.
+func (bd *BackupDestination) UploadManifest(ctx context.Context, backupName string, manifest *BackupManifest) error {
+	if manifest == nil {
+		return errors.New("UploadManifest: manifest is nil")
+	}
+	data, err := manifest.Marshal()
+	if err != nil {
+		return errors.WithMessage(err, "UploadManifest Marshal")
+	}
+	remotePath := path.Join(backupName, ManifestFileName)
+	// NOTE(review): the size argument is passed as 0, as before — confirm that
+	// PutFile does not need the real payload length (len(data)).
+	return bd.PutFile(ctx, remotePath, io.NopCloser(bytes.NewReader(data)), 0)
+}
+
+// DownloadManifest attempts to download and parse the manifest for a given backup.
+// It never returns a non-nil error: a manifest that is missing, unreadable or
+// unparsable yields nil, nil so that callers fall back to Walk.
+func (bd *BackupDestination) DownloadManifest(ctx context.Context, backupName string) (*BackupManifest, error) {
+	remotePath := path.Join(backupName, ManifestFileName)
+	r, err := bd.GetFileReader(ctx, remotePath)
+	if err != nil {
+		// Expected for backups uploaded before manifests existed. The error is
+		// attached to the log entry so that a permission or network failure is
+		// not mistaken for a missing file.
+		log.Debug().Str("backup", backupName).Err(err).Msg("no manifest.json found, will fall back to Walk")
+		return nil, nil
+	}
+	defer func() {
+		if closeErr := r.Close(); closeErr != nil {
+			log.Debug().Str("backup", backupName).Err(closeErr).Msg("can't close manifest.json reader")
+		}
+	}()
+
+	data, err := io.ReadAll(r)
+	if err != nil {
+		log.Warn().Str("backup", backupName).Err(err).Msg("failed to read manifest.json, will fall back to Walk")
+		return nil, nil
+	}
+	manifest, err := UnmarshalManifest(data)
+	if err != nil {
+		log.Warn().Str("backup", backupName).Err(err).Msg("failed to parse manifest.json, will fall back to Walk")
+		return nil, nil
+	}
+	log.Info().Str("backup", backupName).Int("total_files", manifest.TotalFiles).
+		Int64("total_size", manifest.TotalSize).Msg("loaded backup manifest, skipping Walk")
+	return manifest, nil
+}
+
+// DownloadPathWithManifest downloads files from remotePath using a pre-loaded manifest
+// instead of calling Walk. Only files from manifestFiles are downloaded.
+// prefixInManifest should be relative to the backup root (e.g.,
+// "shadow/default/my_table/default/part1").
+// It returns the number of bytes downloaded.
+func (bd *BackupDestination) DownloadPathWithManifest(ctx context.Context, remotePath string, localPath string, manifestFiles []ManifestEntry, prefixInManifest string, RetriesOnFailure int, RetriesDuration time.Duration, RetriesJitter int8, RetrierClassifier retrier.Classifier, maxSpeed uint64) (int64, error) {
+	downloadedBytes := int64(0)
+
+	for _, entry := range manifestFiles {
+		// Name of the file relative to both remotePath and localPath.
+		relativeName := strings.TrimPrefix(entry.Path, prefixInManifest)
+		relativeName = strings.TrimPrefix(relativeName, "/")
+		if relativeName == "" {
+			continue
+		}
+		// The manifest is read back from remote storage, so never let an entry
+		// resolve to a location outside localPath.
+		if cleaned := path.Clean(relativeName); path.IsAbs(cleaned) || cleaned == ".." || strings.HasPrefix(cleaned, "../") {
+			return downloadedBytes, errors.Errorf("DownloadPathWithManifest: manifest entry %q escapes the destination directory", entry.Path)
+		}
+		remoteFile := path.Join(remotePath, relativeName)
+		dstFilePath := path.Join(localPath, relativeName)
+
+		var fileBytes int64
+		retry := retrier.New(retrier.ExponentialBackoff(RetriesOnFailure, common.AddRandomJitter(RetriesDuration, RetriesJitter)), RetrierClassifier)
+		err := retry.RunCtx(ctx, func(ctx context.Context) error {
+			copied, attemptErr := bd.downloadManifestFile(ctx, remoteFile, dstFilePath, maxSpeed)
+			if attemptErr != nil {
+				// Logged per attempt: the retrier only surfaces the last error.
+				log.Error().Err(attemptErr).Send()
+				return attemptErr
+			}
+			fileBytes = copied
+			return nil
+		})
+		if err != nil {
+			return downloadedBytes, errors.WithMessage(err, "DownloadPathWithManifest retry")
+		}
+		// Counted once per file, after the attempt that succeeded, so retries
+		// cannot inflate the total.
+		downloadedBytes += fileBytes
+	}
+	return downloadedBytes, nil
+}
+
+// downloadManifestFile performs one download attempt of remoteFile into
+// dstFilePath, creating the parent directory when needed, and returns the
+// number of bytes copied. The remote reader and the destination file are
+// closed on every path, including failures, so a retried attempt starts clean.
+func (bd *BackupDestination) downloadManifestFile(ctx context.Context, remoteFile, dstFilePath string, maxSpeed uint64) (int64, error) {
+	startTime := time.Now()
+	r, err := bd.GetFileReader(ctx, remoteFile)
+	if err != nil {
+		return 0, errors.WithMessage(err, "DownloadPathWithManifest GetFileReader")
+	}
+	if err := os.MkdirAll(path.Dir(dstFilePath), 0750); err != nil {
+		_ = r.Close() // already failing; the MkdirAll error is the one to report
+		return 0, errors.WithMessage(err, "DownloadPathWithManifest MkdirAll")
+	}
+	dst, err := os.Create(dstFilePath)
+	if err != nil {
+		_ = r.Close() // already failing; the Create error is the one to report
+		return 0, errors.WithMessage(err, "DownloadPathWithManifest Create")
+	}
+	copied, copyErr := bd.copyWithBuffer(dst, r)
+	// Close both ends whatever the copy result, then report the first failure.
+	dstCloseErr := dst.Close()
+	srcCloseErr := r.Close()
+	if copyErr != nil {
+		return 0, errors.WithMessage(copyErr, "DownloadPathWithManifest io.Copy")
+	}
+	if dstCloseErr != nil {
+		return 0, errors.WithMessage(dstCloseErr, "DownloadPathWithManifest dst.Close")
+	}
+	if srcCloseErr != nil {
+		return 0, errors.WithMessage(srcCloseErr, "DownloadPathWithManifest r.Close")
+	}
+	dstFileInfo, err := os.Stat(dstFilePath)
+	if err != nil {
+		return 0, errors.WithMessage(err, "DownloadPathWithManifest Stat")
+	}
+	bd.throttleSpeed(startTime, dstFileInfo.Size(), maxSpeed)
+	return copied, nil
+}
diff --git a/pkg/storage/manifest_test.go b/pkg/storage/manifest_test.go
new file mode 100644
index 00000000..c94b3369
--- /dev/null
+++ b/pkg/storage/manifest_test.go
@@ -0,0 +1,306 @@
+package storage
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestNewBackupManifest(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifest("test_backup")
+	assert.Equal(t, ManifestVersion, m.Version)
+	assert.Equal(t, "test_backup", m.BackupName)
+	assert.NotZero(t, m.CreatedAt)
+	assert.Empty(t, m.Files)
+	assert.Equal(t, 0, m.TotalFiles)
+	assert.Equal(t, int64(0), m.TotalSize)
+}
+
+func TestBackupManifest_AddFile(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifest("test_backup")
+	now := time.Now().UTC()
+
+	m.AddFile("shadow/default/table1/default/part1/data.bin", 12345, now)
+	m.AddFile("shadow/default/table1/default/part1/primary.idx", 456, now)
+	m.AddFile("metadata/default/table1.json", 789, now)
+
+	assert.Equal(t, 3, m.TotalFiles)
+	assert.Equal(t, int64(12345+456+789), m.TotalSize)
+	assert.Len(t, m.Files, 3)
+
+	assert.Equal(t, "shadow/default/table1/default/part1/data.bin", m.Files[0].Path)
+	assert.Equal(t, int64(12345), m.Files[0].Size)
+	assert.Equal(t, now, m.Files[0].LastModified)
+}
+
+func TestBackupManifest_FilesUnderPrefix(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifest("test_backup")
+	now := time.Now().UTC()
+
+	// Add files from different tables and parts
+	m.AddFile("shadow/default/table1/default/part1/data.bin", 100, now)
+	m.AddFile("shadow/default/table1/default/part1/primary.idx", 50, now)
+	m.AddFile("shadow/default/table1/default/part2/data.bin", 200, now)
+	m.AddFile("shadow/default/table2/default/part1/data.bin", 300, now)
+	m.AddFile("metadata/default/table1.json", 10, now)
+
+	// Files under part1 of table1
+	files := m.FilesUnderPrefix("shadow/default/table1/default/part1")
+	assert.Len(t, files, 2)
+	for _, f := range files {
+		assert.Contains(t, f.Path, "shadow/default/table1/default/part1/")
+	}
+
+	// Files under table1 (all parts)
+	files = m.FilesUnderPrefix("shadow/default/table1")
+	assert.Len(t, files, 3) // part1/data.bin, part1/primary.idx, part2/data.bin
+
+	// Files under table2
+	files = m.FilesUnderPrefix("shadow/default/table2")
+	assert.Len(t, files, 1)
+
+	// Files under metadata
+	files = m.FilesUnderPrefix("metadata")
+	assert.Len(t, files, 1)
+
+	// Non-existent prefix
+	files = m.FilesUnderPrefix("shadow/default/table3")
+	assert.Len(t, files, 0)
+}
+
+func TestBackupManifest_FilesUnderPrefix_TrailingSlash(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifest("test")
+	now := time.Now().UTC()
+	m.AddFile("shadow/db/tbl/disk/part1/file.bin", 100, now)
+
+	// Should work with or without trailing slash
+	files1 := m.FilesUnderPrefix("shadow/db/tbl/disk/part1")
+	files2 := m.FilesUnderPrefix("shadow/db/tbl/disk/part1/")
+	assert.Equal(t, len(files1), len(files2))
+}
+
+func TestBackupManifest_MarshalUnmarshal(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifest("my_backup_2025")
+	now := time.Now().UTC().Truncate(time.Millisecond) // truncate for JSON roundtrip
+
+	m.AddFile("shadow/default/orders/default/20250101_1_1_0/data.bin", 1048576, now)
+	m.AddFile("shadow/default/orders/default/20250101_1_1_0/primary.idx", 256, now)
+	m.AddFile("metadata/default/orders.json", 1024, now)
+
+	data, err := m.Marshal()
+	require.NoError(t, err)
+	assert.Contains(t, string(data), `"version": 1`)
+	assert.Contains(t, string(data), `"backup_name": "my_backup_2025"`)
+	assert.Contains(t, string(data), `"total_files": 3`)
+
+	// Unmarshal
+	m2, err := UnmarshalManifest(data)
+	require.NoError(t, err)
+	assert.Equal(t, m.Version, m2.Version)
+	assert.Equal(t, m.BackupName, m2.BackupName)
+	assert.Equal(t, m.TotalFiles, m2.TotalFiles)
+	assert.Equal(t, m.TotalSize, m2.TotalSize)
+	assert.Len(t, m2.Files, 3)
+
+	// Verify file entries round-trip
+	assert.Equal(t, m.Files[0].Path, m2.Files[0].Path)
+	assert.Equal(t, m.Files[0].Size, m2.Files[0].Size)
+	assert.Equal(t, m.Files[1].Path, m2.Files[1].Path)
+	assert.Equal(t, m.Files[2].Path, m2.Files[2].Path)
+}
+
+func TestUnmarshalManifest_InvalidJSON(t *testing.T) {
+	t.Parallel()
+	_, err := UnmarshalManifest([]byte(`not json`))
+	assert.Error(t, err)
+}
+
+func TestUnmarshalManifest_EmptyFiles(t *testing.T) {
+	t.Parallel()
+	data := []byte(`{"version":1,"backup_name":"empty","created_at":"2025-05-16T00:00:00Z","total_size":0,"total_files":0,"files":[]}`)
+	m, err := UnmarshalManifest(data)
+	require.NoError(t, err)
+	assert.Equal(t, "empty", m.BackupName)
+	assert.Empty(t, m.Files)
+}
+
+func TestManifestEntryToRemoteFile(t *testing.T) {
+	t.Parallel()
+	now := time.Now().UTC()
+	entry := ManifestEntry{
+		Path:         "shadow/default/table1/default/part1/data.bin",
+		Size:         12345,
+		LastModified: now,
+	}
+
+	// No prefix stripping
+	rf := ManifestEntryToRemoteFile(entry, "")
+	assert.Equal(t, "shadow/default/table1/default/part1/data.bin", rf.Name())
+	assert.Equal(t, int64(12345), rf.Size())
+	assert.Equal(t, now, rf.LastModified())
+
+	// With prefix stripping
+	rf2 := ManifestEntryToRemoteFile(entry, "shadow/default/table1/default/part1")
+	assert.Equal(t, "data.bin", rf2.Name())
+	assert.Equal(t, int64(12345), rf2.Size())
+}
+
+func TestManifestEntryToRemoteFile_PrefixWithSlash(t *testing.T) {
+	t.Parallel()
+	entry := ManifestEntry{
+		Path: "shadow/db/tbl/disk/part/subdir/file.bin",
+		Size: 100,
+	}
+	rf := ManifestEntryToRemoteFile(entry, "shadow/db/tbl/disk/part/")
+	assert.Equal(t, "subdir/file.bin", rf.Name())
+}
+
+func TestBackupManifest_LargeFileCount(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifest("large_backup")
+	now := time.Now().UTC()
+
+	// Simulate a real backup with 10k files
+	for i := 0; i < 10000; i++ {
+		m.AddFile(
+			"shadow/default/trades/default/20250101_1_1_0/column"+string(rune('A'+i%26))+".bin",
+			int64(1024+i),
+			now,
+		)
+	}
+
+	assert.Equal(t, 10000, m.TotalFiles)
+	assert.Greater(t, m.TotalSize, int64(0))
+
+	// Marshal and unmarshal should work
+	data, err := m.Marshal()
+	require.NoError(t, err)
+
+	m2, err := UnmarshalManifest(data)
+	require.NoError(t, err)
+	assert.Equal(t, 10000, m2.TotalFiles)
+	assert.Equal(t, m.TotalSize, m2.TotalSize)
+}
+
+func TestManifestFileName_IsCorrect(t *testing.T) {
+	t.Parallel()
+	assert.Equal(t, "manifest.json", ManifestFileName)
+}
+
+func TestManifestVersion_IsOne(t *testing.T) {
+	t.Parallel()
+	assert.Equal(t, 1, ManifestVersion)
+}
+
+func TestBackupManifest_HasFile(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifest("test")
+	now := time.Now().UTC()
+
+	m.AddFile("metadata/default/orders.json", 100, now)
+	m.AddFile("shadow/default/orders/default/part1/data.bin", 200, now)
+
+	assert.True(t, m.HasFile("metadata/default/orders.json"))
+	assert.True(t, m.HasFile("shadow/default/orders/default/part1/data.bin"))
+	assert.False(t, m.HasFile("metadata/default/nonexistent.json"))
+	assert.False(t, m.HasFile("metadata/default/orders.json/"))
+	assert.False(t, m.HasFile(""))
+}
+
+func TestNewBackupManifestWithCapacity(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifestWithCapacity("test_backup", 5000)
+	assert.Equal(t, ManifestVersion, m.Version)
+	assert.Equal(t, "test_backup", m.BackupName)
+	assert.NotZero(t, m.CreatedAt)
+	assert.Empty(t, m.Files)
+	assert.Equal(t, 5000, cap(m.Files))
+	assert.Equal(t, 0, m.TotalFiles)
+	assert.Equal(t, int64(0), m.TotalSize)
+}
+
+func TestNewBackupManifestWithCapacity_MinimumFloor(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifestWithCapacity("test", 10)
+	assert.Equal(t, 256, cap(m.Files), "capacity should be at least 256")
+}
+
+func TestNewBackupManifestWithCapacity_ZeroCapacity(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifestWithCapacity("test", 0)
+	assert.Equal(t, 256, cap(m.Files), "zero capacity should use floor of 256")
+}
+
+func TestNewBackupManifestWithCapacity_NegativeCapacity(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifestWithCapacity("test", -100)
+	assert.Equal(t, 256, cap(m.Files), "negative capacity should use floor of 256")
+}
+
+func TestNewBackupManifestWithCapacity_CorrectBehavior(t *testing.T) {
+	t.Parallel()
+	// Pre-sized manifest should produce identical results to default manifest
+	now := time.Now().UTC()
+	m1 := NewBackupManifest("test")
+	m2 := NewBackupManifestWithCapacity("test", 10000)
+
+	for i := 0; i < 1000; i++ {
+		path := "shadow/default/table/default/part/col" + string(rune('A'+i%26)) + ".bin"
+		m1.AddFile(path, int64(i*100), now)
+		m2.AddFile(path, int64(i*100), now)
+	}
+
+	assert.Equal(t, m1.TotalFiles, m2.TotalFiles)
+	assert.Equal(t, m1.TotalSize, m2.TotalSize)
+	assert.Equal(t, len(m1.Files), len(m2.Files))
+
+	// Verify marshal/unmarshal produces identical output
+	data1, err1 := m1.Marshal()
+	data2, err2 := m2.Marshal()
+	require.NoError(t, err1)
+	require.NoError(t, err2)
+
+	// Unmarshal and compare (can't compare JSON directly due to timestamp)
+	um1, err := UnmarshalManifest(data1)
+	require.NoError(t, err)
+	um2, err := UnmarshalManifest(data2)
+	require.NoError(t, err)
+	assert.Equal(t, um1.TotalFiles, um2.TotalFiles)
+	assert.Equal(t, um1.TotalSize, um2.TotalSize)
+}
+
+func TestNewBackupManifestWithCapacity_LargeCapacityNoAlloc(t *testing.T) {
+	t.Parallel()
+	// Verify that pre-sizing to exact capacity produces correct results
+	m := NewBackupManifestWithCapacity("test", 50000)
+	now := time.Now().UTC()
+
+	for i := 0; i < 50000; i++ {
+		m.AddFile("shadow/default/t/d/p/f.bin", int64(i), now)
+	}
+	assert.Equal(t, 50000, m.TotalFiles)
+	// Pre-sized capacity should prevent any reallocation
+	assert.Equal(t, 50000, cap(m.Files), "slice must not have grown beyond the pre-sized capacity")
+}
+
+func TestBackupManifest_FilesUnderPrefix_NoPartialMatch(t *testing.T) {
+	t.Parallel()
+	m := NewBackupManifest("test")
+	now := time.Now().UTC()
+
+	// "part1" should not match "part10" or "part1_suffix"
+	m.AddFile("shadow/db/tbl/disk/part1/file.bin", 100, now)
+	m.AddFile("shadow/db/tbl/disk/part10/file.bin", 200, now)
+	m.AddFile("shadow/db/tbl/disk/part1_suffix/file.bin", 300, now)
+
+	files := m.FilesUnderPrefix("shadow/db/tbl/disk/part1")
+	assert.Len(t, files, 1, "should only match exact prefix with trailing slash")
+	assert.Equal(t, "shadow/db/tbl/disk/part1/file.bin", files[0].Path)
+}