From 5964ab76cb3fb4aa2ad1536ae67c575036d7a2e8 Mon Sep 17 00:00:00 2001
From: ruslanen
Date: Fri, 19 Jun 2026 16:18:00 +0300
Subject: [PATCH] fix(restore): eliminate O(N^2) linear scan in
 getTableListByPatternLocal

addTableToListIfNotExistsOrEnrichQueryAndParts iterated the full result
slice for every file processed during filepath.Walk. With 550K tables
this produced N*(N-1)/2 ~ 151 billion string comparisons (~2 hours).

Fix: maintain a tableIndex map[TableTitle]int alongside the result
slice. Lookup and enrich are now O(1); total restore init phase drops
from O(N^2) to O(N).

The inline addToResult closure replaces the two local call sites; the
original function is kept for the remote-restore path which operates on
far fewer tables in practice.
---
 pkg/backup/table_pattern.go | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/pkg/backup/table_pattern.go b/pkg/backup/table_pattern.go
index 44aa6491..1ffa1f17 100644
--- a/pkg/backup/table_pattern.go
+++ b/pkg/backup/table_pattern.go
@@ -52,6 +52,7 @@ func addTableToListIfNotExistsOrEnrichQueryAndParts(tables ListOfTables, table m
 
 func (b *Backuper) getTableListByPatternLocal(ctx context.Context, metadataPath string, tablePattern string, dropTable bool, partitions []string) (ListOfTables, map[metadata.TableTitle][]string, error) {
 	result := ListOfTables{}
+	tableIndex := make(map[metadata.TableTitle]int)
 	resultPartitionNames := map[metadata.TableTitle][]string{}
 	tablePatterns := []string{"*"}
 	if tablePattern != "" {
@@ -67,6 +68,21 @@ func (b *Backuper) getTableListByPatternLocal(ctx context.Context, metadataPath
 	// https://github.com/Altinity/clickhouse-backup/issues/1091
 	replacer := strings.NewReplacer(`/`, "_", `\`, "_")
 
+	addToResult := func(t metadata.TableMetadata) {
+		key := metadata.TableTitle{Database: t.Database, Table: t.Table}
+		if idx, exists := tableIndex[key]; exists {
+			if result[idx].Query == "" && t.Query != "" {
+				result[idx].Query = t.Query
+			}
+			if len(result[idx].Parts) == 0 && len(t.Parts) > 0 {
+				result[idx].Parts = t.Parts
+			}
+		} else {
+			tableIndex[key] = len(result)
+			result = append(result, &t)
+		}
+	}
+
 	if err := filepath.Walk(metadataPath, func(filePath string, info os.FileInfo, err error) error {
 		if err != nil {
 			return err
@@ -106,7 +122,7 @@ func (b *Backuper) getTableListByPatternLocal(ctx context.Context, metadataPath
 			// .sql file will enrich Query
 			partitionsIdMap, _ := partition.ConvertPartitionsToIdsMapAndNamesList(ctx, b.ch, nil, ListOfTables{&t}, partitions)
 			filterPartsAndFilesByPartitionsFilter(t, partitionsIdMap[metadata.TableTitle{Database: t.Database, Table: t.Table}])
-			result = addTableToListIfNotExistsOrEnrichQueryAndParts(result, t)
+			addToResult(t)
 			return nil
 		}
 		var t metadata.TableMetadata
@@ -115,7 +131,7 @@ func (b *Backuper) getTableListByPatternLocal(ctx context.Context, metadataPath
 		}
 		partitionsIdMap, partitionsNameList := partition.ConvertPartitionsToIdsMapAndNamesList(ctx, b.ch, nil, ListOfTables{&t}, partitions)
 		filterPartsAndFilesByPartitionsFilter(t, partitionsIdMap[metadata.TableTitle{Database: t.Database, Table: t.Table}])
-		result = addTableToListIfNotExistsOrEnrichQueryAndParts(result, t)
+		addToResult(t)
 		for tt := range partitionsNameList {
 			if _, exists := resultPartitionNames[tt]; !exists {
 				resultPartitionNames[tt] = []string{}