From 5bb0d4833c99961eb81df7786fa7a78f2cc5fbf0 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 14 Oct 2025 15:29:52 +0200 Subject: [PATCH 1/6] Merge pull request #1040 from Altinity/fp_antalya_25_8_list_objects_cache Antalya 25.8 - Forward port of #805 List objects cache --- programs/server/Server.cpp | 11 + src/Access/Common/AccessType.h | 1 + src/Common/ProfileEvents.cpp | 4 + src/Common/TTLCachePolicy.h | 4 +- src/Core/ServerSettings.cpp | 5 +- src/Core/Settings.cpp | 3 + src/Core/SettingsChangesHistory.cpp | 2 + .../AzureBlobStorage/AzureObjectStorage.h | 2 + .../ObjectStorages/IObjectStorage.h | 2 + .../ObjectStorages/S3/S3ObjectStorage.h | 2 + src/Interpreters/InterpreterSystemQuery.cpp | 9 +- src/Parsers/ASTSystemQuery.cpp | 1 + src/Parsers/ASTSystemQuery.h | 1 + .../Cache/ObjectStorageListObjectsCache.cpp | 210 ++++++++++++++++++ .../Cache/ObjectStorageListObjectsCache.h | 78 +++++++ ...test_object_storage_list_objects_cache.cpp | 160 +++++++++++++ .../StorageObjectStorageSource.cpp | 52 ++++- .../StorageObjectStorageSource.h | 33 ++- .../01271_show_privileges.reference | 1 + ...bject_storage_list_objects_cache.reference | 103 +++++++++ ...3377_object_storage_list_objects_cache.sql | 115 ++++++++++ 21 files changed, 779 insertions(+), 20 deletions(-) create mode 100644 src/Storages/Cache/ObjectStorageListObjectsCache.cpp create mode 100644 src/Storages/Cache/ObjectStorageListObjectsCache.h create mode 100644 src/Storages/Cache/tests/gtest_object_storage_list_objects_cache.cpp create mode 100644 tests/queries/0_stateless/03377_object_storage_list_objects_cache.reference create mode 100644 tests/queries/0_stateless/03377_object_storage_list_objects_cache.sql diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f919d4c407b7..7dd783531892 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #include @@ -415,6 +416,9 @@ namespace ServerSetting extern const ServerSettingsUInt64 keeper_server_socket_send_timeout_sec; extern const ServerSettingsString hdfs_libhdfs3_conf; extern const ServerSettingsString config_file; + extern const ServerSettingsUInt64 object_storage_list_objects_cache_ttl; + extern const ServerSettingsUInt64 object_storage_list_objects_cache_size; + extern const ServerSettingsUInt64 object_storage_list_objects_cache_max_entries; } namespace ErrorCodes @@ -425,6 +429,9 @@ namespace ErrorCodes namespace FileCacheSetting { extern const FileCacheSettingsBool load_metadata_asynchronously; + extern const ServerSettingsUInt64 object_storage_list_objects_cache_size; + extern const ServerSettingsUInt64 object_storage_list_objects_cache_max_entries; + extern const ServerSettingsUInt64 object_storage_list_objects_cache_ttl; } } @@ -2737,6 +2744,10 @@ try /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start. CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs"); + ObjectStorageListObjectsCache::instance().setMaxSizeInBytes(server_settings[ServerSetting::object_storage_list_objects_cache_size]); + ObjectStorageListObjectsCache::instance().setMaxCount(server_settings[ServerSetting::object_storage_list_objects_cache_max_entries]); + ObjectStorageListObjectsCache::instance().setTTL(server_settings[ServerSetting::object_storage_list_objects_cache_ttl]); + auto replicas_reconnector = ReplicasReconnector::init(global_context); /// Set current database name before loading tables and databases because diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 0eaa21082a5c..a87368afa16c 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -334,6 +334,7 @@ enum class AccessType : uint8_t M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM CLEAR SCHEMA CACHE, SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM CLEAR FORMAT SCHEMA CACHE, SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM CLEAR S3 CLIENT CACHE, SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ + M(SYSTEM_DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE, "SYSTEM DROP OBJECT STORAGE LIST OBJECTS CACHE", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \ M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \ diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 3f9bd174afeb..969040e6bc72 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -1298,6 +1298,10 @@ The server successfully detected this situation and will download merged part fr M(RuntimeFilterRowsChecked, "Number of rows checked by JOIN Runtime Filters", ValueType::Number) \ M(RuntimeFilterRowsPassed, "Number of rows that passed (not filtered out by) JOIN Runtime Filters", ValueType::Number) \ M(RuntimeFilterRowsSkipped, "Number of rows in blocks that were skipped by JOIN Runtime Filters", ValueType::Number) \ + M(ObjectStorageListObjectsCacheHits, "Number of times object storage list objects operation hit the cache.", ValueType::Number) \ + M(ObjectStorageListObjectsCacheMisses, "Number of times object storage list objects operation miss the cache.", ValueType::Number) \ + M(ObjectStorageListObjectsCacheExactMatchHits, "Number of times object storage list objects operation hit the cache with an exact match.", ValueType::Number) \ + M(ObjectStorageListObjectsCachePrefixMatchHits, "Number of times object storage list objects operation miss the cache using prefix matching.", ValueType::Number) \ #ifdef APPLY_FOR_EXTERNAL_EVENTS diff --git a/src/Common/TTLCachePolicy.h b/src/Common/TTLCachePolicy.h index be2cf781a91e..d2dc56aaab45 100644 --- a/src/Common/TTLCachePolicy.h +++ b/src/Common/TTLCachePolicy.h @@ -273,10 +273,10 @@ class TTLCachePolicy : public ICachePolicy; Cache cache; - +private: /// TODO To speed up removal of stale entries, we could also add another container sorted on expiry times which maps keys to iterators /// into the cache. To insert an entry, add it to the cache + add the iterator to the sorted container. To remove stale entries, do a /// binary search on the sorted container and erase all left of the found key. diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index 9df634a0a8b1..3e395a9843da 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -1469,7 +1469,10 @@ The policy on how to perform a scheduling of CPU slots specified by `concurrent_ ```xml 1 ``` - )", 0) + )", 0) \ + DECLARE(UInt64, object_storage_list_objects_cache_size, 500000000, "Maximum size of ObjectStorage list objects cache in bytes. Zero means disabled.", 0) \ + DECLARE(UInt64, object_storage_list_objects_cache_max_entries, 1000, "Maximum size of ObjectStorage list objects cache in entries. Zero means disabled.", 0) \ + DECLARE(UInt64, object_storage_list_objects_cache_ttl, 3600, "Time to live of records in ObjectStorage list objects cache in seconds. Zero means unlimited", 0) /// Settings with a path are server settings with at least one layer of nesting that have a fixed structure (no lists, lists, enumerations, repetitions, ...). #define LIST_OF_SERVER_SETTINGS_WITH_PATH(DECLARE, ALIAS) \ diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index e72653747d54..4020ccdde7bd 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -7456,6 +7456,9 @@ Write full paths (including s3://) into iceberg metadata files. )", EXPERIMENTAL) \ DECLARE(String, iceberg_metadata_compression_method, "", R"( Method to compress `.metadata.json` file. +)", EXPERIMENTAL) \ + DECLARE(Bool, use_object_storage_list_objects_cache, false, R"( +Cache the list of objects returned by list objects calls in object storage )", EXPERIMENTAL) \ DECLARE(Bool, make_distributed_plan, false, R"( Make distributed query plan. diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e8686e2a43ea..dff7a55f8380 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -289,6 +289,8 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory() {"allow_experimental_lightweight_update", false, true, "Lightweight updates were moved to Beta."}, {"s3_slow_all_threads_after_retryable_error", false, false, "Added an alias for setting `backup_slow_all_threads_after_retryable_s3_error`"}, {"serialize_string_in_memory_with_zero_byte", true, true, "New setting"}, + {"iceberg_metadata_log_level", "none", "none", "New setting."}, + {"use_object_storage_list_objects_cache", false, false, "New setting."}, }); addSettingsChanges(settings_changes_history, "25.7", { diff --git a/src/Disks/DiskObjectStorage/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/DiskObjectStorage/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 8d832ae9ce98..58bef4753e5c 100644 --- a/src/Disks/DiskObjectStorage/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/DiskObjectStorage/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -35,6 +35,8 @@ class AzureObjectStorage : public IObjectStorage const String & description_, const String & common_key_prefix_); + bool supportsListObjectsCache() override { return true; } + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; /// Sanitizer build may crash with max_keys=1; this looks like a false positive. diff --git a/src/Disks/DiskObjectStorage/ObjectStorages/IObjectStorage.h b/src/Disks/DiskObjectStorage/ObjectStorages/IObjectStorage.h index e2605c10438e..70851dfc6b20 100644 --- a/src/Disks/DiskObjectStorage/ObjectStorages/IObjectStorage.h +++ b/src/Disks/DiskObjectStorage/ObjectStorages/IObjectStorage.h @@ -329,6 +329,8 @@ class IObjectStorage throw Exception(ErrorCodes::NOT_IMPLEMENTED, "The method 'tagObjects' is only implemented for S3 and Azure storages"); } #endif + + virtual bool supportsListObjectsCache() { return false; } }; using ObjectStoragePtr = std::shared_ptr; diff --git a/src/Disks/DiskObjectStorage/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/DiskObjectStorage/ObjectStorages/S3/S3ObjectStorage.h index 2cc9d2626bca..f74924463f3b 100644 --- a/src/Disks/DiskObjectStorage/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/DiskObjectStorage/ObjectStorages/S3/S3ObjectStorage.h @@ -63,6 +63,8 @@ class S3ObjectStorage : public IObjectStorage ObjectStorageType getType() const override { return ObjectStorageType::S3; } + bool supportsListObjectsCache() override { return true; } + bool exists(const StoredObject & object) const override; std::unique_ptr readObject( /// NOLINT diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index c94ebe324fa4..0898f2bd0b09 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -470,7 +471,12 @@ BlockIO InterpreterSystemQuery::execute() #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for AWS S3"); #endif - + case Type::DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE: + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE); + ObjectStorageListObjectsCache::instance().clear(); + break; + } case Type::CLEAR_FILESYSTEM_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_FILESYSTEM_CACHE); @@ -1998,6 +2004,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::CLEAR_SCHEMA_CACHE: case Type::CLEAR_FORMAT_SCHEMA_CACHE: case Type::CLEAR_S3_CLIENT_CACHE: + case Type::DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE: { required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE); break; diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index a7e0e7c2d57d..8f05f6ac4e12 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -576,6 +576,7 @@ void ASTSystemQuery::formatImpl(WriteBuffer & ostr, const FormatSettings & setti case Type::CLEAR_COMPILED_EXPRESSION_CACHE: case Type::CLEAR_S3_CLIENT_CACHE: case Type::CLEAR_ICEBERG_METADATA_CACHE: + case Type::DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE: case Type::RESET_COVERAGE: case Type::RESTART_REPLICAS: case Type::JEMALLOC_PURGE: diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 14dcb8e04f73..0f76a56f60ce 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -51,6 +51,7 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster CLEAR_SCHEMA_CACHE, CLEAR_FORMAT_SCHEMA_CACHE, CLEAR_S3_CLIENT_CACHE, + DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE, STOP_LISTEN, START_LISTEN, RESTART_REPLICAS, diff --git a/src/Storages/Cache/ObjectStorageListObjectsCache.cpp b/src/Storages/Cache/ObjectStorageListObjectsCache.cpp new file mode 100644 index 000000000000..a7aec57d9161 --- /dev/null +++ b/src/Storages/Cache/ObjectStorageListObjectsCache.cpp @@ -0,0 +1,210 @@ +#include +#include +#include +#include + +namespace ProfileEvents +{ +extern const Event ObjectStorageListObjectsCacheHits; +extern const Event ObjectStorageListObjectsCacheMisses; +extern const Event ObjectStorageListObjectsCacheExactMatchHits; +extern const Event ObjectStorageListObjectsCachePrefixMatchHits; +} + +namespace DB +{ + +template +class ObjectStorageListObjectsCachePolicy : public TTLCachePolicy +{ +public: + using BasePolicy = TTLCachePolicy; + using typename BasePolicy::MappedPtr; + using typename BasePolicy::KeyMapped; + using BasePolicy::cache; + + ObjectStorageListObjectsCachePolicy() + : BasePolicy(CurrentMetrics::end(), CurrentMetrics::end(), std::make_unique()) + { + } + + std::optional getWithKey(const Key & key) override + { + if (const auto it = cache.find(key); it != cache.end()) + { + if (!IsStaleFunction()(it->first)) + { + return std::make_optional({it->first, it->second}); + } + // found a stale entry, remove it but don't return. We still want to perform the prefix matching search + BasePolicy::remove(it->first); + } + + if (const auto it = findBestMatchingPrefixAndRemoveExpiredEntries(key); it != cache.end()) + { + return std::make_optional({it->first, it->second}); + } + + return std::nullopt; + } + +private: + auto findBestMatchingPrefixAndRemoveExpiredEntries(Key key) + { + while (!key.prefix.empty()) + { + if (const auto it = cache.find(key); it != cache.end()) + { + if (IsStaleFunction()(it->first)) + { + BasePolicy::remove(it->first); + } + else + { + return it; + } + } + + key.prefix.pop_back(); + } + + return cache.end(); + } +}; + +ObjectStorageListObjectsCache::Key::Key( + const String & storage_description_, + const String & bucket_, + const String & prefix_, + const std::chrono::steady_clock::time_point & expires_at_, + std::optional user_id_) + : storage_description(storage_description_), bucket(bucket_), prefix(prefix_), expires_at(expires_at_), user_id(user_id_) {} + +bool ObjectStorageListObjectsCache::Key::operator==(const Key & other) const +{ + return storage_description == other.storage_description && bucket == other.bucket && prefix == other.prefix; +} + +size_t ObjectStorageListObjectsCache::KeyHasher::operator()(const Key & key) const +{ + std::size_t seed = 0; + + boost::hash_combine(seed, key.storage_description); + boost::hash_combine(seed, key.bucket); + boost::hash_combine(seed, key.prefix); + + return seed; +} + +bool ObjectStorageListObjectsCache::IsStale::operator()(const Key & key) const +{ + return key.expires_at < std::chrono::steady_clock::now(); +} + +size_t ObjectStorageListObjectsCache::WeightFunction::operator()(const Value & value) const +{ + std::size_t weight = 0; + + for (const auto & object : value) + { + const auto object_metadata = object->metadata; + weight += object->relative_path.capacity() + sizeof(object_metadata); + + // variable size + if (object_metadata) + { + weight += object_metadata->etag.capacity(); + weight += object_metadata->attributes.size() * (sizeof(std::string) * 2); + + for (const auto & [k, v] : object_metadata->attributes) + { + weight += k.capacity() + v.capacity(); + } + } + } + + return weight; +} + +ObjectStorageListObjectsCache::ObjectStorageListObjectsCache() + : cache(std::make_unique>()) +{ +} + +void ObjectStorageListObjectsCache::set( + const Key & key, + const std::shared_ptr & value) +{ + auto key_with_ttl = key; + key_with_ttl.expires_at = std::chrono::steady_clock::now() + std::chrono::seconds(ttl_in_seconds); + + cache.set(key_with_ttl, value); +} + +void ObjectStorageListObjectsCache::clear() +{ + cache.clear(); +} + +std::optional ObjectStorageListObjectsCache::get(const Key & key, bool filter_by_prefix) +{ + const auto pair = cache.getWithKey(key); + + if (!pair) + { + ProfileEvents::increment(ProfileEvents::ObjectStorageListObjectsCacheMisses); + return {}; + } + + ProfileEvents::increment(ProfileEvents::ObjectStorageListObjectsCacheHits); + + if (pair->key == key) + { + ProfileEvents::increment(ProfileEvents::ObjectStorageListObjectsCacheExactMatchHits); + return *pair->mapped; + } + + ProfileEvents::increment(ProfileEvents::ObjectStorageListObjectsCachePrefixMatchHits); + + if (!filter_by_prefix) + { + return *pair->mapped; + } + + Value filtered_objects; + + filtered_objects.reserve(pair->mapped->size()); + + for (const auto & object : *pair->mapped) + { + if (object->relative_path.starts_with(key.prefix)) + { + filtered_objects.push_back(object); + } + } + + return filtered_objects; +} + +void ObjectStorageListObjectsCache::setMaxSizeInBytes(std::size_t size_in_bytes_) +{ + cache.setMaxSizeInBytes(size_in_bytes_); +} + +void ObjectStorageListObjectsCache::setMaxCount(std::size_t count) +{ + cache.setMaxCount(count); +} + +void ObjectStorageListObjectsCache::setTTL(std::size_t ttl_in_seconds_) +{ + ttl_in_seconds = ttl_in_seconds_; +} + +ObjectStorageListObjectsCache & ObjectStorageListObjectsCache::instance() +{ + static ObjectStorageListObjectsCache instance; + return instance; +} + +} diff --git a/src/Storages/Cache/ObjectStorageListObjectsCache.h b/src/Storages/Cache/ObjectStorageListObjectsCache.h new file mode 100644 index 000000000000..6cb6c3694d93 --- /dev/null +++ b/src/Storages/Cache/ObjectStorageListObjectsCache.h @@ -0,0 +1,78 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class ObjectStorageListObjectsCache +{ + friend class ObjectStorageListObjectsCacheTest; +public: + ObjectStorageListObjectsCache(const ObjectStorageListObjectsCache &) = delete; + ObjectStorageListObjectsCache(ObjectStorageListObjectsCache &&) noexcept = delete; + + ObjectStorageListObjectsCache& operator=(const ObjectStorageListObjectsCache &) = delete; + ObjectStorageListObjectsCache& operator=(ObjectStorageListObjectsCache &&) noexcept = delete; + + static ObjectStorageListObjectsCache & instance(); + + struct Key + { + Key( + const String & storage_description_, + const String & bucket_, + const String & prefix_, + const std::chrono::steady_clock::time_point & expires_at_ = std::chrono::steady_clock::now(), + std::optional user_id_ = std::nullopt); + + std::string storage_description; + std::string bucket; + std::string prefix; + std::chrono::steady_clock::time_point expires_at; + std::optional user_id; + + bool operator==(const Key & other) const; + }; + + using Value = StorageObjectStorage::ObjectInfos; + struct KeyHasher + { + size_t operator()(const Key & key) const; + }; + + struct IsStale + { + bool operator()(const Key & key) const; + }; + + struct WeightFunction + { + size_t operator()(const Value & value) const; + }; + + using Cache = CacheBase; + + void set( + const Key & key, + const std::shared_ptr & value); + + std::optional get(const Key & key, bool filter_by_prefix = true); + + void clear(); + + void setMaxSizeInBytes(std::size_t size_in_bytes_); + void setMaxCount(std::size_t count); + void setTTL(std::size_t ttl_in_seconds_); + +private: + ObjectStorageListObjectsCache(); + + Cache cache; + size_t ttl_in_seconds {0}; +}; + +} diff --git a/src/Storages/Cache/tests/gtest_object_storage_list_objects_cache.cpp b/src/Storages/Cache/tests/gtest_object_storage_list_objects_cache.cpp new file mode 100644 index 000000000000..3b719d4df3e3 --- /dev/null +++ b/src/Storages/Cache/tests/gtest_object_storage_list_objects_cache.cpp @@ -0,0 +1,160 @@ +#include +#include +#include +#include + +namespace DB +{ + +class ObjectStorageListObjectsCacheTest : public ::testing::Test +{ +protected: + void SetUp() override + { + cache = std::unique_ptr(new ObjectStorageListObjectsCache()); + cache->setTTL(3); + cache->setMaxCount(100); + cache->setMaxSizeInBytes(1000000); + } + + std::unique_ptr cache; + static ObjectStorageListObjectsCache::Key default_key; + + static std::shared_ptr createTestValue(const std::vector& paths) + { + auto value = std::make_shared(); + for (const auto & path : paths) + { + value->push_back(std::make_shared(path)); + } + return value; + } +}; + +ObjectStorageListObjectsCache::Key ObjectStorageListObjectsCacheTest::default_key {"default", "test-bucket", "test-prefix/"}; + +TEST_F(ObjectStorageListObjectsCacheTest, BasicSetAndGet) +{ + cache->clear(); + auto value = createTestValue({"test-prefix/file1.txt", "test-prefix/file2.txt"}); + + cache->set(default_key, value); + + auto result = cache->get(default_key).value(); + + ASSERT_EQ(result.size(), 2); + EXPECT_EQ(result[0]->getPath(), "test-prefix/file1.txt"); + EXPECT_EQ(result[1]->getPath(), "test-prefix/file2.txt"); +} + +TEST_F(ObjectStorageListObjectsCacheTest, CacheMiss) +{ + cache->clear(); + + EXPECT_FALSE(cache->get(default_key)); +} + +TEST_F(ObjectStorageListObjectsCacheTest, ClearCache) +{ + cache->clear(); + auto value = createTestValue({"test-prefix/file1.txt", "test-prefix/file2.txt"}); + + cache->set(default_key, value); + cache->clear(); + + EXPECT_FALSE(cache->get(default_key)); +} + +TEST_F(ObjectStorageListObjectsCacheTest, PrefixMatching) +{ + cache->clear(); + + auto short_prefix_key = default_key; + short_prefix_key.prefix = "parent/"; + + auto mid_prefix_key = default_key; + mid_prefix_key.prefix = "parent/child/"; + + auto long_prefix_key = default_key; + long_prefix_key.prefix = "parent/child/grandchild/"; + + auto value = createTestValue( + { + "parent/child/grandchild/file1.txt", + "parent/child/grandchild/file2.txt"}); + + cache->set(mid_prefix_key, value); + + auto result1 = cache->get(mid_prefix_key).value(); + EXPECT_EQ(result1.size(), 2); + + auto result2 = cache->get(long_prefix_key).value(); + EXPECT_EQ(result2.size(), 2); + + EXPECT_FALSE(cache->get(short_prefix_key)); +} + +TEST_F(ObjectStorageListObjectsCacheTest, PrefixFiltering) +{ + cache->clear(); + + auto key_with_short_prefix = default_key; + key_with_short_prefix.prefix = "parent/"; + + auto key_with_mid_prefix = default_key; + key_with_mid_prefix.prefix = "parent/child1/"; + + auto value = createTestValue({ + "parent/file1.txt", + "parent/child1/file2.txt", + "parent/child2/file3.txt" + }); + + cache->set(key_with_short_prefix, value); + + auto result = cache->get(key_with_mid_prefix, true).value(); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->getPath(), "parent/child1/file2.txt"); +} + +TEST_F(ObjectStorageListObjectsCacheTest, TTLExpiration) +{ + cache->clear(); + auto value = createTestValue({"test-prefix/file1.txt"}); + + cache->set(default_key, value); + + // Verify we can get it immediately + auto result1 = cache->get(default_key).value(); + EXPECT_EQ(result1.size(), 1); + + std::this_thread::sleep_for(std::chrono::seconds(4)); + + EXPECT_FALSE(cache->get(default_key)); +} + +TEST_F(ObjectStorageListObjectsCacheTest, BestPrefixMatch) +{ + cache->clear(); + + auto short_prefix_key = default_key; + short_prefix_key.prefix = "a/b/"; + + auto mid_prefix_key = default_key; + mid_prefix_key.prefix = "a/b/c/"; + + auto long_prefix_key = default_key; + long_prefix_key.prefix = "a/b/c/d/"; + + auto short_prefix = createTestValue({"a/b/c/d/file1.txt", "a/b/c/file1.txt", "a/b/file2.txt"}); + auto mid_prefix = createTestValue({"a/b/c/d/file1.txt", "a/b/c/file1.txt"}); + + cache->set(short_prefix_key, short_prefix); + cache->set(mid_prefix_key, mid_prefix); + + // should pick mid_prefix, which has size 2. filter_by_prefix=false so we can assert by size + auto result = cache->get(long_prefix_key, false).value(); + EXPECT_EQ(result.size(), 2u); +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 580e102544e1..744ded3fab5b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -72,6 +72,7 @@ namespace Setting extern const SettingsBool cluster_function_process_archive_on_multiple_nodes; extern const SettingsBool table_engine_read_through_distributed_cache; extern const SettingsUInt64 s3_path_filter_limit; + extern const SettingsBool use_object_storage_list_objects_cache; } namespace ErrorCodes @@ -201,6 +202,30 @@ std::shared_ptr StorageObjectStorageSource::createFileIterator( // If paths contains a value, validate the extracted paths and use the key-based iterator // (even if the result is empty, indicating no scanning is required). if (!paths) + { + std::shared_ptr object_iterator = nullptr; + std::unique_ptr cache_ptr = nullptr; + + if (local_context->getSettingsRef()[Setting::use_object_storage_list_objects_cache] && object_storage->supportsListObjectsCache()) + { + auto & cache = ObjectStorageListObjectsCache::instance(); + ObjectStorageListObjectsCache::Key cache_key {object_storage->getDescription(), configuration->getNamespace(), configuration->getRawPath().cutGlobs(configuration->supportsPartialPathPrefix())}; + + if (auto objects_info = cache.get(cache_key, /*filter_by_prefix=*/ false)) + { + object_iterator = std::make_shared(std::move(*objects_info)); + } + else + { + cache_ptr = std::make_unique(cache, cache_key); + object_iterator = object_storage->iterate(configuration->getRawPath().cutGlobs(configuration->supportsPartialPathPrefix()), query_settings.list_object_keys_size, with_tags); + } + } + else + { + object_iterator = object_storage->iterate(configuration->getRawPath().cutGlobs(configuration->supportsPartialPathPrefix()), query_settings.list_object_keys_size, with_tags); + } + iterator = std::make_unique( object_storage, configuration, @@ -212,7 +237,9 @@ std::shared_ptr StorageObjectStorageSource::createFileIterator( query_settings.list_object_keys_size, query_settings.throw_on_zero_files_match, with_tags, - file_progress_callback); + file_progress_callback, + std::move(cache_ptr)); + } else { // Validate that extracted paths match the glob pattern to prevent scanning unallowed data @@ -892,19 +919,18 @@ std::unique_ptr createReadBuffer( } StorageObjectStorageSource::GlobIterator::GlobIterator( - ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + const ObjectStorageIteratorPtr & object_storage_iterator_, + ConfigurationPtr configuration_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, const NamesAndTypesList & hive_columns_, ContextPtr context_, ObjectInfos * read_keys_, - size_t list_object_keys_size, bool throw_on_zero_files_match_, - bool with_tags, - std::function file_progress_callback_) + std::function file_progress_callback_, + std::unique_ptr list_cache_) : WithContext(context_) - , object_storage(object_storage_) + , object_storage_iterator(object_storage_iterator_) , configuration(configuration_) , virtual_columns(virtual_columns_) , hive_columns(hive_columns_) @@ -913,6 +939,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( , read_keys(read_keys_) , local_context(context_) , file_progress_callback(file_progress_callback_) + , list_cache(std::move(list_cache_)) { const auto & reading_path = configuration->getPathForRead(); if (reading_path.hasGlobs()) @@ -920,8 +947,6 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( const auto & key_with_globs = reading_path; const auto key_prefix = reading_path.cutGlobs(configuration->supportsPartialPathPrefix()); - object_storage_iterator = object_storage->iterate(key_prefix, list_object_keys_size, with_tags); - matcher = std::make_unique(makeRegexpPatternFromGlobs(key_with_globs.path)); if (!matcher->ok()) { @@ -985,6 +1010,10 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextUnlocked(size_t /* p auto result = object_storage_iterator->getCurrentBatchAndScheduleNext(); if (!result.has_value()) { + if (list_cache) + { + list_cache->set(std::move(object_list)); + } is_finished = true; return {}; } @@ -995,6 +1024,11 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextUnlocked(size_t /* p std::back_inserter(new_batch), [&](const std::shared_ptr & object) { return std::make_shared(*object); }); + if (list_cache) + { + object_list.insert(object_list.end(), new_batch.begin(), new_batch.end()); + } + for (auto it = new_batch.begin(); it != new_batch.end();) { if (!recursive && !re2::RE2::FullMatch((*it)->getPath(), *matcher)) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 5f7907270023..77fa731de958 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -10,6 +10,9 @@ #include #include #include +#include + + namespace DB { @@ -176,18 +179,33 @@ class StorageObjectStorageSource::ReadTaskIterator : public IObjectIterator, pri class StorageObjectStorageSource::GlobIterator : public IObjectIterator, WithContext { public: + struct ListObjectsCacheWithKey + { + ListObjectsCacheWithKey(ObjectStorageListObjectsCache & cache_, const ObjectStorageListObjectsCache::Key & key_) : cache(cache_), key(key_) {} + + void set(ObjectStorageListObjectsCache::Value && value) const + { + cache.set(key, std::make_shared(std::move(value))); + } + + private: + ObjectStorageListObjectsCache & cache; + ObjectStorageListObjectsCache::Key key; + }; + + using ConfigurationPtr = std::shared_ptr; + GlobIterator( - ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + const ObjectStorageIteratorPtr & object_storage_iterator_, + ConfigurationPtr configuration_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, const NamesAndTypesList & hive_columns_, ContextPtr context_, ObjectInfos * read_keys_, - size_t list_object_keys_size, bool throw_on_zero_files_match_, - bool with_tags, - std::function file_progress_callback_ = {}); + std::function file_progress_callback_ = {}, + std::unique_ptr list_cache_ = nullptr); ~GlobIterator() override = default; @@ -200,7 +218,7 @@ class StorageObjectStorageSource::GlobIterator : public IObjectIterator, WithCon void createFilterAST(const String & any_key); void fillBufferForKey(const std::string & uri_key); - const ObjectStoragePtr object_storage; + ObjectStorageIteratorPtr object_storage_iterator; const StorageObjectStorageConfigurationPtr configuration; const NamesAndTypesList virtual_columns; const NamesAndTypesList hive_columns; @@ -212,7 +230,6 @@ class StorageObjectStorageSource::GlobIterator : public IObjectIterator, WithCon ObjectInfos object_infos; ObjectInfos * read_keys; ExpressionActionsPtr filter_expr; - ObjectStorageIteratorPtr object_storage_iterator; bool recursive{false}; std::vector expanded_keys; std::vector::iterator expanded_keys_iter; @@ -225,6 +242,8 @@ class StorageObjectStorageSource::GlobIterator : public IObjectIterator, WithCon const ContextPtr local_context; std::function file_progress_callback; + std::unique_ptr list_cache; + ObjectInfos object_list; }; class StorageObjectStorageSource::KeysIterator : public IObjectIterator diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index b63f357d776a..53c4d2b8e38c 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -144,6 +144,7 @@ SYSTEM DROP PAGE CACHE ['SYSTEM CLEAR PAGE CACHE','SYSTEM DROP PAGE CACHE','DROP SYSTEM DROP SCHEMA CACHE ['SYSTEM CLEAR SCHEMA CACHE','SYSTEM DROP SCHEMA CACHE','DROP SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP FORMAT SCHEMA CACHE ['SYSTEM CLEAR FORMAT SCHEMA CACHE','SYSTEM DROP FORMAT SCHEMA CACHE','DROP FORMAT SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP S3 CLIENT CACHE ['SYSTEM CLEAR S3 CLIENT CACHE','SYSTEM DROP S3 CLIENT','DROP S3 CLIENT CACHE'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP OBJECT STORAGE LIST OBJECTS CACHE ['SYSTEM DROP OBJECT STORAGE LIST OBJECTS CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP CACHE ['DROP CACHE'] \N SYSTEM SYSTEM RELOAD CONFIG ['RELOAD CONFIG'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD USERS ['RELOAD USERS'] GLOBAL SYSTEM RELOAD diff --git a/tests/queries/0_stateless/03377_object_storage_list_objects_cache.reference b/tests/queries/0_stateless/03377_object_storage_list_objects_cache.reference new file mode 100644 index 000000000000..76535ad25106 --- /dev/null +++ b/tests/queries/0_stateless/03377_object_storage_list_objects_cache.reference @@ -0,0 +1,103 @@ +-- { echoOn } + +-- The cached key should be `dir_`, and that includes all three files: 1, 2 and 3. Cache should return all three, but ClickHouse should filter out the third. +SELECT _path, * FROM s3(s3_conn, filename='dir_a/dir_b/t_03377_sample_{1..2}.parquet') order by id SETTINGS use_object_storage_list_objects_cache=1; +test/dir_a/dir_b/t_03377_sample_1.parquet 1 +test/dir_a/dir_b/t_03377_sample_2.parquet 2 +-- Make sure the filtering did not interfere with the cached values +SELECT _path, * FROM s3(s3_conn, filename='dir_a/dir_b/t_03377_sample_*.parquet') order by id SETTINGS use_object_storage_list_objects_cache=1; +test/dir_a/dir_b/t_03377_sample_1.parquet 1 +test/dir_a/dir_b/t_03377_sample_2.parquet 2 +test/dir_a/dir_b/t_03377_sample_3.parquet 3 +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['ObjectStorageListObjectsCacheMisses'] > 0 as miss +FROM system.query_log +where log_comment = 'cold_list_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'warm_list_exact_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 +SELECT ProfileEvents['ObjectStorageListObjectsCacheExactMatchHits'] > 0 as hit +FROM system.query_log +where log_comment = 'warm_list_exact_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 +SELECT ProfileEvents['ObjectStorageListObjectsCachePrefixMatchHits'] > 0 as prefix_match_hit +FROM system.query_log +where log_comment = 'warm_list_exact_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +0 +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'warm_list_prefix_match_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 +SELECT ProfileEvents['ObjectStorageListObjectsCacheExactMatchHits'] > 0 as exact_match_hit +FROM system.query_log +where log_comment = 'warm_list_prefix_match_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +0 +SELECT ProfileEvents['ObjectStorageListObjectsCachePrefixMatchHits'] > 0 as prefix_match_hit +FROM system.query_log +where log_comment = 'warm_list_prefix_match_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'even_shorter_prefix' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +0 +SELECT ProfileEvents['ObjectStorageListObjectsCacheMisses'] > 0 as miss +FROM system.query_log +where log_comment = 'even_shorter_prefix' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'still_exact_match_after_shorter_prefix' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 +SELECT ProfileEvents['ObjectStorageListObjectsCacheExactMatchHits'] > 0 as exact_match_hit +FROM system.query_log +where log_comment = 'still_exact_match_after_shorter_prefix' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'after_drop' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +0 +SELECT ProfileEvents['ObjectStorageListObjectsCacheMisses'] > 0 as miss +FROM system.query_log +where log_comment = 'after_drop' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; +1 diff --git a/tests/queries/0_stateless/03377_object_storage_list_objects_cache.sql b/tests/queries/0_stateless/03377_object_storage_list_objects_cache.sql new file mode 100644 index 000000000000..9638faa88d23 --- /dev/null +++ b/tests/queries/0_stateless/03377_object_storage_list_objects_cache.sql @@ -0,0 +1,115 @@ +-- Tags: no-parallel, no-fasttest + +SYSTEM DROP OBJECT STORAGE LIST OBJECTS CACHE; + +INSERT INTO TABLE FUNCTION s3(s3_conn, filename='dir_a/dir_b/t_03377_sample_{_partition_id}.parquet', format='Parquet', structure='id UInt64') PARTITION BY id SETTINGS s3_truncate_on_insert=1 VALUES (1), (2), (3); + +SELECT * FROM s3(s3_conn, filename='dir_**.parquet') Format Null SETTINGS use_object_storage_list_objects_cache=1, log_comment='cold_list_cache'; +SELECT * FROM s3(s3_conn, filename='dir_**.parquet') Format Null SETTINGS use_object_storage_list_objects_cache=1, log_comment='warm_list_exact_cache'; +SELECT * FROM s3(s3_conn, filename='dir_a/dir_b**.parquet') Format Null SETTINGS use_object_storage_list_objects_cache=1, log_comment='warm_list_prefix_match_cache'; +SELECT * FROM s3(s3_conn, filename='dirr_**.parquet') Format Null SETTINGS use_object_storage_list_objects_cache=1, log_comment='warm_list_cache_miss'; -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +SELECT * FROM s3(s3_conn, filename='d**.parquet') Format Null SETTINGS use_object_storage_list_objects_cache=1, log_comment='even_shorter_prefix'; +SELECT * FROM s3(s3_conn, filename='dir_**.parquet') Format Null SETTINGS use_object_storage_list_objects_cache=1, log_comment='still_exact_match_after_shorter_prefix'; +SYSTEM DROP OBJECT STORAGE LIST OBJECTS CACHE; +SELECT * FROM s3(s3_conn, filename='dir_**.parquet') Format Null SETTINGS use_object_storage_list_objects_cache=1, log_comment='after_drop'; + +-- { echoOn } + +-- The cached key should be `dir_`, and that includes all three files: 1, 2 and 3. Cache should return all three, but ClickHouse should filter out the third. +SELECT _path, * FROM s3(s3_conn, filename='dir_a/dir_b/t_03377_sample_{1..2}.parquet') order by id SETTINGS use_object_storage_list_objects_cache=1; + +-- Make sure the filtering did not interfere with the cached values +SELECT _path, * FROM s3(s3_conn, filename='dir_a/dir_b/t_03377_sample_*.parquet') order by id SETTINGS use_object_storage_list_objects_cache=1; + +SYSTEM FLUSH LOGS; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheMisses'] > 0 as miss +FROM system.query_log +where log_comment = 'cold_list_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'warm_list_exact_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheExactMatchHits'] > 0 as hit +FROM system.query_log +where log_comment = 'warm_list_exact_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCachePrefixMatchHits'] > 0 as prefix_match_hit +FROM system.query_log +where log_comment = 'warm_list_exact_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'warm_list_prefix_match_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheExactMatchHits'] > 0 as exact_match_hit +FROM system.query_log +where log_comment = 'warm_list_prefix_match_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCachePrefixMatchHits'] > 0 as prefix_match_hit +FROM system.query_log +where log_comment = 'warm_list_prefix_match_cache' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'even_shorter_prefix' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheMisses'] > 0 as miss +FROM system.query_log +where log_comment = 'even_shorter_prefix' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'still_exact_match_after_shorter_prefix' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheExactMatchHits'] > 0 as exact_match_hit +FROM system.query_log +where log_comment = 'still_exact_match_after_shorter_prefix' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheHits'] > 0 as hit +FROM system.query_log +where log_comment = 'after_drop' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; + +SELECT ProfileEvents['ObjectStorageListObjectsCacheMisses'] > 0 as miss +FROM system.query_log +where log_comment = 'after_drop' +AND type = 'QueryFinish' +ORDER BY event_time desc +LIMIT 1; From fbe10c0abb8bf7d0211c6414ee3a6fc13d12c933 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sat, 14 Feb 2026 15:43:19 -0300 Subject: [PATCH 2/6] settings history --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index dff7a55f8380..8a122405c1aa 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -78,6 +78,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory() {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", true, false, "It becomes obsolete."}, {"database_datalake_require_metadata_access", true, true, "New setting."}, {"automatic_parallel_replicas_min_bytes_per_replica", 0, 1_MiB, "Better default value derived from testing results"}, + {"use_object_storage_list_objects_cache", false, false, "New setting."}, }); addSettingsChanges(settings_changes_history, "25.12", { @@ -289,7 +290,6 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory() {"allow_experimental_lightweight_update", false, true, "Lightweight updates were moved to Beta."}, {"s3_slow_all_threads_after_retryable_error", false, false, "Added an alias for setting `backup_slow_all_threads_after_retryable_s3_error`"}, {"serialize_string_in_memory_with_zero_byte", true, true, "New setting"}, - {"iceberg_metadata_log_level", "none", "none", "New setting."}, {"use_object_storage_list_objects_cache", false, false, "New setting."}, }); addSettingsChanges(settings_changes_history, "25.7", From 127db0091ddb269cb288840a9d4ee74915b26957 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sun, 15 Feb 2026 21:41:52 -0300 Subject: [PATCH 3/6] fix header --- src/Storages/Cache/ObjectStorageListObjectsCache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/Cache/ObjectStorageListObjectsCache.h b/src/Storages/Cache/ObjectStorageListObjectsCache.h index 6cb6c3694d93..90b9c9ee5b75 100644 --- a/src/Storages/Cache/ObjectStorageListObjectsCache.h +++ b/src/Storages/Cache/ObjectStorageListObjectsCache.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include From 79d5de4439b10e5050fae4105e3f925e0d20bdd4 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sun, 15 Feb 2026 22:19:02 -0300 Subject: [PATCH 4/6] build fixes with some nasty workarounds --- .../Cache/ObjectStorageListObjectsCache.cpp | 12 +++++++++--- .../Cache/ObjectStorageListObjectsCache.h | 4 ++-- .../ObjectStorage/StorageObjectStorageSource.cpp | 16 ++++++++++++---- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/Storages/Cache/ObjectStorageListObjectsCache.cpp b/src/Storages/Cache/ObjectStorageListObjectsCache.cpp index a7aec57d9161..5cd183e67dc4 100644 --- a/src/Storages/Cache/ObjectStorageListObjectsCache.cpp +++ b/src/Storages/Cache/ObjectStorageListObjectsCache.cpp @@ -107,8 +107,9 @@ size_t ObjectStorageListObjectsCache::WeightFunction::operator()(const Value & v for (const auto & object : value) { - const auto object_metadata = object->metadata; - weight += object->relative_path.capacity() + sizeof(object_metadata); + const auto object_metadata = object->getObjectMetadata(); + + weight += object->getPath().capacity() + sizeof(object_metadata); // variable size if (object_metadata) @@ -120,6 +121,11 @@ size_t ObjectStorageListObjectsCache::WeightFunction::operator()(const Value & v { weight += k.capacity() + v.capacity(); } + + for (const auto & [k, v] : object_metadata->tags) + { + weight += k.capacity() + v.capacity(); + } } } @@ -177,7 +183,7 @@ std::optional ObjectStorageListObjectsCach for (const auto & object : *pair->mapped) { - if (object->relative_path.starts_with(key.prefix)) + if (object->getPath().starts_with(key.prefix)) { filtered_objects.push_back(object); } diff --git a/src/Storages/Cache/ObjectStorageListObjectsCache.h b/src/Storages/Cache/ObjectStorageListObjectsCache.h index 90b9c9ee5b75..0d7649f7f85b 100644 --- a/src/Storages/Cache/ObjectStorageListObjectsCache.h +++ b/src/Storages/Cache/ObjectStorageListObjectsCache.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include namespace DB @@ -38,7 +38,7 @@ class ObjectStorageListObjectsCache bool operator==(const Key & other) const; }; - using Value = StorageObjectStorage::ObjectInfos; + using Value = ObjectInfos; struct KeyHasher { size_t operator()(const Key & key) const; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 744ded3fab5b..5c9a3a04a63f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -213,7 +213,17 @@ std::shared_ptr StorageObjectStorageSource::createFileIterator( if (auto objects_info = cache.get(cache_key, /*filter_by_prefix=*/ false)) { - object_iterator = std::make_shared(std::move(*objects_info)); + /// suboptimal because of the recent upstream changes to the ObjectInfo structure + /// re-think this with more time and see if there is a more optimized approach + RelativePathsWithMetadata relative_path_with_metadata; + relative_path_with_metadata.reserve(objects_info->size()); + + for (const auto & object_info : *objects_info) + { + relative_path_with_metadata.emplace_back(std::make_shared(object_info->getPath())); + } + + object_iterator = std::make_shared(std::move(relative_path_with_metadata)); } else { @@ -227,16 +237,14 @@ std::shared_ptr StorageObjectStorageSource::createFileIterator( } iterator = std::make_unique( - object_storage, + object_iterator, configuration, predicate, virtual_columns, hive_columns, local_context, is_archive ? nullptr : read_keys, - query_settings.list_object_keys_size, query_settings.throw_on_zero_files_match, - with_tags, file_progress_callback, std::move(cache_ptr)); } From 16031ce18c3d7840b334007326d90d4526db56b7 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sun, 15 Feb 2026 22:33:51 -0300 Subject: [PATCH 5/6] fix list objects cache --- src/Storages/ObjectStorage/StorageObjectStorageSource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 5c9a3a04a63f..81509e8ae30c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -220,7 +220,7 @@ std::shared_ptr StorageObjectStorageSource::createFileIterator( for (const auto & object_info : *objects_info) { - relative_path_with_metadata.emplace_back(std::make_shared(object_info->getPath())); + relative_path_with_metadata.emplace_back(std::make_shared(object_info->getPath(), object_info->getObjectMetadata())); } object_iterator = std::make_shared(std::move(relative_path_with_metadata)); From 988de0d8ffedcfd45a1e27856241e95adfa6d49b Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sun, 15 Feb 2026 22:44:26 -0300 Subject: [PATCH 6/6] thx ai --- .../Cache/ObjectStorageListObjectsCache.cpp | 10 +++- ...test_object_storage_list_objects_cache.cpp | 51 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/Storages/Cache/ObjectStorageListObjectsCache.cpp b/src/Storages/Cache/ObjectStorageListObjectsCache.cpp index 5cd183e67dc4..25d4f6ad3406 100644 --- a/src/Storages/Cache/ObjectStorageListObjectsCache.cpp +++ b/src/Storages/Cache/ObjectStorageListObjectsCache.cpp @@ -142,7 +142,15 @@ void ObjectStorageListObjectsCache::set( const std::shared_ptr & value) { auto key_with_ttl = key; - key_with_ttl.expires_at = std::chrono::steady_clock::now() + std::chrono::seconds(ttl_in_seconds); + + if (ttl_in_seconds == 0) + { + key_with_ttl.expires_at = std::chrono::steady_clock::time_point::max(); + } + else + { + key_with_ttl.expires_at = std::chrono::steady_clock::now() + std::chrono::seconds(ttl_in_seconds); + } cache.set(key_with_ttl, value); } diff --git a/src/Storages/Cache/tests/gtest_object_storage_list_objects_cache.cpp b/src/Storages/Cache/tests/gtest_object_storage_list_objects_cache.cpp index 3b719d4df3e3..06205d24174f 100644 --- a/src/Storages/Cache/tests/gtest_object_storage_list_objects_cache.cpp +++ b/src/Storages/Cache/tests/gtest_object_storage_list_objects_cache.cpp @@ -133,6 +133,57 @@ TEST_F(ObjectStorageListObjectsCacheTest, TTLExpiration) EXPECT_FALSE(cache->get(default_key)); } +TEST_F(ObjectStorageListObjectsCacheTest, TTLUnlimited) +{ + cache->clear(); + cache->setTTL(0); // 0 means unlimited + auto value = createTestValue({"test-prefix/file1.txt"}); + + cache->set(default_key, value); + + // Verify we can get it immediately + auto result1 = cache->get(default_key).value(); + EXPECT_EQ(result1.size(), 1); + + // Sleep for a reasonable amount (longer than the default 3 second TTL from SetUp) + std::this_thread::sleep_for(std::chrono::seconds(5)); + + // Should still be available since TTL is unlimited + auto result2 = cache->get(default_key).value(); + EXPECT_EQ(result2.size(), 1); + EXPECT_EQ(result2[0]->getPath(), "test-prefix/file1.txt"); +} + +TEST_F(ObjectStorageListObjectsCacheTest, TTLSwitchFromUnlimitedToFinite) +{ + cache->clear(); + cache->setTTL(0); // Start with unlimited + auto value1 = createTestValue({"test-prefix/file1.txt"}); + auto key1 = default_key; + key1.prefix = "unlimited/"; + + cache->set(key1, value1); + + // Switch to finite TTL and add another entry + cache->setTTL(1); + auto value2 = createTestValue({"test-prefix/file2.txt"}); + auto key2 = default_key; + key2.prefix = "finite/"; + + cache->set(key2, value2); + + // Verify both are available immediately + EXPECT_TRUE(cache->get(key1).has_value()); + EXPECT_TRUE(cache->get(key2).has_value()); + + // Wait for finite TTL entry to expire + std::this_thread::sleep_for(std::chrono::seconds(2)); + + // Unlimited entry should still be there, finite should be gone + EXPECT_TRUE(cache->get(key1).has_value()); + EXPECT_FALSE(cache->get(key2).has_value()); +} + TEST_F(ObjectStorageListObjectsCacheTest, BestPrefixMatch) { cache->clear();