From 7a09edb1b7a4498b663b1e874a511439dacbd173 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 15 Mar 2026 23:49:24 +0800 Subject: [PATCH 001/102] string_util.cc: NumberToHumanString/BytesToHumanString: Fixed width This makes ToplingDB Web auto refresh pretty --- util/string_util.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/util/string_util.cc b/util/string_util.cc index b47140cbdc..02213c8ef0 100644 --- a/util/string_util.cc +++ b/util/string_util.cc @@ -118,13 +118,13 @@ std::string NumberToHumanString(int64_t num) { char buf[19]; int64_t absnum = num < 0 ? -num : num; if (absnum < 10000) { - snprintf(buf, sizeof(buf), "%" PRIi64, num); + snprintf(buf, sizeof(buf), "%4" PRIi64 " ", num); } else if (absnum < 10000000) { - snprintf(buf, sizeof(buf), "%" PRIi64 "K", num / 1000); + snprintf(buf, sizeof(buf), "%4" PRIi64 "K", num / 1000); } else if (absnum < 10000000000LL) { - snprintf(buf, sizeof(buf), "%" PRIi64 "M", num / 1000000); + snprintf(buf, sizeof(buf), "%4" PRIi64 "M", num / 1000000); } else { - snprintf(buf, sizeof(buf), "%" PRIi64 "G", num / 1000000000); + snprintf(buf, sizeof(buf), "%4" PRIi64 "G", num / 1000000000); } return std::string(buf); } @@ -144,7 +144,7 @@ std::string BytesToHumanString(uint64_t bytes) { } char buf[20]; - snprintf(buf, sizeof(buf), "%.2f %s", final_size, size_name[size_idx]); + snprintf(buf, sizeof(buf), "%7.2f %s", final_size, size_name[size_idx]); return std::string(buf); } From 6735b70df95dfaf6e4298c184541c8949a6196aa Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 16 Mar 2026 00:04:25 +0800 Subject: [PATCH 002/102] internal_stats.cc: DumpDBStats() pretty align --- db/internal_stats.cc | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/db/internal_stats.cc b/db/internal_stats.cc index a953e5a8bb..940361e75f 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -1624,7 +1624,7 @@ void InternalStats::DumpDBStats(std::string* value) { 
// The format is the same for interval stats. snprintf(buf, sizeof(buf), "Cumulative writes: %s writes, %s keys, %s commit groups, " - "%.1f writes per commit group, ingest: %.2f GB, %.2f MB/s\n", + "%.1f writes per commit group, ingest: %7.2f GB, %7.2f MB/s\n", NumberToHumanString(write_other + write_self).c_str(), NumberToHumanString(num_keys_written).c_str(), NumberToHumanString(write_self).c_str(), @@ -1635,20 +1635,13 @@ void InternalStats::DumpDBStats(std::string* value) { value->append(buf); // WAL snprintf(buf, sizeof(buf), - "Cumulative WAL: %s writes, %s syncs, " - "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n", + "Cumulative WAL : %s writes, %s sync, " + "%7.2f writes per sync, written: %7.2f GB, %7.2f MB/s\n", NumberToHumanString(write_with_wal).c_str(), NumberToHumanString(wal_synced).c_str(), write_with_wal / std::max(1.0, static_cast<double>(wal_synced)), wal_bytes / kGB, wal_bytes / kMB / std::max(seconds_up, 0.001)); value->append(buf); - // Stall - AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true); - snprintf(buf, sizeof(buf), "Cumulative stall: %s, %.1f percent\n", - human_micros, - // 10000 = divide by 1M to get secs, then multiply by 100 for pct - write_stall_micros / 10000.0 / std::max(seconds_up, 0.001)); - value->append(buf); // Interval uint64_t interval_write_other = write_other - db_stats_snapshot_.write_other; @@ -1657,8 +1650,8 @@ void InternalStats::DumpDBStats(std::string* value) { num_keys_written - db_stats_snapshot_.num_keys_written; snprintf( buf, sizeof(buf), - "Interval writes: %s writes, %s keys, %s commit groups, " - "%.1f writes per commit group, ingest: %.2f MB, %.2f MB/s\n", + "Interval writes: %s writes, %s keys, %s commit groups, " + "%.1f writes per commit group, ingest: %7.2f MB, %7.2f MB/s\n", NumberToHumanString(interval_write_other + interval_write_self).c_str(), NumberToHumanString(interval_num_keys_written).c_str(), NumberToHumanString(interval_write_self).c_str(), @@ -1675,8 +1668,8 @@ void 
InternalStats::DumpDBStats(std::string* value) { uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes; snprintf(buf, sizeof(buf), - "Interval WAL: %s writes, %s syncs, " - "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n", + "Interval WAL : %s writes, %s sync, " + "%7.2f writes per sync, written: %7.2f GB, %7.2f MB/s\n", NumberToHumanString(interval_write_with_wal).c_str(), NumberToHumanString(interval_wal_synced).c_str(), interval_write_with_wal / @@ -1685,10 +1678,17 @@ void InternalStats::DumpDBStats(std::string* value) { interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001)); value->append(buf); + // Stall + AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true); + snprintf(buf, sizeof(buf), "Cumulative stall : %s, %.1f percent\n", + human_micros, + // 10000 = divide by 1M to get secs, then multiply by 100 for pct + write_stall_micros / 10000.0 / std::max(seconds_up, 0.001)); + value->append(buf); // Stall AppendHumanMicros(write_stall_micros - db_stats_snapshot_.write_stall_micros, human_micros, kHumanMicrosLen, true); - snprintf(buf, sizeof(buf), "Interval stall: %s, %.1f percent\n", human_micros, + snprintf(buf, sizeof(buf), "Interval stall : %s, %.1f percent\n", human_micros, // 10000 = divide by 1M to get secs, then multiply by 100 for pct (write_stall_micros - db_stats_snapshot_.write_stall_micros) / 10000.0 / std::max(interval_seconds_up, 0.001)); From 943717b8edcb7b414f5a7ae268cbd421d58481e8 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 16 Mar 2026 00:54:54 +0800 Subject: [PATCH 003/102] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 07d3333f80..0484557cac 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 07d3333f80e154f14299a4877a13ed1801ff93b5 +Subproject commit 0484557cac646cd568bfbcd976321cf62ef7c122 From 
345d747772aca1b2f13a3b4d0ccd959e2cb1da45 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 18 Mar 2026 22:27:24 +0800 Subject: [PATCH 004/102] Makefile: install topling site and db_bench_enterprise.yaml --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 1decc8b01b..4cf612fcde 100644 --- a/Makefile +++ b/Makefile @@ -2625,6 +2625,11 @@ install-headers: gen-pc install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/fsa/ppi/*.hpp $(DESTDIR)/$(PREFIX)/include/terark/fsa/ppi install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/zbs/*.hpp $(DESTDIR)/$(PREFIX)/include/terark/zbs cp -ar ${TOPLING_CORE_DIR}/boost-include/boost $(DESTDIR)/$(PREFIX)/include + install -d $(DESTDIR)/$(PREFIX)/site + install -d $(DESTDIR)/$(PREFIX)/toplingdb-conf + install -C -m 644 sideplugin/rockside/src/topling/web/index.html $(DESTDIR)/$(PREFIX)/site + install -C -m 644 sideplugin/rockside/src/topling/web/style.css $(DESTDIR)/$(PREFIX)/site + install -C -m 644 sideplugin/rockside/sample-conf/db_bench_enterprise.yaml $(DESTDIR)/$(PREFIX)/site install -C -m 644 rocksdb.pc $(INSTALL_LIBDIR)/pkgconfig/rocksdb.pc install-static: $(LIBRARY) static_lib From 5fcbac691d66f446839f644de1928b652f8c3bb8 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 18 Mar 2026 22:48:05 +0800 Subject: [PATCH 005/102] README: Add toplingdb sdk trial version with ToplingZipTable --- README-zh_cn.md | 9 +++++++-- README.md | 10 ++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/README-zh_cn.md b/README-zh_cn.md index 9be6c5e326..59e1675719 100644 --- a/README-zh_cn.md +++ b/README-zh_cn.md @@ -4,7 +4,7 @@ ToplingDB 由[北京拓扑岭科技有限公司](https://topling.cn)开发与维 ## 快速开始 ToplingDB 需要 C++17,推荐 gcc 8.3 以上,或者 clang 也行。 -ToplingDB 比 RocksDB 快得多,您可以自己快速验证: +ToplingDB 比 RocksDB 快得多,您可以自己快速验证,[下载 ToplingDB 企业版](https://topling-tools.oss-cn-qingdao.aliyuncs.com/toplingdb-8.10-trail90.tgz),或者自己编译: ### Compile & run db_bench ```bash sudo yum -y install git libaio-devel gcc-c++ 
gflags-devel zlib-devel bzip2-devel libcurl-devel liburing-devel snappy-devel jemalloc-devel @@ -15,7 +15,12 @@ make -j`nproc` db_bench DEBUG_LEVEL=0 sudo make install PREFIX=/some/path # default is /usr/local ``` -以上编译命令执行后,运行 [db_bench.sh](db_bench.sh)(需要[端口 2011](https://github.com/topling/rockside/blob/master/sample-conf/db_bench_enterprise.yaml#L4 "内嵌的 http web 服务使用端口 2011")),然后使用 ToplingDB:[原生 C++](https://github.com/topling/rockside/wiki/101 "典型场景是从 rocksdb 迁移过来)"),也支持 [Java](https://github.com/topling/rockside/wiki/SidePlugin-Java-Binding "内置在本 github 仓库中") 和 [Rust](https://github.com/topling/rust-toplingdb "另外的专门的 github 仓库")。 +下载解压或者自行编译后,运行 [db_bench.sh](db_bench.sh)(需要[端口 2011](https://github.com/topling/rockside/blob/master/sample-conf/db_bench_enterprise.yaml#L4 "内嵌的 http web 服务使用端口 2011")),然后使用 ToplingDB:[原生 C++](https://github.com/topling/rockside/wiki/101 "典型场景是从 rocksdb 迁移过来)"),也支持 [Java](https://github.com/topling/rockside/wiki/SidePlugin-Java-Binding "内置在本 github 仓库中") 和 [Rust](https://github.com/topling/rust-toplingdb "另外的专门的 github 仓库")。 + +> 自己编译的开源版没有 [Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable)(采用Topling可检索压缩算法的SST), +[下载 ToplingDB 企业版](https://topling-tools.oss-cn-qingdao.aliyuncs.com/toplingdb-8.10-trail90.tgz) 包含 +[Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable) 90 天试用版,开源版和企业版是二进制兼容的,可以互相替换, +唯一的不同是企业版包含[Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable)。 ## 简单介绍 ToplingDB 的子模块 **[rockside](https://github.com/topling/rockside)** 是 ToplingDB 的入口,详情参考 **[SidePlugin wiki](https://github.com/topling/rockside/wiki)**。 diff --git a/README.md b/README.md index f1220135e1..0ee08b0b8e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ToplingDB is developed and maintained by [Topling Inc](https://topling.cn). See ## Quick Start ToplingDB requires C++17, gcc 8.3 or newer is recommended, clang also works. 
-ToplingDB is forked form [RocksDB](https://github.com/facebook/rocksdb), much faster than RocksDB, try it by yourself: +ToplingDB is forked from [RocksDB](https://github.com/facebook/rocksdb), much faster than RocksDB, you can [download ToplingDB Enterprise](https://topling-tools.oss-cn-qingdao.aliyuncs.com/toplingdb-8.10-trail90.tgz) or compile it by yourself: ### Compile & run db_bench ```bash sudo yum -y install git libaio-devel gcc-c++ gflags-devel zlib-devel bzip2-devel libcurl-devel liburing-devel snappy-devel jemalloc-devel @@ -16,7 +16,13 @@ make -j`nproc` db_bench DEBUG_LEVEL=0 sudo make install PREFIX=/some/path # default is /usr/local ``` -After compile, you can run bundled [db_bench.sh](db_bench.sh)(need [port 2011](https://github.com/topling/rockside/blob/master/sample-conf/db_bench_enterprise.yaml#L4 "use port 2011 for embeded http server")), then use ToplingDB [in C++](https://github.com/topling/sideplugin-wiki-en/wiki/101 "maybe migrate from rocksdb"), or in [Java](https://github.com/topling/sideplugin-wiki-en/wiki/SidePlugin-Java-Binding "Bundled in this repo"), [Rust](https://github.com/topling/rust-toplingdb "A seperated repo"). +After download+uncompress or compile, you can run bundled [db_bench.sh](db_bench.sh)(need [port 2011](https://github.com/topling/rockside/blob/master/sample-conf/db_bench_enterprise.yaml#L4 "use port 2011 for embedded http server")), then use ToplingDB [in C++](https://github.com/topling/sideplugin-wiki-en/wiki/101 "maybe migrate from rocksdb"), or in [Java](https://github.com/topling/sideplugin-wiki-en/wiki/SidePlugin-Java-Binding "Bundled in this repo"), [Rust](https://github.com/topling/rust-toplingdb "A separate repo"). 
+ +> The open-source version of ToplingDB lacks [Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable) (SST using Topling's searchable compression algorithm); +[download ToplingDB Enterprise](https://topling-tools.oss-cn-qingdao.aliyuncs.com/toplingdb-8.10-trail90.tgz) to try +[Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable) for 90 days. +The shared libraries of the open-source and enterprise versions are binary compatible and interchangeable; +the only difference is that the enterprise version includes [Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable). ## Introduction ToplingDB's submodule **[rockside](https://github.com/topling/rockside)** is the entry point of ToplingDB, see **[SidePlugin wiki](https://github.com/topling/sideplugin-wiki-en/wiki)**. From 2474da9205da0cd9447dd5732add0dfa78063674 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 18 Mar 2026 23:42:31 +0800 Subject: [PATCH 006/102] Makefile: bugfix: del '/' in $(DESTDIR)/$(PREFIX) --- Makefile | 66 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index 4cf612fcde..a31a6c4841 100644 --- a/Makefile +++ b/Makefile @@ -2592,44 +2592,44 @@ install-headers: gen-pc install -d $(INSTALL_LIBDIR) install -d $(INSTALL_LIBDIR)/pkgconfig for header_dir in `$(FIND) "include/rocksdb" -type d`; do \ - install -d $(DESTDIR)/$(PREFIX)/$$header_dir; \ + install -d $(DESTDIR)$(PREFIX)/$$header_dir; \ done for header in `$(FIND) "include/rocksdb" -type f -name *.h`; do \ - install -C -m 644 $$header $(DESTDIR)/$(PREFIX)/$$header; \ + install -C -m 644 $$header $(DESTDIR)$(PREFIX)/$$header; \ done for header in $(ROCKSDB_PLUGIN_HEADERS); do \ - install -d $(DESTDIR)/$(PREFIX)/include/rocksdb/`dirname $$header`; \ - install -C -m 644 $$header $(DESTDIR)/$(PREFIX)/include/rocksdb/$$header; \ + install -d $(DESTDIR)$(PREFIX)/include/rocksdb/`dirname $$header`; \ + install -C 
-m 644 $$header $(DESTDIR)$(PREFIX)/include/rocksdb/$$header; \ done - install -d $(DESTDIR)/$(PREFIX)/include/topling - install -C -m 644 sideplugin/rockside/src/topling/json.h $(DESTDIR)/$(PREFIX)/include/topling - install -C -m 644 sideplugin/rockside/src/topling/json_fwd.h $(DESTDIR)/$(PREFIX)/include/topling - install -C -m 644 sideplugin/rockside/src/topling/builtin_table_factory.h $(DESTDIR)/$(PREFIX)/include/topling - install -C -m 644 sideplugin/rockside/src/topling/side_plugin_repo.h $(DESTDIR)/$(PREFIX)/include/topling - install -C -m 644 sideplugin/rockside/src/topling/side_plugin_factory.h $(DESTDIR)/$(PREFIX)/include/topling - install -d $(DESTDIR)/$(PREFIX)/include/terark - install -d $(DESTDIR)/$(PREFIX)/include/terark/io - install -d $(DESTDIR)/$(PREFIX)/include/terark/succinct - install -d $(DESTDIR)/$(PREFIX)/include/terark/thread - install -d $(DESTDIR)/$(PREFIX)/include/terark/util - install -d $(DESTDIR)/$(PREFIX)/include/terark/fsa - install -d $(DESTDIR)/$(PREFIX)/include/terark/fsa/ppi - install -d $(DESTDIR)/$(PREFIX)/include/terark/zbs - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/*.hpp $(DESTDIR)/$(PREFIX)/include/terark - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/io/*.hpp $(DESTDIR)/$(PREFIX)/include/terark/io - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/succinct/*.hpp $(DESTDIR)/$(PREFIX)/include/terark/succinct - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/thread/*.hpp $(DESTDIR)/$(PREFIX)/include/terark/thread - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/util/*.hpp $(DESTDIR)/$(PREFIX)/include/terark/util - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/fsa/*.hpp $(DESTDIR)/$(PREFIX)/include/terark/fsa - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/fsa/*.inl $(DESTDIR)/$(PREFIX)/include/terark/fsa - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/fsa/ppi/*.hpp $(DESTDIR)/$(PREFIX)/include/terark/fsa/ppi - install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/zbs/*.hpp 
$(DESTDIR)/$(PREFIX)/include/terark/zbs - cp -ar ${TOPLING_CORE_DIR}/boost-include/boost $(DESTDIR)/$(PREFIX)/include - install -d $(DESTDIR)/$(PREFIX)/site - install -d $(DESTDIR)/$(PREFIX)/toplingdb-conf - install -C -m 644 sideplugin/rockside/src/topling/web/index.html $(DESTDIR)/$(PREFIX)/site - install -C -m 644 sideplugin/rockside/src/topling/web/style.css $(DESTDIR)/$(PREFIX)/site - install -C -m 644 sideplugin/rockside/sample-conf/db_bench_enterprise.yaml $(DESTDIR)/$(PREFIX)/site + install -d $(DESTDIR)$(PREFIX)/include/topling + install -C -m 644 sideplugin/rockside/src/topling/json.h $(DESTDIR)$(PREFIX)/include/topling + install -C -m 644 sideplugin/rockside/src/topling/json_fwd.h $(DESTDIR)$(PREFIX)/include/topling + install -C -m 644 sideplugin/rockside/src/topling/builtin_table_factory.h $(DESTDIR)$(PREFIX)/include/topling + install -C -m 644 sideplugin/rockside/src/topling/side_plugin_repo.h $(DESTDIR)$(PREFIX)/include/topling + install -C -m 644 sideplugin/rockside/src/topling/side_plugin_factory.h $(DESTDIR)$(PREFIX)/include/topling + install -d $(DESTDIR)$(PREFIX)/include/terark + install -d $(DESTDIR)$(PREFIX)/include/terark/io + install -d $(DESTDIR)$(PREFIX)/include/terark/succinct + install -d $(DESTDIR)$(PREFIX)/include/terark/thread + install -d $(DESTDIR)$(PREFIX)/include/terark/util + install -d $(DESTDIR)$(PREFIX)/include/terark/fsa + install -d $(DESTDIR)$(PREFIX)/include/terark/fsa/ppi + install -d $(DESTDIR)$(PREFIX)/include/terark/zbs + install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/*.hpp $(DESTDIR)$(PREFIX)/include/terark + install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/io/*.hpp $(DESTDIR)$(PREFIX)/include/terark/io + install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/succinct/*.hpp $(DESTDIR)$(PREFIX)/include/terark/succinct + install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/thread/*.hpp $(DESTDIR)$(PREFIX)/include/terark/thread + install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/util/*.hpp $(DESTDIR)$(PREFIX)/include/terark/util + 
install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/fsa/*.hpp $(DESTDIR)$(PREFIX)/include/terark/fsa + install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/fsa/*.inl $(DESTDIR)$(PREFIX)/include/terark/fsa + install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/fsa/ppi/*.hpp $(DESTDIR)$(PREFIX)/include/terark/fsa/ppi + install -C -m 644 ${TOPLING_CORE_DIR}/src/terark/zbs/*.hpp $(DESTDIR)$(PREFIX)/include/terark/zbs + cp -ar ${TOPLING_CORE_DIR}/boost-include/boost $(DESTDIR)$(PREFIX)/include + install -d $(DESTDIR)$(PREFIX)/site + install -d $(DESTDIR)$(PREFIX)/toplingdb-conf + install -C -m 644 sideplugin/rockside/src/topling/web/index.html $(DESTDIR)$(PREFIX)/site + install -C -m 644 sideplugin/rockside/src/topling/web/style.css $(DESTDIR)$(PREFIX)/site + install -C -m 644 sideplugin/rockside/sample-conf/db_bench_enterprise.yaml $(DESTDIR)$(PREFIX)/toplingdb-conf install -C -m 644 rocksdb.pc $(INSTALL_LIBDIR)/pkgconfig/rocksdb.pc install-static: $(LIBRARY) static_lib From 68fbc5f75f4a8c1c0f18cdf2626e96231f3ab441 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 18 Mar 2026 23:46:39 +0800 Subject: [PATCH 007/102] Add build*.sh --- build-min-dep-jni.sh | 35 +++++++++++++++++++++++++++++++++++ build-min-dep-release.sh | 11 +++++++++++ build-trial.sh | 25 +++++++++++++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 build-min-dep-jni.sh create mode 100644 build-min-dep-release.sh create mode 100644 build-trial.sh diff --git a/build-min-dep-jni.sh b/build-min-dep-jni.sh new file mode 100644 index 0000000000..95bde57a19 --- /dev/null +++ b/build-min-dep-jni.sh @@ -0,0 +1,35 @@ +#!/usr/bin/bash + +# ex: topling-8.10.2-frocksdb-1.0, part will be ignored +if [ -z "${TOPLING_VERSION}" ]; then + GITHUB_REF=`git symbolic-ref HEAD` + TOPLING_VERSION=`echo ${GITHUB_REF} | sed -n 's:^refs/tags/topling-'${ROCKSDB_VERSION}'[-_a-z]*\([.0-9]\):\1:p'` + if [ -z "${TOPLING_VERSION}" ]; then + echo TOPLING_VERSION is not set and can not parse from HEAD ref >&2 + exit 1 + fi +fi + 
+export USE_LTO=1 +export UPDATE_REPO=0 +export DEBUG_LEVEL=0 +export DISABLE_JEMALLOC=1 +export ROCKSDB_DISABLE_GFLAGS=1 +export TOPLING_USE_DYNAMIC_TLS=1 +export TOPLING_ZIP_TABLE_TRIAL_DAYS=90 +MAJOR_DOT_MINOR=`build_tools/version.sh major`.`build_tools/version.sh minor` + +exebin=toplingdb-${MAJOR_DOT_MINOR}/bin/dcompact_worker.exe +strip ${exebin} +patchelf --replace-needed librocksdb.so.${MAJOR_DOT_MINOR} librocksdbjni-linux64.so ${exebin} +gzip ${exebin} +mv ${exebin}.gz java/target/dcompact_worker.gz + +ROCKSDB_VERSION=`build_tools/version.sh full` +ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}-topling-${TOPLING_VERSION} +cd java/target +db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml.template` +TARGET_JAR=${db_artifactId}-${ROCKSDB_JAVA_VERSION}.jar +mv rocksdbjni-${ROCKSDB_VERSION}-linux64.jar ${TARGET_JAR} +shasum -a 1 ${TARGET_JAR} > ${TARGET_JAR}.sha1 +md5sum ${TARGET_JAR} > ${TARGET_JAR}.md5 diff --git a/build-min-dep-release.sh b/build-min-dep-release.sh new file mode 100644 index 0000000000..8bb12c2501 --- /dev/null +++ b/build-min-dep-release.sh @@ -0,0 +1,11 @@ +#!/usr/bin/bash +# Builds the static compression libs, then rocksdbjava + install-dcompact with PREFIX=install-here; the make arguments below are backslash-continued onto one command. +export USE_LTO=1 +export ROCKSDB_DISABLE_GFLAGS=1 +export TOPLING_ZIP_TABLE_TRIAL_DAYS=90 +make UPDATE_REPO=0 DEBUG_LEVEL=0 DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 -j60 libsnappy.a liblz4.a libbz2.a +make rocksdbjava install-dcompact -j`nproc` \ + DEBUG_LEVEL=0 UPDATE_REPO=0 PREFIX=install-here \ + DISABLE_JEMALLOC=1 TOPLING_USE_DYNAMIC_TLS=1 \ + STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + diff --git a/build-trial.sh b/build-trial.sh new file mode 100644 index 0000000000..9e6bf4085b --- /dev/null +++ b/build-trial.sh @@ -0,0 +1,25 @@ +#!/usr/bin/bash + +export USE_LTO=1 +export UPDATE_REPO=0 +export DEBUG_LEVEL=0 +export DISABLE_JEMALLOC=1 +#export ROCKSDB_DISABLE_GFLAGS=1 +export TOPLING_USE_DYNAMIC_TLS=1 +export TOPLING_ZIP_TABLE_TRIAL_DAYS=90 +MAJOR_DOT_MINOR=`build_tools/version.sh major`.`build_tools/version.sh minor` + +make -j60 
libsnappy.a liblz4.a libbz2.a +make install-dcompact install-dev db_bench -j`nproc` \ + PREFIX=toplingdb-${MAJOR_DOT_MINOR} STRIP_DEBUG_INFO=1 + +install -C -m 755 db_bench toplingdb-${MAJOR_DOT_MINOR}/bin +install -C -m 755 db_bench.sh toplingdb-${MAJOR_DOT_MINOR} +sed -e 's:sideplugin/rockside/src/topling/web:site:' \ + -e 's:sideplugin/rockside/sample-conf:toplingdb-conf:' \ + -e 's:\./db_bench:bin/db_bench:' \ + -e '/ulimit/iexport LD_LIBRARY_PATH=lib:$LD_LIBRARY_PATH' \ + -i toplingdb-${MAJOR_DOT_MINOR}/db_bench.sh +sdk=toplingdb-${MAJOR_DOT_MINOR}-trail${TOPLING_ZIP_TABLE_TRIAL_DAYS}.tgz +tar czf ${sdk} toplingdb-${MAJOR_DOT_MINOR} +ossutil cp --region=cn-qingdao -f ${sdk} oss://topling-tools/ From 4a689cbbd994996971f4ebdacd24981fbd9eae8c Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 19 Mar 2026 01:49:10 +0800 Subject: [PATCH 008/102] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 0484557cac..b4d5a4205d 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 0484557cac646cd568bfbcd976321cf62ef7c122 +Subproject commit b4d5a4205d13a102fe082b1b113a2a4e2cf2a6b0 From 89e4a4a91115c68342f40af59313ee35a41328bc Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 19 Mar 2026 10:06:50 +0800 Subject: [PATCH 009/102] Makefile: install-shared & install-dcompact respect var STRIP_DEBUG_INFO And strip db_bench in build-trial.sh --- Makefile | 6 ++++++ build-trial.sh | 1 + 2 files changed, 7 insertions(+) diff --git a/Makefile b/Makefile index a31a6c4841..3895439346 100644 --- a/Makefile +++ b/Makefile @@ -2639,6 +2639,9 @@ install-static: $(LIBRARY) static_lib install-shared: $(SHARED4) shared_lib install -d $(INSTALL_LIBDIR) install -C -m 755 $(SHARED4) $(INSTALL_LIBDIR) +ifeq ($(STRIP_DEBUG_INFO),1) + $(STRIP_CMD) $(INSTALL_LIBDIR)/$(SHARED4) +endif ln -fs $(SHARED4) $(INSTALL_LIBDIR)/$(SHARED3) ln -fs $(SHARED4) 
$(INSTALL_LIBDIR)/$(SHARED2) ln -fs $(SHARED4) $(INSTALL_LIBDIR)/$(SHARED1) @@ -2652,6 +2655,9 @@ install-dev: install-dev-${LIB_MODE} install-dcompact: install dcompact_worker install -d $(DESTDIR)$(PREFIX)/bin install -C -m 755 sideplugin/topling-dcompact/tools/dcompact/${ORIG_OBJ_DIR}/dcompact_worker.exe $(DESTDIR)$(PREFIX)/bin +ifeq ($(STRIP_DEBUG_INFO),1) + $(STRIP_CMD) $(DESTDIR)$(PREFIX)/bin/dcompact_worker.exe +endif install-tools: install tools mkdir -p $(DESTDIR)$(PREFIX)/bin diff --git a/build-trial.sh b/build-trial.sh index 9e6bf4085b..c767b70466 100644 --- a/build-trial.sh +++ b/build-trial.sh @@ -15,6 +15,7 @@ make install-dcompact install-dev db_bench -j`nproc` \ install -C -m 755 db_bench toplingdb-${MAJOR_DOT_MINOR}/bin install -C -m 755 db_bench.sh toplingdb-${MAJOR_DOT_MINOR} +strip toplingdb-${MAJOR_DOT_MINOR}/bin/db_bench sed -e 's:sideplugin/rockside/src/topling/web:site:' \ -e 's:sideplugin/rockside/sample-conf:toplingdb-conf:' \ -e 's:\./db_bench:bin/db_bench:' \ From 1aa07fad57e5e7dd7814cb84b62c25a017ade79e Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 19 Mar 2026 12:01:55 +0800 Subject: [PATCH 010/102] Update build-min-dep-jni.sh --- build-min-dep-jni.sh | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/build-min-dep-jni.sh b/build-min-dep-jni.sh index 95bde57a19..9ee0d752f0 100644 --- a/build-min-dep-jni.sh +++ b/build-min-dep-jni.sh @@ -19,17 +19,34 @@ export TOPLING_USE_DYNAMIC_TLS=1 export TOPLING_ZIP_TABLE_TRIAL_DAYS=90 MAJOR_DOT_MINOR=`build_tools/version.sh major`.`build_tools/version.sh minor` -exebin=toplingdb-${MAJOR_DOT_MINOR}/bin/dcompact_worker.exe -strip ${exebin} +make -j60 libsnappy.a liblz4.a libbz2.a +make rocksdbjava install-dcompact -j`nproc` BUILD_PREFIX=min-dep-jni/ \ + PREFIX=min-dep-jni STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 + +exebin=min-dep-jni/bin/dcompact_worker.exe patchelf --replace-needed librocksdb.so.${MAJOR_DOT_MINOR} librocksdbjni-linux64.so 
${exebin} -gzip ${exebin} -mv ${exebin}.gz java/target/dcompact_worker.gz ROCKSDB_VERSION=`build_tools/version.sh full` -ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}-topling-${TOPLING_VERSION} +ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}-topling-${TOPLING_VERSION}-trial${TOPLING_ZIP_TABLE_TRIAL_DAYS} cd java/target db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml.template` TARGET_JAR=${db_artifactId}-${ROCKSDB_JAVA_VERSION}.jar mv rocksdbjni-${ROCKSDB_VERSION}-linux64.jar ${TARGET_JAR} +rm *.sha1 +jar -uf ${TARGET_JAR} ../../${exebin} shasum -a 1 ${TARGET_JAR} > ${TARGET_JAR}.sha1 md5sum ${TARGET_JAR} > ${TARGET_JAR}.md5 + +#ospart # e.g. "/centos7" +dir=topling-tools/toplingdb${ospart}/cn/topling/${db_artifactId}/${ROCKSDB_JAVA_VERSION} +for file in ${TARGET_JAR}{,.sha1,.md5} ; do + ossutil cp --region=cn-qingdao -f $file oss://${dir}/ +done +set +x +echo =========================================== +echo ======== Download URL +echo =========================================== +echo https://topling-tools.oss-cn-qingdao.aliyuncs.com/${dir}/${TARGET_JAR} +echo https://topling-tools.oss-cn-qingdao.aliyuncs.com/${dir}/${TARGET_JAR}.sha1 +echo https://topling-tools.oss-cn-qingdao.aliyuncs.com/${dir}/${TARGET_JAR}.md5 +echo =========================================== From ec99b75ebc53ed55ab720e29786eb211b396e1b8 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 19 Mar 2026 20:14:27 +0800 Subject: [PATCH 011/102] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index b4d5a4205d..9382bacf64 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit b4d5a4205d13a102fe082b1b113a2a4e2cf2a6b0 +Subproject commit 9382bacf64df91574d2353c221aa2661c03b5959 From bcd74b33dced73bec9809332acecd1bfb41eb5cf Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 20 Mar 2026 09:38:11 +0800 Subject: [PATCH 012/102] Makefile: 
WITH_TOPLING_ROCKS pre-fix for trial --- Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 3895439346..d5b393289e 100644 --- a/Makefile +++ b/Makefile @@ -613,9 +613,8 @@ ifneq (,$(wildcard sideplugin/topling-rocks)) CXXFLAGS += -Isideplugin/topling-rocks/src CXXFLAGS += -DHAS_TOPLING_ROCKS TOPLING_ROCKS_GIT_VER_SRC = ${BUILD_ROOT}/git-version-topling_rocks.cc - EXTRA_LIB_SOURCES += \ - $(wildcard sideplugin/topling-rocks/src/table/*.cc) \ - sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC} + EXTRA_LIB_SOURCES += $(wildcard sideplugin/topling-rocks/src/table/*.cc) + EXTRA_LIB_SOURCES += sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC} else $(warning NotFound sideplugin/topling-rocks, this is ok, only ToplingZipTable is disabled) endif From 650d307de6c6ee509cad2df984e95f37621cdc2a Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 20 Mar 2026 10:40:31 +0800 Subject: [PATCH 013/102] Makefile: let USE_LTO take effect only on DEBUG_LEVEL=0 --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index d5b393289e..71fb3ba00c 100644 --- a/Makefile +++ b/Makefile @@ -245,8 +245,10 @@ endif # better when combined with profile-guided optimizations (not currently # supported natively in Makefile). 
ifeq ($(USE_LTO), 1) - CXXFLAGS += -flto - LDFLAGS += -flto=auto -fuse-linker-plugin + ifeq (${DEBUG_LEVEL},0) + CXXFLAGS += -flto + LDFLAGS += -flto=auto -fuse-linker-plugin + endif endif # `COERCE_CONTEXT_SWITCH=1` will inject spurious wakeup and From 7824afd11d824d2dd56f927af78f8293e457aed9 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 20 Mar 2026 11:07:14 +0800 Subject: [PATCH 014/102] Fix build-min-dep-jni.sh --- build-min-dep-jni.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build-min-dep-jni.sh b/build-min-dep-jni.sh index 9ee0d752f0..5d4a4f4b8d 100644 --- a/build-min-dep-jni.sh +++ b/build-min-dep-jni.sh @@ -38,9 +38,9 @@ shasum -a 1 ${TARGET_JAR} > ${TARGET_JAR}.sha1 md5sum ${TARGET_JAR} > ${TARGET_JAR}.md5 #ospart # e.g. "/centos7" -dir=topling-tools/toplingdb${ospart}/cn/topling/${db_artifactId}/${ROCKSDB_JAVA_VERSION} +dir=toplingdb${ospart}/cn/topling/${db_artifactId}/${ROCKSDB_JAVA_VERSION} for file in ${TARGET_JAR}{,.sha1,.md5} ; do - ossutil cp --region=cn-qingdao -f $file oss://${dir}/ + ossutil cp --region=cn-qingdao -f $file oss://topling-tools/${dir}/ done set +x echo =========================================== From d8476261584b323e6158df5f464b364f9e6a4382 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 20 Mar 2026 13:15:52 +0800 Subject: [PATCH 015/102] Makefile: trial obj --- Makefile | 55 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 71fb3ba00c..14d92a9412 100644 --- a/Makefile +++ b/Makefile @@ -244,10 +244,14 @@ endif # interfaces/internal abstractions, like in the iterator hierarchy. It works # better when combined with profile-guided optimizations (not currently # supported natively in Makefile). 
+OPTION_jemalloc := jemalloc-$(if $(filter 1,${DISABLE_JEMALLOC}),0,1) +OPTION_dyna_tls := dyna_tls-$(if $(filter 1,${TOPLING_USE_DYNAMIC_TLS}),1,0) +OPTION_lto := lto-0 ifeq ($(USE_LTO), 1) ifeq (${DEBUG_LEVEL},0) CXXFLAGS += -flto LDFLAGS += -flto=auto -fuse-linker-plugin + OPTION_lto := lto-$(if $(filter 1,${USE_LTO}),1,0) endif endif @@ -408,6 +412,9 @@ endif ORIG_OBJ_DIR := ${OBJ_DIR} OBJ_DIR := ${BUILD_PREFIX}${OBJ_DIR}/v${ROCKSDB_FULL_VERSION} +# COMPILER is in ignored +TRIAL_urldir := toplingdb/gpl-trial/${OPTION_lto}-${OPTION_jemalloc}-${OPTION_dyna_tls}/${UNAME_MachineSystem}-bmi2-${WITH_BMI2}/${BUILD_TYPE_SIG} + # 1. we define ROCKSDB_DISABLE_ZSTD=1 on build_detect_platform. # 2. zstd lib is included in libterark-zbs # 3. we alway use ZSTD @@ -435,32 +442,18 @@ ifndef WITH_TOPLING_ROCKS git submodule update --init --recursive \ ) endif - ifeq (,$(wildcard sideplugin/topling-rocks)) - WITH_TOPLING_ROCKS := 0 - else - WITH_TOPLING_ROCKS := 1 - endif + # default 1 + WITH_TOPLING_ROCKS := 1 endif ifeq (${WITH_TOPLING_ROCKS},1) -ifeq (,$(wildcard sideplugin/topling-rocks)) - # topling specific: just for people who has permission to topling-rocks - dummy := $(shell set -e -x; \ - cd sideplugin; \ - git clone ${GIT_TOPLING_ROCKS}; \ - cd topling-rocks; \ - git submodule update --init --recursive \ - ) -else +ifneq (,$(wildcard sideplugin/topling-rocks)) ifneq (${UPDATE_REPO},0) ifeq (${MAKE_RESTARTS},) dummy := $(shell set -ex; cd sideplugin/topling-rocks && git pull) endif endif endif -ifeq (,$(wildcard sideplugin/topling-rocks/src/table/top_zip_table_builder.cc)) - $(error WITH_TOPLING_ROCKS=1 but repo sideplugin/topling-rocks is broken) -endif endif ifeq (,$(wildcard sideplugin/cspp-memtable)) @@ -615,10 +608,13 @@ ifneq (,$(wildcard sideplugin/topling-rocks)) CXXFLAGS += -Isideplugin/topling-rocks/src CXXFLAGS += -DHAS_TOPLING_ROCKS TOPLING_ROCKS_GIT_VER_SRC = ${BUILD_ROOT}/git-version-topling_rocks.cc - EXTRA_LIB_SOURCES += $(wildcard 
sideplugin/topling-rocks/src/table/*.cc) - EXTRA_LIB_SOURCES += sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC} -else - $(warning NotFound sideplugin/topling-rocks, this is ok, only ToplingZipTable is disabled) + ifeq (,${TOPLING_ZIP_TABLE_TRIAL_DAYS}) + EXTRA_LIB_SOURCES += $(wildcard sideplugin/topling-rocks/src/table/*.cc) + EXTRA_LIB_SOURCES += sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC} + else + # no TOPLING_ROCKS_GIT_VER_SRC + EXTRA_LIB_SOURCES += sideplugin/topling-zip_table_reader/top_zip_table_builder.cc + endif endif endif @@ -1101,6 +1097,12 @@ ifneq ($(PPC_LIBC_IS_GNU),0) LIB_OBJECTS += $(patsubst %.cc, $(OBJ_DIR)/%.o, $(RANGE_TREE_SOURCES)) endif +ifeq (${WITH_TOPLING_ROCKS},1) + ifeq (,$(wildcard sideplugin/topling-rocks)) + LIB_OBJECTS += $(OBJ_DIR)/sideplugin/topling-zip_table_reader/top_zip_table_builder.o + endif +endif + GTEST = $(OBJ_DIR)/$(GTEST_DIR)/gtest/gtest-all.o TESTUTIL = $(OBJ_DIR)/test_util/testutil.o TESTHARNESS = $(OBJ_DIR)/test_util/testharness.o $(TESTUTIL) $(GTEST) @@ -2652,6 +2654,10 @@ install: install-${LIB_MODE} install-dev-static: install-headers install-static install-dev-shared: install-headers install-shared install-dev: install-dev-${LIB_MODE} +upload-trial: ${OBJ_DIR}/sideplugin/topling-zip_table_reader/top_zip_table_builder.o + ossutil cp --region=cn-qingdao -f \ + $(OBJ_DIR)/sideplugin/topling-zip_table_reader/top_zip_table_builder.o \ + oss://topling-tools/${TRIAL_urldir}/ install-dcompact: install dcompact_worker install -d $(DESTDIR)$(PREFIX)/bin @@ -3298,8 +3304,15 @@ ${BUILD_ROOT}/lib_static/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.a: ifeq (${WITH_TOPLING_ROCKS},1) ifneq (,$(wildcard sideplugin/topling-rocks)) sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC}: \ + sideplugin/topling-rocks/Makefile \ $(shell find sideplugin/topling-rocks/{src,tools} -name '*.cc' -o -name '*.h') +make -C sideplugin/topling-rocks ${TOPLING_ROCKS_GIT_VER_SRC} 
+sideplugin/topling-zip_table_reader/top_zip_table_builder.cc: sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC} +else +${OBJ_DIR}/sideplugin/topling-zip_table_reader/top_zip_table_builder.o: + @mkdir -p $(dir $@) + @cd $(dir $@) && \ + wget https://topling-tools.oss-cn-qingdao.aliyuncs.com/${TRIAL_urldir}/top_zip_table_builder.o endif endif From 355304b07715c967044bf810049bae72ced856a1 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 20 Mar 2026 13:18:10 +0800 Subject: [PATCH 016/102] Update build-trial.sh --- build-trial.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/build-trial.sh b/build-trial.sh index c767b70466..c862d65e25 100644 --- a/build-trial.sh +++ b/build-trial.sh @@ -1,15 +1,22 @@ #!/usr/bin/bash -export USE_LTO=1 export UPDATE_REPO=0 -export DEBUG_LEVEL=0 -export DISABLE_JEMALLOC=1 #export ROCKSDB_DISABLE_GFLAGS=1 -export TOPLING_USE_DYNAMIC_TLS=1 export TOPLING_ZIP_TABLE_TRIAL_DAYS=90 MAJOR_DOT_MINOR=`build_tools/version.sh major`.`build_tools/version.sh minor` make -j60 libsnappy.a liblz4.a libbz2.a +GetDebugLevel=(2 0) +for ((i=0;i<16;i++)); do + export DEBUG_LEVEL=${GetDebugLevel[$((i/1%2))]} + export USE_LTO=$((i/2%2)) + export DISABLE_JEMALLOC=$((i/4%2)) + export TOPLING_USE_DYNAMIC_TLS=$((i/8%2)) + make -j`nproc` upload-trial BUILD_PREFIX=bconf-${i}/ +done +# The last bconf-15 is release build which: +# DEBUG_LEVEL=0,USE_LTO=1,DISABLE_JEMALLOC=1,TOPLING_USE_DYNAMIC_TLS=1 +export BUILD_PREFIX=bconf-15/ make install-dcompact install-dev db_bench -j`nproc` \ PREFIX=toplingdb-${MAJOR_DOT_MINOR} STRIP_DEBUG_INFO=1 From 9d4d7a71f63ee4792487547fb0bc16d6afe4bb69 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 20 Mar 2026 15:13:10 +0800 Subject: [PATCH 017/102] add btest-trial.sh --- btest-trial.sh | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 btest-trial.sh diff --git a/btest-trial.sh b/btest-trial.sh new file mode 100644 index 0000000000..9f91f34c3c --- /dev/null +++ b/btest-trial.sh @@ 
-0,0 +1,9 @@ +#!/usr/bin/bash + +if [ -e sideplugin/topling-rocks-bak ]; then + echo sideplugin/topling-rocks-bak exists >&2 + exit 1 +fi +mv sideplugin/{topling-rocks,topling-rocks-bak} +make WITH_TOPLING_ROCKS=1 $@ +mv sideplugin/{topling-rocks-bak,topling-rocks} From ea724e4cd2ff9101be559328858f20148c942e0b Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 20 Mar 2026 15:33:50 +0800 Subject: [PATCH 018/102] README: update quotes for quick start --- README-zh_cn.md | 5 +---- README.md | 6 +----- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/README-zh_cn.md b/README-zh_cn.md index 59e1675719..edb60f3d1c 100644 --- a/README-zh_cn.md +++ b/README-zh_cn.md @@ -17,10 +17,7 @@ sudo make install PREFIX=/some/path # default is /usr/local 下载解压或者自行编译后,运行 [db_bench.sh](db_bench.sh)(需要[端口 2011](https://github.com/topling/rockside/blob/master/sample-conf/db_bench_enterprise.yaml#L4 "内嵌的 http web 服务使用端口 2011")),然后使用 ToplingDB:[原生 C++](https://github.com/topling/rockside/wiki/101 "典型场景是从 rocksdb 迁移过来)"),也支持 [Java](https://github.com/topling/rockside/wiki/SidePlugin-Java-Binding "内置在本 github 仓库中") 和 [Rust](https://github.com/topling/rust-toplingdb "另外的专门的 github 仓库")。 -> 自己编译的开源版没有 [Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable)(采用Topling可检索压缩算法的SST), -[下载 ToplingDB 企业版](https://topling-tools.oss-cn-qingdao.aliyuncs.com/toplingdb-8.10-trail90.tgz) 包含 -[Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable) 90 天试用版,开源版和企业版是二进制兼容的,可以互相替换, -唯一的不同是企业版包含[Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable)。 +> 自己编译开源版时会自动下载预编译的试用版 ToplingZipTable,如果下载失败,可以给 `make` 传递变量 `WITH_TOPLING_ROCKS=0` 禁用它。 ## 简单介绍 ToplingDB 的子模块 **[rockside](https://github.com/topling/rockside)** 是 ToplingDB 的入口,详情参考 **[SidePlugin wiki](https://github.com/topling/rockside/wiki)**。 diff --git a/README.md b/README.md index 0ee08b0b8e..eec53b19b1 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,7 @@ sudo make 
install PREFIX=/some/path # default is /usr/local After download+uncompress or compile, you can run bundled [db_bench.sh](db_bench.sh)(need [port 2011](https://github.com/topling/rockside/blob/master/sample-conf/db_bench_enterprise.yaml#L4 "use port 2011 for embeded http server")), then use ToplingDB [in C++](https://github.com/topling/sideplugin-wiki-en/wiki/101 "maybe migrate from rocksdb"), or in [Java](https://github.com/topling/sideplugin-wiki-en/wiki/SidePlugin-Java-Binding "Bundled in this repo"), [Rust](https://github.com/topling/rust-toplingdb "A seperated repo"). -> The opensource version of ToplingDB lacks [Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable)(SST with Topling searchable compression algo), -[download ToplingDB enterprise](https://topling-tools.oss-cn-qingdao.aliyuncs.com/toplingdb-8.10-trail90.tgz) to trial -[Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable) 90 days, -the shared lib of opensource version and enterprise version are binary compitible which can be exchanged, -the unique difference is enterprise version has [Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable). +> During compiling, precompiled ToplingZipTable will be auto downloaded, if download failed, you can pass `WITH_TOPLING_ROCKS=0` to `make` to disalbe it. ## Introduction ToplingDB's submodule **[rockside](https://github.com/topling/rockside)** is the entry point of ToplingDB, see **[SidePlugin wiki](https://github.com/topling/sideplugin-wiki-en/wiki)**. 
From 08929077240afdaed7ee3b722624bba0fc127dab Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 20 Mar 2026 17:43:15 +0800 Subject: [PATCH 019/102] README: contact us on compile --- README-zh_cn.md | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README-zh_cn.md b/README-zh_cn.md index edb60f3d1c..aa49899863 100644 --- a/README-zh_cn.md +++ b/README-zh_cn.md @@ -17,7 +17,7 @@ sudo make install PREFIX=/some/path # default is /usr/local 下载解压或者自行编译后,运行 [db_bench.sh](db_bench.sh)(需要[端口 2011](https://github.com/topling/rockside/blob/master/sample-conf/db_bench_enterprise.yaml#L4 "内嵌的 http web 服务使用端口 2011")),然后使用 ToplingDB:[原生 C++](https://github.com/topling/rockside/wiki/101 "典型场景是从 rocksdb 迁移过来)"),也支持 [Java](https://github.com/topling/rockside/wiki/SidePlugin-Java-Binding "内置在本 github 仓库中") 和 [Rust](https://github.com/topling/rust-toplingdb "另外的专门的 github 仓库")。 -> 自己编译开源版时会自动下载预编译的试用版 ToplingZipTable,如果下载失败,可以给 `make` 传递变量 `WITH_TOPLING_ROCKS=0` 禁用它。 +> 自己编译开源版时会自动下载预编译的试用版(90天) ToplingZipTable,如果下载失败,可以给 `make` 传递变量 `WITH_TOPLING_ROCKS=0` 禁用它(或[联系我们](mailto:contact@topling.cn))。 ## 简单介绍 ToplingDB 的子模块 **[rockside](https://github.com/topling/rockside)** 是 ToplingDB 的入口,详情参考 **[SidePlugin wiki](https://github.com/topling/rockside/wiki)**。 diff --git a/README.md b/README.md index eec53b19b1..20ef7310ce 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ sudo make install PREFIX=/some/path # default is /usr/local After download+uncompress or compile, you can run bundled [db_bench.sh](db_bench.sh)(need [port 2011](https://github.com/topling/rockside/blob/master/sample-conf/db_bench_enterprise.yaml#L4 "use port 2011 for embeded http server")), then use ToplingDB [in C++](https://github.com/topling/sideplugin-wiki-en/wiki/101 "maybe migrate from rocksdb"), or in [Java](https://github.com/topling/sideplugin-wiki-en/wiki/SidePlugin-Java-Binding "Bundled in this repo"), [Rust](https://github.com/topling/rust-toplingdb "A seperated 
repo"). -> During compiling, precompiled ToplingZipTable will be auto downloaded, if download failed, you can pass `WITH_TOPLING_ROCKS=0` to `make` to disalbe it. +> During compiling, precompiled ToplingZipTable(90 days trial) will be auto downloaded, if download failed, you can pass `WITH_TOPLING_ROCKS=0` to `make` to disalbe it(or [contact us](mailto:contact@topling.cn)). ## Introduction ToplingDB's submodule **[rockside](https://github.com/topling/rockside)** is the entry point of ToplingDB, see **[SidePlugin wiki](https://github.com/topling/sideplugin-wiki-en/wiki)**. From eafb8dca289565c2aa706ae4b3e479fa47067de4 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 21 Mar 2026 12:24:19 +0800 Subject: [PATCH 020/102] Makefile: rpath: add '$ORIGIN/../lib' --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 14d92a9412..652f9aee13 100644 --- a/Makefile +++ b/Makefile @@ -782,7 +782,7 @@ endif ifeq ($(LIB_MODE),shared) # So that binaries are executable from build location, in addition to install location -EXEC_LDFLAGS += -Wl,-rpath -Wl,'$$ORIGIN' +EXEC_LDFLAGS += -Wl,-rpath -Wl,'$$ORIGIN:$$ORIGIN/../lib' endif ifeq ($(PLATFORM), OS_MACOSX) From 4d7df6be174a29db831bee29e04f01ae708a80ef Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 21 Mar 2026 12:25:21 +0800 Subject: [PATCH 021/102] build-trial.sh: clean package dir before make --- build-trial.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build-trial.sh b/build-trial.sh index c862d65e25..a45327a9d9 100644 --- a/build-trial.sh +++ b/build-trial.sh @@ -17,6 +17,7 @@ done # The last bconf-15 is release build which: # DEBUG_LEVEL=0,USE_LTO=1,DISABLE_JEMALLOC=1,TOPLING_USE_DYNAMIC_TLS=1 export BUILD_PREFIX=bconf-15/ +rm -rf toplingdb-${MAJOR_DOT_MINOR} make install-dcompact install-dev db_bench -j`nproc` \ PREFIX=toplingdb-${MAJOR_DOT_MINOR} STRIP_DEBUG_INFO=1 From 691954215dda1a2b83b374e72eaf06fb0b5cbdfb Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 21 Mar 
2026 12:27:24 +0800 Subject: [PATCH 022/102] db_bench.sh: comment out -value_size to use default and other changes: one arg per line --- db_bench.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/db_bench.sh b/db_bench.sh index 3f0c11b30a..9fd9e122e8 100644 --- a/db_bench.sh +++ b/db_bench.sh @@ -12,20 +12,23 @@ export ROCKSDB_KICK_OUT_OPTIONS_FILE=1 ulimit -n 100000 args=( -json sideplugin/rockside/sample-conf/db_bench_enterprise.yaml - -num=10000000 -key_size=8 - -value_size=2000 + #-num=10000000 + -key_size=8 + #-value_size=2000 -batch_size=100 #-benchmarks=fillseq,compact,nextwithkey,nextwithkey,nextwithkey,nextwithkey,nextwithkey,readseq,readseq,readseq,readseq,readseq -benchmarks=fillrandom,readrandom #-benchmarks=fillseq,compact - #-benchmarks=compact -use_existing_db + #-benchmarks=compact #-benchmarks=readrandom #-benchmarks=readseq #-benchmarks=nextwithkey #-wkey_file=${HOME}/wikipedia-title-seq.txt #-rkey_file=${HOME}/wikipedia-title-seq.txt #-threads=8 - -scan_omit_key -scan_omit_value + #-use_existing_db + -scan_omit_key + -scan_omit_value -enable_zero_copy # ToplingDB specific, for point search by Get/MultiGet ) ./db_bench ${args[@]} "$@" From d0b54a709c3ca057e8b461da3966c645d7f1ec46 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 21 Mar 2026 12:33:09 +0800 Subject: [PATCH 023/102] Makefile: set DEBUG_LEVEL?=2 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 652f9aee13..4e4e1b274f 100644 --- a/Makefile +++ b/Makefile @@ -61,8 +61,8 @@ quoted_perl_command = $(subst ','\'',$(perl_command)) # `make install-shared`, `make static_lib`, `make install-static` or # `make install` -# Set the default DEBUG_LEVEL to 1 -DEBUG_LEVEL?=1 +# Set the default DEBUG_LEVEL to 2 +DEBUG_LEVEL?=2 # OBJ_DIR is where the object files reside. Default to the current directory OBJ_DIR?=. 
From cb95981cbc48bdd3361ebd20ad30b435f7105582 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 21 Mar 2026 14:54:49 +0800 Subject: [PATCH 024/102] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 9382bacf64..fd5f1325ab 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 9382bacf64df91574d2353c221aa2661c03b5959 +Subproject commit fd5f1325abc42c3e4e69ee5de1d7eebf5a906ecd From 865e26d343a47fd2fa86e79a64c523f476a3205f Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 22 Mar 2026 15:51:03 +0800 Subject: [PATCH 025/102] update submodule rockside: improve page refresh --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index fd5f1325ab..c24deef3fb 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit fd5f1325abc42c3e4e69ee5de1d7eebf5a906ecd +Subproject commit c24deef3fb6b6200918ccd76afa4f73ae2570d0b From 903594a9b2a76c5e42577c7e67bc95c6d26799fd Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 22 Mar 2026 18:39:21 +0800 Subject: [PATCH 026/102] update build-trial.sh --- build-trial.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-trial.sh b/build-trial.sh index a45327a9d9..540062679c 100644 --- a/build-trial.sh +++ b/build-trial.sh @@ -5,7 +5,7 @@ export UPDATE_REPO=0 export TOPLING_ZIP_TABLE_TRIAL_DAYS=90 MAJOR_DOT_MINOR=`build_tools/version.sh major`.`build_tools/version.sh minor` -make -j60 libsnappy.a liblz4.a libbz2.a +make -j60 libsnappy.a liblz4.a libbz2.a BUILD_PREFIX=bconf-0/ GetDebugLevel=(2 0) for ((i=0;i<16;i++)); do export DEBUG_LEVEL=${GetDebugLevel[$((i/1%2))]} From 3319b959551f1865e6e1b63b4dc0c8210283a403 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 24 Mar 2026 16:54:53 +0800 Subject: [PATCH 027/102] Makefile: check topling-rocks top_patent_algo.cc 
top_patent_algo.cc will be merged to top_zip_table_builder.cc by its Makefile, it is the reliable way to check topling-rocks integrity. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4e4e1b274f..2b388ba19c 100644 --- a/Makefile +++ b/Makefile @@ -3302,7 +3302,7 @@ ${BUILD_ROOT}/lib_static/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.a: +make -C ${TOPLING_CORE_DIR} core fsa zbs ifeq (${WITH_TOPLING_ROCKS},1) -ifneq (,$(wildcard sideplugin/topling-rocks)) +ifneq (,$(wildcard sideplugin/topling-rocks/src/table/top_patent_algo.cc)) sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC}: \ sideplugin/topling-rocks/Makefile \ $(shell find sideplugin/topling-rocks/{src,tools} -name '*.cc' -o -name '*.h') From 2d56294b3433ffc456617fd7438013946f1c84de Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 24 Mar 2026 21:58:43 +0800 Subject: [PATCH 028/102] Update README --- README-zh_cn.md | 6 ------ README.md | 10 ---------- 2 files changed, 16 deletions(-) diff --git a/README-zh_cn.md b/README-zh_cn.md index aa49899863..267458e2a3 100644 --- a/README-zh_cn.md +++ b/README-zh_cn.md @@ -86,12 +86,6 @@ toplingdb ``` make -j`nproc` EXTRA_CXXFLAGS='-DROCKSDB_DYNAMIC_CREATE_CF' rocksdbjava ``` -## License -为了兼容开源协议,下列原先禁止字节跳动使用本软件的条款从 2023-04-24 起已被删除,也就是说,字节跳动使用 ToplingDB 的行为不再是非法的,也不是无耻的。 - -~~我们禁止字节跳动使用本软件,其它条款与上游 RocksDB 完全相同,~~ 详情参考 [LICENSE.Apache](LICENSE.Apache), [COPYING](COPYING), [LICENSE.leveldb](LICENSE.leveldb). - -相应 LICENSE 文件中禁止字节跳动使用本软件的条款也已经删除:[LICENSE.Apache](LICENSE.Apache), [COPYING](COPYING), [LICENSE.leveldb](LICENSE.leveldb).
以下是上游 RocksDB 的原版 README diff --git a/README.md b/README.md index 20ef7310ce..5615b66384 100644 --- a/README.md +++ b/README.md @@ -90,16 +90,6 @@ To enable these features, add `-D${MACRO_NAME}` to var `EXTRA_CXXFLAGS`, such as ``` make -j`nproc` EXTRA_CXXFLAGS='-DROCKSDB_DYNAMIC_CREATE_CF' rocksdbjava ``` -## License -To conform open source license, the following term of disallowing bytedance is deleted since 2023-04-24, -that is say: bytedance using ToplingDB is no longer illeagal and is not a shame. - -~~We disallow bytedance using this software, other terms are identidal with -upstream rocksdb license,~~ see [LICENSE.Apache](LICENSE.Apache), [COPYING](COPYING) and -[LICENSE.leveldb](LICENSE.leveldb). - -The terms of disallowing bytedance are also deleted in [LICENSE.Apache](LICENSE.Apache), [COPYING](COPYING) and -[LICENSE.leveldb](LICENSE.leveldb).

From 425c95a22bcddba39e0fe6f63777ae24552daa8a Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 26 Mar 2026 22:40:32 +0800 Subject: [PATCH 029/102] Minor fix --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 2b388ba19c..d8105a10d6 100644 --- a/Makefile +++ b/Makefile @@ -441,6 +441,10 @@ ifndef WITH_TOPLING_ROCKS cd topling-rocks; \ git submodule update --init --recursive \ ) + else + ifeq (,$(wildcard sideplugin/topling-rocks/src/table/top_patent_algo.cc)) + dummy := $(shell rm -rf sideplugin/topling-rocks) + endif endif # default 1 WITH_TOPLING_ROCKS := 1 From 98c5d8b8639bf99575f24fc1c7762e8f6653ed9d Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 26 Mar 2026 23:30:30 +0800 Subject: [PATCH 030/102] merging_iterator.cc: simplify macro FORCE_INLINE --- table/merging_iterator.cc | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index f2a7f8995a..4c05f71ee3 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -150,14 +150,7 @@ class MaxHeapItemComparator { const InternalKeyComparator* comparator_; }; -#if defined(_MSC_VER) /* Visual Studio */ -#define FORCE_INLINE __forceinline -#define __bswap_64 _byteswap_uint64 -#elif defined(__GNUC__) -#define FORCE_INLINE inline __attribute__((always_inline)) -#else -#define FORCE_INLINE inline -#endif +#define FORCE_INLINE terark_forceinline #if defined(__AVX512VL__) && defined(__AVX512BW__) // can be defined as 23 or 16 From cfc15a8ea98f2053c8746525a4f2de5a0e0ba8b4 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 27 Mar 2026 00:34:12 +0800 Subject: [PATCH 031/102] iterator_wrapper.h: use ForgeFuncPtr --- table/iterator_wrapper.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index cde6320eb0..f9ccc4a57e 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -56,10 +56,8 @@ class 
IteratorWrapperBase { result_.is_valid = false; } else { #if TOPLING_USE_BOUND_PMF - next_and_get_result_ = ExtractFuncPtr - (_iter, &InternalIteratorBase::NextAndGetResult); - prepare_and_get_value_ = ExtractFuncPtr - (_iter, &InternalIteratorBase::PrepareAndGetValue); + next_and_get_result_ = ForgeFuncPtr(_iter, &InternalIteratorBase::NextAndGetResult); + prepare_and_get_value_ = ForgeFuncPtr(_iter, &InternalIteratorBase::PrepareAndGetValue); #endif Update(); } @@ -289,10 +287,8 @@ class ThinIteratorWrapperBase { iter_ = i; if (i) { #if TOPLING_USE_BOUND_PMF - next_and_get_result_ = ExtractFuncPtr - (i, &InternalIteratorBase::NextAndGetResult); - prepare_and_get_value_ = ExtractFuncPtr - (i, &InternalIteratorBase::PrepareAndGetValue); + next_and_get_result_ = ForgeFuncPtr(i, &InternalIteratorBase::NextAndGetResult); + prepare_and_get_value_ = ForgeFuncPtr(i, &InternalIteratorBase::PrepareAndGetValue); #endif } return old_iter; From b6bfd2668f1f788446746074653dcb7692e59b62 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 27 Mar 2026 15:00:19 +0800 Subject: [PATCH 032/102] use topling-zip _rvref --- java/rocksjni/rocksjni.cc | 1 + java/rocksjni/ttl.cc | 1 + utilities/transactions/pessimistic_transaction.cc | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 9dceb1997c..0e4e1c73b1 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -127,6 +127,7 @@ jlongArray rocksdb_open_helper( [](const char* str_data, const size_t str_len) { return std::string(str_data, str_len); }, + terark::_rvref * [&jco, &column_families](size_t idx, std::string cf_name) { ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = reinterpret_cast(jco[idx]); diff --git a/java/rocksjni/ttl.cc b/java/rocksjni/ttl.cc index 1fe2083d99..178ca0203f 100644 --- a/java/rocksjni/ttl.cc +++ b/java/rocksjni/ttl.cc @@ -81,6 +81,7 @@ jlongArray Java_org_rocksdb_TtlDB_openCF(JNIEnv* env, jclass, jlong 
jopt_handle, [](const char* str_data, const size_t str_len) { return std::string(str_data, str_len); }, + terark::_rvref * [&jco, &column_families](size_t idx, std::string cf_name) { ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = reinterpret_cast(jco[idx]); diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc index 4a370fcb53..ca71ac2763 100644 --- a/utilities/transactions/pessimistic_transaction.cc +++ b/utilities/transactions/pessimistic_transaction.cc @@ -28,6 +28,8 @@ namespace ROCKSDB_NAMESPACE { +using terark::_rvref; + struct WriteOptions; std::atomic PessimisticTransaction::txn_id_counter_(1); @@ -692,7 +694,7 @@ Status WriteCommittedTxn::CommitWithoutPrepareInternal() { Slice commit_ts(commit_ts_buf, sizeof(commit_ts_buf)); Status s = - wb->UpdateTimestamps(commit_ts, [wbwi, this](uint32_t cf) -> size_t { + wb->UpdateTimestamps(commit_ts, _rvref*[wbwi, this](uint32_t cf) -> size_t { auto cf_iter = cfs_with_ts_tracked_when_indexing_disabled_.find(cf); if (cf_iter != cfs_with_ts_tracked_when_indexing_disabled_.end()) { return sizeof(kMaxTxnTimestamp); @@ -776,7 +778,7 @@ Status WriteCommittedTxn::CommitInternal() { s = WriteBatchInternal::MarkCommitWithTimestamp(working_batch, name_, commit_ts); if (s.ok()) { - s = wb->UpdateTimestamps(commit_ts, [wbwi, this](uint32_t cf) -> size_t { + s = wb->UpdateTimestamps(commit_ts, _rvref*[wbwi, this](uint32_t cf) -> size_t { if (cfs_with_ts_tracked_when_indexing_disabled_.find(cf) != cfs_with_ts_tracked_when_indexing_disabled_.end()) { return sizeof(kMaxTxnTimestamp); From 227fd720c4c6b6e0c775c6390f4e0b8cbb6fd2d0 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 27 Mar 2026 20:08:56 +0800 Subject: [PATCH 033/102] remove unused func ReplaceAll --- db/compaction/compaction_executor.cc | 13 ------------- db/compaction/compaction_executor.h | 2 -- 2 files changed, 15 deletions(-) diff --git a/db/compaction/compaction_executor.cc 
b/db/compaction/compaction_executor.cc index c4a1888704..534abd5b3a 100644 --- a/db/compaction/compaction_executor.cc +++ b/db/compaction/compaction_executor.cc @@ -278,19 +278,6 @@ std::string ReplacePrefix(Slice Old, Slice New, Slice str) { int(str.size()), str.data(), int(Old.size()), Old.data()); } -void ReplaceAll(std::string& str, Slice from, Slice to) { - if (from.empty()) return; - size_t start_pos = 0; - while ((start_pos = str.find(from.data(), start_pos)) != std::string::npos) { - str.replace(start_pos, from.size(), to.data(), to.size()); - start_pos += to.size(); - } -} -std::string ReplaceAll(Slice str, Slice from, Slice to) { - std::string tmp(str.data(), str.size()); - ReplaceAll(tmp, from, to); - return tmp; -} std::string MakePath(std::string dir, Slice sub) { while (!dir.empty() && '/' == dir.back()) { dir.pop_back(); diff --git a/db/compaction/compaction_executor.h b/db/compaction/compaction_executor.h index 477d369ec0..5fd4cb29a0 100644 --- a/db/compaction/compaction_executor.h +++ b/db/compaction/compaction_executor.h @@ -182,8 +182,6 @@ class CompactionExecutorFactory { std::string GetDirFromEnv(const char* name, const char* Default = nullptr); bool ReplacePrefix(Slice Old, Slice New, Slice str, std::string* res); std::string ReplacePrefix(Slice Old, Slice New, Slice str); -void ReplaceAll(std::string& str, Slice from, Slice to); -std::string ReplaceAll(Slice str, Slice from, Slice to); std::string MakePath(std::string dir, Slice sub); std::string& AppendJobID(std::string& path, int job_id); std::string CatJobID(const std::string& path, int job_id); From f6ddc86c43beb8c916105bfe6956add1bcc9bb51 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 27 Mar 2026 20:24:15 +0800 Subject: [PATCH 034/102] version_set.cc: extract `fi = file_iter_.iter()` --- db/version_set.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 38a7fcfe79..047773eac3 100644 --- a/db/version_set.cc +++ 
b/db/version_set.cc @@ -1209,12 +1209,11 @@ class LevelIterator final : public InternalIterator { iw->prepare_and_get_value_ = ForgeFuncPtr(this, &LevelIterator::PrepareAndGetValue); } else { - iw->work_iter_ = file_iter_.iter(); - iw->value_iter_ = file_iter_.iter(); - iw->next_and_get_result_ = ForgeFuncPtr(file_iter_.iter(), - &InternalIterator::NextAndGetResult); - iw->prepare_and_get_value_ = ForgeFuncPtr(file_iter_.iter(), - &InternalIterator::PrepareAndGetValue); + auto fi = file_iter_.iter(); + iw->work_iter_ = fi; + iw->value_iter_ = fi; + iw->next_and_get_result_ = ForgeFuncPtr(fi, &InternalIterator::NextAndGetResult); + iw->prepare_and_get_value_ = ForgeFuncPtr(fi, &InternalIterator::PrepareAndGetValue); } retry_already_goes_invalid_ = false; } From 12a8417ff82a2c13ecc671a5b745f3d9ddfc2489 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 28 Mar 2026 22:47:38 +0800 Subject: [PATCH 035/102] merging_iterator.cc: add a static_assert --- table/merging_iterator.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 4c05f71ee3..34b48b7e0c 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -265,6 +265,7 @@ struct UintPrefix { unsigned char data[MERGE_ITER_PREFIX_LEN] = {0}; UintPrefix(int=0) {} }; +static_assert(sizeof(UintPrefix) == 23); #endif // MERGE_ITER_PREFIX_LEN From 153fba2ce6618c045b9eb295bbad3000c1d74305 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 29 Mar 2026 15:48:04 +0800 Subject: [PATCH 036/102] db_iter.cc: SetUK: fix static_assert --- db/db_iter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 969c8df2d6..d7fd18c402 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -595,7 +595,7 @@ struct VirtualCmpNoTS { template __always_inline void DBIter::FastIterKey::SetUK(const Slice& uk_slice) { - static_assert(UserKeyLen < sizeof(key)); + static_assert(UserKeyLen + 8 < sizeof(key)); auto uk_ptr = uk_slice.data(); 
auto uk_len = uk_slice.size(); if constexpr (UserKeyLen == 0) { From 7e519525214d2e8c985af3a31039a85a18fe61d9 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 29 Mar 2026 16:44:50 +0800 Subject: [PATCH 037/102] memtable: insert_hints_ use hash_strmap Old code use Slice as key, maybe danger, this commit will copy slice key, the danger is avoided, this will not introduce bug unless there is other suprising behavior. Unit tests passed! --- db/memtable.cc | 4 ++++ db/memtable.h | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/db/memtable.cc b/db/memtable.cc index 1abb6ec0bb..a24d8d8443 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -170,6 +170,10 @@ MemTable::~MemTable() { assert(refs_ == 0); } +// for ApproximateMemoryUsage(insert_hints_) +static size_t ApproximateMemoryUsage(const terark::hash_strmap& map) { + return map.capacity() * 16 + map.strpool_capacity() + map.bucket_size() * 4; +} size_t MemTable::ApproximateMemoryUsage() { size_t usages[] = { arena_.ApproximateMemoryUsage(), table_->ApproximateMemoryUsage(), diff --git a/db/memtable.h b/db/memtable.h index e04a456d4c..f28c99fb7f 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -698,7 +698,8 @@ class MemTable : public CacheAlignedNewDelete { const SliceTransform* insert_with_hint_prefix_extractor_; // Insert hints for each prefix. 
- UnorderedMapH insert_hints_; + // UnorderedMapH insert_hints_; + terark::hash_strmap insert_hints_; // Timestamp of oldest key std::atomic oldest_key_time_; From 2c2dfda5927206bc930472ca0a1ffc78a18ae520 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 30 Mar 2026 22:29:11 +0800 Subject: [PATCH 038/102] db_impl.cc: add local async_io to help compiler optimzer --- db/db_impl/db_impl.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 097163c60d..b03aabe3e0 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -3537,13 +3537,14 @@ if (UNLIKELY(!g_MultiGetUseFiber)) { get_value); counting++; }; - if (read_options.async_io) { + const bool async_io = read_options.async_io; + if (async_io) { gt_fiber_pool.update_fiber_count(read_options.async_queue_depth); } size_t memtab_miss = 0; for (size_t i = 0; i < num_keys; i++) { if (!ctx_vec[i].is_done()) { - if (read_options.async_io) { + if (async_io) { gt_fiber_pool.push({TERARK_C_CALLBACK(get_in_sst), i}); } else { get_in_sst(i); From 50cc86bd1c2995d5af06f7220ecf906c3a9bcd32 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 30 Mar 2026 22:38:10 +0800 Subject: [PATCH 039/102] merging_iterator.cc: static_assert, for commit 12a8417ff82a2c13ecc671a5b745f3d9ddfc2489 --- table/merging_iterator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 34b48b7e0c..ea257d5e0a 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -265,7 +265,7 @@ struct UintPrefix { unsigned char data[MERGE_ITER_PREFIX_LEN] = {0}; UintPrefix(int=0) {} }; -static_assert(sizeof(UintPrefix) == 23); +static_assert(sizeof(UintPrefix) == MERGE_ITER_PREFIX_LEN); #endif // MERGE_ITER_PREFIX_LEN From 9b11f99e4aa7aa9abea233559eb91e0de52b782a Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 31 Mar 2026 09:08:41 +0800 Subject: [PATCH 040/102] Makefile: prompt user set 
WITH_TOPLING_ROCKS=0 on download fail --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d8105a10d6..79ddafa89d 100644 --- a/Makefile +++ b/Makefile @@ -3316,7 +3316,8 @@ else ${OBJ_DIR}/sideplugin/topling-zip_table_reader/top_zip_table_builder.o: @mkdir -p $(dir $@) @cd $(dir $@) && \ - wget https://topling-tools.oss-cn-qingdao.aliyuncs.com/${TRIAL_urldir}/top_zip_table_builder.o + wget https://topling-tools.oss-cn-qingdao.aliyuncs.com/${TRIAL_urldir}/top_zip_table_builder.o || \ + echo 'Download top_zip_table_builder fail, add WITH_TOPLING_ROCKS=0 to make command and try again' endif endif From c6ad29e1c1b7f330f4ee3729ba0e91a8350dd464 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 31 Mar 2026 09:47:10 +0800 Subject: [PATCH 041/102] Makefile: Fix CXXFLAGS += -DHAS_TOPLING_ROCKS --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 79ddafa89d..d1889d851c 100644 --- a/Makefile +++ b/Makefile @@ -458,6 +458,7 @@ ifneq (,$(wildcard sideplugin/topling-rocks)) endif endif endif +CXXFLAGS += -DHAS_TOPLING_ROCKS endif ifeq (,$(wildcard sideplugin/cspp-memtable)) @@ -610,7 +611,6 @@ endif # WITH_TOPLING_DCOMPACT ifeq (${WITH_TOPLING_ROCKS},1) ifneq (,$(wildcard sideplugin/topling-rocks)) CXXFLAGS += -Isideplugin/topling-rocks/src - CXXFLAGS += -DHAS_TOPLING_ROCKS TOPLING_ROCKS_GIT_VER_SRC = ${BUILD_ROOT}/git-version-topling_rocks.cc ifeq (,${TOPLING_ZIP_TABLE_TRIAL_DAYS}) EXTRA_LIB_SOURCES += $(wildcard sideplugin/topling-rocks/src/table/*.cc) From 615a76eb98508cb519bb27e01d981336b0906c8a Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 31 Mar 2026 14:13:20 +0800 Subject: [PATCH 042/102] Fix build-min-dep-jni.sh --- build-min-dep-jni.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/build-min-dep-jni.sh b/build-min-dep-jni.sh index 5d4a4f4b8d..14fca81ca8 100644 --- a/build-min-dep-jni.sh +++ b/build-min-dep-jni.sh @@ -1,14 +1,18 @@ 
#!/usr/bin/bash +ROCKSDB_VERSION=`build_tools/version.sh full` + # ex: topling-8.10.2-frocksdb-1.0, part will be ignored if [ -z "${TOPLING_VERSION}" ]; then - GITHUB_REF=`git symbolic-ref HEAD` - TOPLING_VERSION=`echo ${GITHUB_REF} | sed -n 's:^refs/tags/topling-'${ROCKSDB_VERSION}'[-_a-z]*\([.0-9]\):\1:p'` + GITHUB_REF=`git describe --tags --exact-match || git branch --show-current` + TOPLING_VERSION=`echo ${GITHUB_REF} | sed -n 's:^topling-'${ROCKSDB_VERSION}'[-_a-z]*\([.0-9]\):\1:p'` if [ -z "${TOPLING_VERSION}" ]; then echo TOPLING_VERSION is not set and can not parse from HEAD ref >&2 exit 1 fi fi +#ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}-topling-${TOPLING_VERSION}-trial${TOPLING_ZIP_TABLE_TRIAL_DAYS} +ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}-topling-${TOPLING_VERSION} export USE_LTO=1 export UPDATE_REPO=0 @@ -26,8 +30,6 @@ make rocksdbjava install-dcompact -j`nproc` BUILD_PREFIX=min-dep-jni/ \ exebin=min-dep-jni/bin/dcompact_worker.exe patchelf --replace-needed librocksdb.so.${MAJOR_DOT_MINOR} librocksdbjni-linux64.so ${exebin} -ROCKSDB_VERSION=`build_tools/version.sh full` -ROCKSDB_JAVA_VERSION=${ROCKSDB_VERSION}-topling-${TOPLING_VERSION}-trial${TOPLING_ZIP_TABLE_TRIAL_DAYS} cd java/target db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml.template` TARGET_JAR=${db_artifactId}-${ROCKSDB_JAVA_VERSION}.jar From beb710c92fd9409691a9a92d6c18a2e81d3e824a Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 1 Apr 2026 08:01:06 +0800 Subject: [PATCH 043/102] Update build sh script --- build-min-dep-jni.sh | 15 ++++++++++++--- build-trial.sh | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/build-min-dep-jni.sh b/build-min-dep-jni.sh index 14fca81ca8..69b20ac6a1 100644 --- a/build-min-dep-jni.sh +++ b/build-min-dep-jni.sh @@ -23,22 +23,31 @@ export TOPLING_USE_DYNAMIC_TLS=1 export TOPLING_ZIP_TABLE_TRIAL_DAYS=90 MAJOR_DOT_MINOR=`build_tools/version.sh major`.`build_tools/version.sh minor` +rm -rf java/include +rm -rf 
snappy* lz4* bzip2* +rm -f libsnappy.a liblz4.a libbz2.a make -j60 libsnappy.a liblz4.a libbz2.a make rocksdbjava install-dcompact -j`nproc` BUILD_PREFIX=min-dep-jni/ \ PREFIX=min-dep-jni STRIP_DEBUG_INFO=1 ROCKSDB_JAR_WITH_DYNAMIC_LIBS=1 -exebin=min-dep-jni/bin/dcompact_worker.exe -patchelf --replace-needed librocksdb.so.${MAJOR_DOT_MINOR} librocksdbjni-linux64.so ${exebin} +patchelf --replace-needed librocksdb.so.${MAJOR_DOT_MINOR} librocksdbjni-linux64.so min-dep-jni/bin/dcompact_worker.exe cd java/target db_artifactId=`sed -n 's/.*\(f\?rocksdbjni\)<\/artifactId>.*/\1/p' ../pom.xml.template` TARGET_JAR=${db_artifactId}-${ROCKSDB_JAVA_VERSION}.jar mv rocksdbjni-${ROCKSDB_VERSION}-linux64.jar ${TARGET_JAR} rm *.sha1 -jar -uf ${TARGET_JAR} ../../${exebin} +( # in sub shell + cd ../../min-dep-jni/bin + jar -uf ../../java/target/${TARGET_JAR} dcompact_worker.exe +) shasum -a 1 ${TARGET_JAR} > ${TARGET_JAR}.sha1 md5sum ${TARGET_JAR} > ${TARGET_JAR}.md5 +source /etc/os-release +if [ "${ID}" = "centos" ]; then + ospart=/${ID}${VERSION_ID} +fi #ospart # e.g. 
"/centos7" dir=toplingdb${ospart}/cn/topling/${db_artifactId}/${ROCKSDB_JAVA_VERSION} for file in ${TARGET_JAR}{,.sha1,.md5} ; do diff --git a/build-trial.sh b/build-trial.sh index 540062679c..e3baac3150 100644 --- a/build-trial.sh +++ b/build-trial.sh @@ -18,6 +18,7 @@ done # DEBUG_LEVEL=0,USE_LTO=1,DISABLE_JEMALLOC=1,TOPLING_USE_DYNAMIC_TLS=1 export BUILD_PREFIX=bconf-15/ rm -rf toplingdb-${MAJOR_DOT_MINOR} +rm -rf librocksdb* db_bench make install-dcompact install-dev db_bench -j`nproc` \ PREFIX=toplingdb-${MAJOR_DOT_MINOR} STRIP_DEBUG_INFO=1 From 532e9832e59f6fb81618e3a4e9812da9128312b7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 1 Apr 2026 10:13:54 +0800 Subject: [PATCH 044/102] Makefile: for legacy centos --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index d1889d851c..ce5c9afc7c 100644 --- a/Makefile +++ b/Makefile @@ -414,6 +414,12 @@ OBJ_DIR := ${BUILD_PREFIX}${OBJ_DIR}/v${ROCKSDB_FULL_VERSION} # COMPILER is in ignored TRIAL_urldir := toplingdb/gpl-trial/${OPTION_lto}-${OPTION_jemalloc}-${OPTION_dyna_tls}/${UNAME_MachineSystem}-bmi2-${WITH_BMI2}/${BUILD_TYPE_SIG} +ifeq (${PLATFORM},OS_LINUX) +LINUX_NAME := $(shell source /etc/os-release; echo $$ID) +ifeq (${LINUX_NAME},centos) +TRIAL_urldir := ${TRIAL_urldir}/$(shell source /etc/os-release; echo $$ID$$VERSION_ID) +endif +endif # 1. we define ROCKSDB_DISABLE_ZSTD=1 on build_detect_platform. # 2. 
zstd lib is included in libterark-zbs From 8b5b29b402079b158ef0efa774d045ca138e28ec Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 1 Apr 2026 18:40:01 +0800 Subject: [PATCH 045/102] build sh add ospart --- Makefile | 2 +- build-trial.sh | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ce5c9afc7c..075a51c484 100644 --- a/Makefile +++ b/Makefile @@ -120,7 +120,7 @@ endif ifeq (${DISABLE_JEMALLOC},1) ifeq (${ROCKSDB_DISABLE_JEMALLOC},) export_ROCKSDB_DISABLE_JEMALLOC := export ROCKSDB_DISABLE_JEMALLOC=1; - export ROCKSDB_DISABLE_JEMALLOC = 1 + export ROCKSDB_DISABLE_JEMALLOC = 1 endif endif diff --git a/build-trial.sh b/build-trial.sh index e3baac3150..cbd27cc466 100644 --- a/build-trial.sh +++ b/build-trial.sh @@ -19,6 +19,7 @@ done export BUILD_PREFIX=bconf-15/ rm -rf toplingdb-${MAJOR_DOT_MINOR} rm -rf librocksdb* db_bench +rm -rf sideplugin/topling-dcompact/tools/dcompact/build make install-dcompact install-dev db_bench -j`nproc` \ PREFIX=toplingdb-${MAJOR_DOT_MINOR} STRIP_DEBUG_INFO=1 @@ -30,6 +31,12 @@ sed -e 's:sideplugin/rockside/src/topling/web:site:' \ -e 's:\./db_bench:bin/db_bench:' \ -e '/ulimit/iexport LD_LIBRARY_PATH=lib:$LD_LIBRARY_PATH' \ -i toplingdb-${MAJOR_DOT_MINOR}/db_bench.sh -sdk=toplingdb-${MAJOR_DOT_MINOR}-trail${TOPLING_ZIP_TABLE_TRIAL_DAYS}.tgz +source /etc/os-release +if [ "${ID}" = "centos" ]; then + ospart=-${ID}${VERSION_ID} # e.g. 
"-centos7" +else + ospart="" # keep empty +fi +sdk=toplingdb-${MAJOR_DOT_MINOR}-trail${TOPLING_ZIP_TABLE_TRIAL_DAYS}${ospart}.tgz tar czf ${sdk} toplingdb-${MAJOR_DOT_MINOR} ossutil cp --region=cn-qingdao -f ${sdk} oss://topling-tools/ From cf1ea6a09cbc7b194d6e9175ac63394101c41594 Mon Sep 17 00:00:00 2001 From: rockeet Date: Wed, 1 Apr 2026 19:07:46 +0800 Subject: [PATCH 046/102] build-min-dep-jni.sh: fix comment --- build-min-dep-jni.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-min-dep-jni.sh b/build-min-dep-jni.sh index 69b20ac6a1..d94fad8d92 100644 --- a/build-min-dep-jni.sh +++ b/build-min-dep-jni.sh @@ -2,9 +2,9 @@ ROCKSDB_VERSION=`build_tools/version.sh full` -# ex: topling-8.10.2-frocksdb-1.0, part will be ignored if [ -z "${TOPLING_VERSION}" ]; then GITHUB_REF=`git describe --tags --exact-match || git branch --show-current` + # GITHUB_REF seems like: topling-8.10.2-frocksdb-1.0, part will be ignored TOPLING_VERSION=`echo ${GITHUB_REF} | sed -n 's:^topling-'${ROCKSDB_VERSION}'[-_a-z]*\([.0-9]\):\1:p'` if [ -z "${TOPLING_VERSION}" ]; then echo TOPLING_VERSION is not set and can not parse from HEAD ref >&2 From 0dd8992d85476c819ad06377f9fc0b4ba4bb5981 Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 2 Apr 2026 16:54:44 +0800 Subject: [PATCH 047/102] db_impl.cc: MultiGetOneCFH: NotSupported on conf error --- db/db_impl/db_impl.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index b03aabe3e0..c4c0eac821 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -3579,6 +3579,10 @@ if (UNLIKELY(!g_MultiGetUseFiber)) { if (!read_options.internal_is_in_pinning_section) ReturnAndCleanupSuperVersion(cfd, sv); +#else + for (size_t i = 0; i < num_keys; i++) { + statuses[i] = Status::NotSupported("macro TOPLINGDB_WITH_FIBER_AIO is 0 but env MultiGetUseFiber is true"); + } #endif // TOPLINGDB_WITH_FIBER_AIO } // g_MultiGetUseFiber } From 6247d745bf59b936e860dff58029a1c640acce9d 
Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 2 Apr 2026 17:27:17 +0800 Subject: [PATCH 048/102] db_iter.cc: workaround clang17 bug(eager eval static_assert) --- db/db_iter.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index d7fd18c402..c3435b2095 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -613,7 +613,9 @@ void DBIter::FastIterKey::SetUK(const Slice& uk_slice) { // do not write last 8 bytes(seq + value_type) }); #elif defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 13 - static_assert(false, "UserKeyLen == 64 should not on non-avx512"); + // (UserKeyLen != 64) == false here, for workardound clang(at least clang17) + // static_assert(false, "UserKeyLen == 64 should not on non-avx512"); // clang fail + static_assert(UserKeyLen != 64, "UserKeyLen == 64 should not on non-avx512"); // clang ok #endif } else { ROCKSDB_ASSERT_EQ(uk_len, UserKeyLen); From 563596601bbebdd8479b19cd2a5384fb34d28f3e Mon Sep 17 00:00:00 2001 From: rockeet Date: Thu, 2 Apr 2026 18:27:31 +0800 Subject: [PATCH 049/102] db_iter.cc: static_assert, not only for clang --- db/db_iter.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index c3435b2095..000953df88 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -612,10 +612,9 @@ void DBIter::FastIterKey::SetUK(const Slice& uk_slice) { _mm512_mask_storeu_epi8(buf, mask, r512); // do not write last 8 bytes(seq + value_type) }); - #elif defined(__clang__) || !defined(__GNUC__) || __GNUC__ >= 13 - // (UserKeyLen != 64) == false here, for workardound clang(at least clang17) - // static_assert(false, "UserKeyLen == 64 should not on non-avx512"); // clang fail - static_assert(UserKeyLen != 64, "UserKeyLen == 64 should not on non-avx512"); // clang ok + #else + // (UserKeyLen != 64) == false here, for workardound + static_assert(UserKeyLen != 64, "UserKeyLen == 64 should not on non-avx512"); #endif } else { ROCKSDB_ASSERT_EQ(uk_len, 
UserKeyLen); From 5c755a2d12dbb3014b3efbccefc5caf7c451ffd0 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 3 Apr 2026 21:12:04 +0800 Subject: [PATCH 050/102] java/rocksjni/transaction.cc: remove unused typedef FnGet --- java/rocksjni/transaction.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/java/rocksjni/transaction.cc b/java/rocksjni/transaction.cc index 8a8438671b..3b408d315d 100644 --- a/java/rocksjni/transaction.cc +++ b/java/rocksjni/transaction.cc @@ -154,11 +154,6 @@ void Java_org_rocksdb_Transaction_rollbackToSavePoint(JNIEnv* env, } } -typedef std::function - FnGet; - /* * Class: org_rocksdb_Transaction * Method: get From ea34cf6b56caedd0d37d0ef4a8299004e0b0aa86 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 6 Apr 2026 20:55:40 +0800 Subject: [PATCH 051/102] db_iter.h: GetUK() add comment --- db/db_iter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_iter.h b/db/db_iter.h index caea275161..9cc825f1c0 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -463,10 +463,10 @@ class DBIter final : public Iterator { if constexpr (FixLen == 64) // avx512 FixLen==64 means max is 64(without seqvt 8) return key.risk_to_str_local().notail(8); - if constexpr (FixLen != 0) + if constexpr (FixLen != 0) // FixLen != 0 means fixed len return key.risk_to_str_local_known_len().notail(8); else - return GetUserKey(); + return GetUserKey(); // not fixed len, a bit slower } Slice GetUserKey() const { return key.notail(8); } Slice GetInternalKey() const { return key.to(); } From 40eb92e1ed26b5e1114a6101fe91eda7dbed4bd7 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 7 Apr 2026 17:18:48 +0800 Subject: [PATCH 052/102] memtable.h: remove useless #include --- db/memtable.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/db/memtable.h b/db/memtable.h index f28c99fb7f..5cd0d56288 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -9,8 +9,6 @@ #pragma once #include -#include -#include #include #include #include @@ -23,15 +21,12 @@ #include 
"db/version_edit.h" #include "memory/allocator.h" #include "memory/concurrent_arena.h" -#include "monitoring/instrumented_mutex.h" #include "options/cf_options.h" #include "rocksdb/db.h" #include "rocksdb/memtablerep.h" #include "table/internal_iterator.h" #include "table/multiget_context.h" #include "util/dynamic_bloom.h" -#include "util/hash.h" -#include "util/hash_containers.h" #if defined(TOPLINGDB_WITH_TIMESTAMP) #include From 69eb32571c9de8880e9b5338006d68dfdca1cece Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 10 Apr 2026 20:22:10 +0800 Subject: [PATCH 053/102] block_based_table_reader_test.cc: bugfix: get_context.reserve(keys.size()); --- table/block_based/block_based_table_reader_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/table/block_based/block_based_table_reader_test.cc b/table/block_based/block_based_table_reader_test.cc index 254546893f..86bb7b780c 100644 --- a/table/block_based/block_based_table_reader_test.cc +++ b/table/block_based/block_based_table_reader_test.cc @@ -417,6 +417,7 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) { autovector get_context; autovector key_context; autovector sorted_keys; + get_context.reserve(keys.size()); for (size_t i = 0; i < keys.size(); ++i) { get_context.emplace_back(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, ExtractUserKey(keys[i]), From cb1574713591ce8901c5320ac630ddbc1b13bb56 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 10 Apr 2026 20:23:53 +0800 Subject: [PATCH 054/102] Makefile: auto MAKE_UNIT_TEST=1 on *test*.o --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 075a51c484..b922211ccb 100644 --- a/Makefile +++ b/Makefile @@ -395,7 +395,7 @@ endif TOPLING_LIB_OBJECTS = $(addprefix ${TOPLING_CORE_DIR}/, ${TOPLING_LIB_OBJ_LIST_VAR}) LDFLAGS += ${TOPLING_CORE_LD_LIBS_EXTRA} -ifneq ($(filter auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2 jtest, $(MAKECMDGOALS)),) +ifneq ($(filter 
auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2 jtest %_test.o %_test2.o, $(MAKECMDGOALS)),) MAKE_UNIT_TEST ?= 1 endif ifeq (${MAKE_UNIT_TEST},1) From 68216a0da71b9a014b84fcd5beff3e868c7b2c5a Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 10 Apr 2026 20:29:09 +0800 Subject: [PATCH 055/102] dbformat.h: make ikey comparator more smart --- db/dbformat.h | 106 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/db/dbformat.h b/db/dbformat.h index 9158cb9b7f..ee2db44b8f 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -22,6 +22,7 @@ #include "rocksdb/types.h" #include "util/coding.h" #include "util/user_comparator_wrapper.h" +#include namespace ROCKSDB_NAMESPACE { @@ -161,6 +162,18 @@ struct ParsedInternalKey { const char* addr = user_key.data() + user_key.size() - ts_sz; return Slice(const_cast(addr), ts_sz); } + + struct InternalKeyBuf : private terark::minimal_sso<64> { + explicit InternalKeyBuf(const ParsedInternalKey& pik) : + terark::minimal_sso<64>(pik.user_key.size() + 8, + [&](char* buf, size_t len) { + EncodeFixed64(buf + (len - 8), pik.GetTag()); + memcpy(buf, pik.user_key.data(), len - 8); + }) + {} + operator Slice() const { return this->to(); } + }; + InternalKeyBuf MakeInternalKeyBuf() const { return InternalKeyBuf(*this); } }; static_assert(sizeof(ParsedInternalKey) == 32); @@ -1247,6 +1260,78 @@ struct BytewiseCompareInternalKey { return GetUnalignedU64(px + n) > GetUnalignedU64(py + n); #endif } + __always_inline bool operator()(const ParsedInternalKey& x, Slice y) const noexcept { + ROCKSDB_ASSERT_GE(y.size_, 8); + #if !TOPLINGDB_USE_MANUAL_MEMCMP + size_t n = std::min(x.user_key.size_, y.size_ - 8); + int cmp = memcmp(x.user_key.data_, y.data_, n); + if (0 != cmp) return cmp < 0; + if (x.user_key.size_ != y.size_ - 8) return x.user_key.size_ < y.size_ - 8; + return x.GetTag() > GetUnalignedU64(y.data_ + n); + #else + auto px = (const unsigned 
char*)x.user_key.data(); size_t nx = x.user_key.size(); + auto py = (const unsigned char*)y.data(); size_t ny = y.size() - 8; + size_t i = 0, n = std::min(nx, ny); + for (; i + 8 <= n; i += 8) { + auto ux = NativeOfBigEndian64(*(const uint64_t*)(px + i)); + auto uy = NativeOfBigEndian64(*(const uint64_t*)(py + i)); + if (ux != uy) + return ux < uy; + } + if (n % sizeof(uint64_t) >= 4) { + auto ux = NativeOfBigEndian32(*(const uint32_t*)(px + i)); + auto uy = NativeOfBigEndian32(*(const uint32_t*)(py + i)); + if (ux != uy) + return ux < uy; + else + i += 4; + } + for (; i < n; i++) { + int ux = px[i], uy = py[i]; + if (ux != uy) + return ux < uy; + } + if (nx != ny) + return nx < ny; + return x.GetTag() > GetUnalignedU64(py + n); + #endif + } + __always_inline bool operator()(Slice x, const ParsedInternalKey& y) const noexcept { + ROCKSDB_ASSERT_GE(x.size_, 8); + #if !TOPLINGDB_USE_MANUAL_MEMCMP + size_t n = std::min(x.size_ - 8, y.user_key.size_); + int cmp = memcmp(x.data_, y.user_key.data_, n); + if (0 != cmp) return cmp < 0; + if (x.size_ - 8 != y.user_key.size_) return x.size_ - 8 < y.user_key.size_; + return GetUnalignedU64(x.data_ + n) > y.GetTag(); + #else + auto px = (const unsigned char*)x.data(); size_t nx = x.size() - 8; + auto py = (const unsigned char*)y.user_key.data(); size_t ny = y.user_key.size(); + size_t i = 0, n = std::min(nx, ny); + for (; i + 8 <= n; i += 8) { + auto ux = NativeOfBigEndian64(*(const uint64_t*)(px + i)); + auto uy = NativeOfBigEndian64(*(const uint64_t*)(py + i)); + if (ux != uy) + return ux < uy; + } + if (n % sizeof(uint64_t) >= 4) { + auto ux = NativeOfBigEndian32(*(const uint32_t*)(px + i)); + auto uy = NativeOfBigEndian32(*(const uint32_t*)(py + i)); + if (ux != uy) + return ux < uy; + else + i += 4; + } + for (; i < n; i++) { + int ux = px[i], uy = py[i]; + if (ux != uy) + return ux < uy; + } + if (nx != ny) + return nx < ny; + return GetUnalignedU64(px + n) > y.GetTag(); + #endif + } __always_inline bool 
operator()(uint64_t x, uint64_t y) const noexcept { return x < y; } @@ -1254,19 +1339,38 @@ struct BytewiseCompareInternalKey { }; struct RevBytewiseCompareInternalKey { __always_inline bool operator()(Slice x, Slice y) const noexcept { + ROCKSDB_ASSERT_GE(x.size_, 8); + ROCKSDB_ASSERT_GE(y.size_, 8); size_t n = std::min(x.size_, y.size_) - 8; int cmp = memcmp(x.data_, y.data_, n); if (0 != cmp) return cmp > 0; if (x.size_ != y.size_) return x.size_ > y.size_; return GetUnalignedU64(x.data_ + n) > GetUnalignedU64(y.data_ + n); } + __always_inline bool operator()(const ParsedInternalKey& x, Slice y) const noexcept { + ROCKSDB_ASSERT_GE(y.size_, 8); + size_t n = std::min(x.user_key.size_, y.size_ - 8); + int cmp = memcmp(x.user_key.data_, y.data_, n); + if (0 != cmp) return cmp > 0; + if (x.user_key.size_ != y.size_ - 8) return x.user_key.size_ > y.size_ - 8; + return x.GetTag() > GetUnalignedU64(y.data_ + n); + } + __always_inline bool operator()(Slice x, const ParsedInternalKey& y) const noexcept { + ROCKSDB_ASSERT_GE(x.size_, 8); + size_t n = std::min(x.size_ - 8, y.user_key.size_); + int cmp = memcmp(x.data_, y.user_key.data_, n); + if (0 != cmp) return cmp > 0; + if (x.size_ - 8 != y.user_key.size_) return x.size_ - 8 > y.user_key.size_; + return GetUnalignedU64(x.data_ + n) > y.GetTag(); + } __always_inline bool operator()(uint64_t x, uint64_t y) const noexcept { return x > y; } RevBytewiseCompareInternalKey(...) 
{} }; struct FallbackVirtCmp { - __always_inline bool operator()(Slice x, Slice y) const { + template + __always_inline bool operator()(const KeyX& x, const KeyY& y) const { return icmp->Compare(x, y) < 0; } const InternalKeyComparator* icmp; From df5d3e9341f0cddfe4f555c6747a792645ab801b Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 10 Apr 2026 20:52:33 +0800 Subject: [PATCH 056/102] version_set.cc: minor fix: use auto --- db/version_set.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/version_set.cc b/db/version_set.cc index 047773eac3..0914ef1288 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -877,7 +877,7 @@ class FilePickerMultiGet { // key falls to the right of `search_right_bound_`'s corresponding // file. So, pass a limit one higher, which allows us to detect this // case. - Slice& ikey = mget_iter->ikey; + auto& ikey = mget_iter->ikey; start_index = FindFileInRange( *internal_comparator_, *curr_file_level_, ikey, static_cast(fp_ctx.search_left_bound), From 1a8bdc063eae95cc244e2587d0f23f50f3bb6333 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 11 Apr 2026 14:58:32 +0800 Subject: [PATCH 057/102] version_set.cc: GetInst: avoid copy `Status` on hot path --- db/version_set.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 0914ef1288..7a015897e4 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2726,7 +2726,7 @@ void Version::GetInst(const ReadOptions& read_options, const LookupKey& k, perf_level >= PerfLevel::kEnableTimeExceptForMutex && get_perf_context()->per_level_perf_context_enabled; StopWatchNano timer(clock_, timer_enabled /* auto_start */); - *status = table_cache_->Get( + Status s2 = table_cache_->Get( read_options, *internal_comparator(), *f->file_metadata, ikey, &get_context, mutable_cf_options_.block_protection_bytes_per_key, mutable_cf_options_.prefix_extractor, @@ -2739,12 +2739,14 @@ void Version::GetInst(const ReadOptions& 
read_options, const LookupKey& k, PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(), fp.GetHitFileLevel()); } - if (!status->ok()) { + if (UNLIKELY(!s2.ok())) { + *status = std::move(s2); if (db_statistics_ != nullptr) { get_context.ReportCounters(); } return; } + status->SetAsOK(); // report the counters before returning if (get_context.State() != GetContext::kNotFound && From dd5b50ebbec573a8d46612b05a989f52b9ae2b91 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 11 Apr 2026 14:59:35 +0800 Subject: [PATCH 058/102] version_set.cc: GetInst: TOPLINGDB_WITH_FABRICATED_COMPLEXITY --- db/version_set.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/db/version_set.cc b/db/version_set.cc index 7a015897e4..310415f6c7 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2679,10 +2679,12 @@ void Version::GetInst(const ReadOptions& read_options, const LookupKey& k, } uint64_t tracing_get_id = BlockCacheTraceHelper::kReservedGetId; +#if defined(TOPLINGDB_WITH_FABRICATED_COMPLEXITY) if (vset_ && vset_->block_cache_tracer_ && vset_->block_cache_tracer_->is_tracing_enabled()) { tracing_get_id = vset_->block_cache_tracer_->NextGetId(); } +#endif // Note: the old StackableDB-based BlobDB passes in // GetImplOptions::is_blob_index; for the integrated BlobDB implementation, we @@ -2730,9 +2732,14 @@ void Version::GetInst(const ReadOptions& read_options, const LookupKey& k, read_options, *internal_comparator(), *f->file_metadata, ikey, &get_context, mutable_cf_options_.block_protection_bytes_per_key, mutable_cf_options_.prefix_extractor, + #if defined(TOPLINGDB_WITH_FABRICATED_COMPLEXITY) cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), IsFilterSkipped(static_cast(fp.GetHitFileLevel()), fp.IsHitFileLastInLevel()), + #else + nullptr, + false, + #endif fp.GetHitFileLevel(), max_file_size_for_l0_meta_pin_); // TODO: examine the behavior for corrupted key if (timer_enabled) { From 2c4eb123391d9f36e3e89bd30973a4686c2d1608 Mon Sep 17 
00:00:00 2001 From: rockeet Date: Sat, 11 Apr 2026 15:02:01 +0800 Subject: [PATCH 059/102] get_context: do not stat if statistics is null --- table/get_context.cc | 6 ++++++ table/get_context.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/table/get_context.cc b/table/get_context.cc index c2a4dc887b..9b1dd47026 100644 --- a/table/get_context.cc +++ b/table/get_context.cc @@ -61,6 +61,9 @@ GetContext::GetContext( if (seq) { *seq = kMaxSequenceNumber; } + if (statistics) { + new(&get_context_stats_)GetContextStats(); + } switch (g_how_sampling) { case GetContextSampleRead::kAlways: sample_ = true; break; case GetContextSampleRead::kNone : sample_ = false; break; @@ -133,6 +136,9 @@ void GetContext::SaveValue(const Slice& value, SequenceNumber /*seq*/) { } void GetContext::ReportCounters() { + if (!statistics_) { + return; + } if (get_context_stats_.num_cache_hit > 0) { RecordTick(statistics_, BLOCK_CACHE_HIT, get_context_stats_.num_cache_hit); } diff --git a/table/get_context.h b/table/get_context.h index 8579d08982..dbea262d6f 100644 --- a/table/get_context.h +++ b/table/get_context.h @@ -77,7 +77,9 @@ class GetContext { kUnexpectedBlobIndex, kMergeOperatorFailed, }; + union { GetContextStats get_context_stats_; + }; // Constructor // @param value Holds the value corresponding to user_key. 
If its nullptr From 106913115a52cdb8e233afac4bbef1add1f567a1 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 11 Apr 2026 15:03:24 +0800 Subject: [PATCH 060/102] Makefile: optimize lto flags for clang --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index b922211ccb..cbd6007a22 100644 --- a/Makefile +++ b/Makefile @@ -1040,6 +1040,9 @@ endif # topling specific WARNING_FLAGS WARNING_FLAGS := -Wall -Wno-shadow ifeq "$(shell a=${COMPILER};echo $${a:0:5})" "clang" + CXXFLAGS := $(patsubst -flto, -flto=thin, ${CXXFLAGS}) + LLD_LTO_FLAGS := -fuse-ld=lld -flto=thin -Wl,--thinlto-jobs=all + LDFLAGS := $(patsubst -flto=auto, ${LLD_LTO_FLAGS}, ${LDFLAGS}) LDFLAGS += -latomic #$(error LDFLAGS = ${LDFLAGS}) WARNING_FLAGS += -Wno-deprecated-builtins From 749d64b2a9be3038780aad664233304863d3ab31 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 10 Apr 2026 23:05:22 +0800 Subject: [PATCH 061/102] omit LookupKey on Get() for non TOPLINGDB_WITH_TIMESTAMP --- db/blob/db_blob_basic_test.cc | 4 +- db/db_impl/db_impl.cc | 26 +++++---- db/db_impl/db_impl_readonly.cc | 4 +- db/db_impl/db_impl_secondary.cc | 2 +- db/db_memtable_test.cc | 2 +- db/dbformat.h | 2 + db/flush_job.cc | 2 +- db/lookup_key.h | 5 ++ db/memtable.cc | 49 ++++++++++++---- db/memtable.h | 8 ++- db/memtable_list.cc | 8 +-- db/memtable_list.h | 12 ++-- db/memtable_list_test.cc | 6 ++ db/table_cache.cc | 9 ++- db/table_cache.h | 15 ++--- db/table_cache_sync_and_async.h | 3 +- db/version_set.cc | 56 +++++++++++++------ db/version_set.h | 9 ++- db/version_set_sync_and_async.h | 2 +- include/rocksdb/memtablerep.h | 12 ++++ .../block_based_table_reader_sync_and_async.h | 8 ++- table/block_based/filter_block.h | 6 +- table/block_based/partitioned_filter_block.h | 6 ++ table/multiget_context.h | 51 +++++++++++++---- table/table_reader.h | 13 ++++- table/table_test.cc | 4 +- 26 files changed, 235 insertions(+), 89 deletions(-) diff --git a/db/blob/db_blob_basic_test.cc 
b/db/blob/db_blob_basic_test.cc index 1c0caba93d..617b7939fc 100644 --- a/db/blob/db_blob_basic_test.cc +++ b/db/blob/db_blob_basic_test.cc @@ -942,9 +942,11 @@ TEST_F(DBBlobBasicTest, MultiGetBlob_CorruptIndex) { "Version::MultiGet::TamperWithBlobIndex", [&key](void* arg) { KeyContext* const key_context = static_cast(arg); assert(key_context); + #if defined(TOPLINGDB_WITH_TIMESTAMP) assert(key_context->key); + #endif - if (*(key_context->key) == key) { + if (key_context->ukey_without_ts == key) { Slice* const blob_index = key_context->value; assert(blob_index); assert(!blob_index->empty()); diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index c4c0eac821..f7634407c9 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -151,16 +151,20 @@ struct ToplingMGetCtx : protected MergeContext { #if defined(TOPLINGDB_WITH_TIMESTAMP) std::string* timestamp = nullptr; -#endif union { LookupKey lkey; }; +#endif + union { + ParsedInternalKey pikey; + }; void InitLookupKey(const Slice& user_key, SequenceNumber seq, const Slice* ts) { #if defined(TOPLINGDB_WITH_TIMESTAMP) new(&lkey)LookupKey(user_key, seq, ts); + new(&pikey)ParsedInternalKey(lkey.internal_key()); #else - new(&lkey)LookupKey(user_key, seq); + new(&pikey)ParsedInternalKey(user_key, seq, kValueTypeForSeek); (void)ts; assert(ts == nullptr); #endif @@ -168,8 +172,10 @@ struct ToplingMGetCtx : protected MergeContext { } ToplingMGetCtx() {} ~ToplingMGetCtx() { +#if defined(TOPLINGDB_WITH_TIMESTAMP) if (this->ext_flags_ & FLAG_lkey_initialized) lkey.~LookupKey(); +#endif } void set_done() { this->ext_flags_ |= FLAG_done; } bool is_done() const { return (this->ext_flags_ & FLAG_done) != 0; } @@ -2442,7 +2448,7 @@ Status DBImpl::GetInst(const ReadOptions& read_options, const Slice& key, #if defined(TOPLINGDB_WITH_TIMESTAMP) LookupKey lkey(key, snapshot, read_options.timestamp); #else - LookupKey lkey(key, snapshot); + ParsedInternalKey lkey(key, snapshot, kValueTypeForSeek); #endif 
PERF_TIMER_STOP(get_snapshot_time); @@ -2769,7 +2775,7 @@ std::vector DBImpl::MultiGet( LookupKey lkey(keys[keys_read], consistent_seqnum, read_options.timestamp); #else std::string* timestamp = nullptr; - LookupKey lkey(keys[keys_read], consistent_seqnum); + ParsedInternalKey lkey(keys[keys_read], consistent_seqnum, kValueTypeForSeek); #endif auto cfh = static_cast_with_check( @@ -3241,7 +3247,7 @@ struct CompareKeyContext { // Both keys are from the same column family int cmp = comparator->CompareWithoutTimestamp( - *(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false); + lhs->ukey_without_ts, /*a_has_ts=*/false, rhs->ukey_without_ts, /*b_has_ts=*/false); if (cmp < 0) { return true; } @@ -3253,7 +3259,7 @@ struct CompareKeyContextSameCF { const Comparator* comparator; inline bool operator()(const KeyContext* lhs, const KeyContext* rhs) { int cmp = comparator->CompareWithoutTimestamp( - *(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false); + lhs->ukey_without_ts, /*a_has_ts=*/false, rhs->ukey_without_ts, /*b_has_ts=*/false); return cmp < 0; } }; @@ -3498,7 +3504,7 @@ if (UNLIKELY(!g_MultiGetUseFiber)) { auto& max_covering_tombstone_seq = ctx_vec[i].max_covering_tombstone_seq; MergeContext& merge_context = ctx_vec[i].merge_context(); Status& s = statuses[i]; - if (sv->mem->Get(ctx_vec[i].lkey, &values[i], columns, + if (sv->mem->Get(ctx_vec[i].pikey, &values[i], columns, timestamp, &s, &merge_context, &max_covering_tombstone_seq, read_options, false, // immutable_memtable @@ -3506,7 +3512,7 @@ if (UNLIKELY(!g_MultiGetUseFiber)) { ctx_vec[i].set_done(); hits++; } else if ((s.ok() || s.IsMergeInProgress()) && - sv->imm->Get(ctx_vec[i].lkey, &values[i], columns, + sv->imm->Get(ctx_vec[i].pikey, &values[i], columns, timestamp, &s, &merge_context, &max_covering_tombstone_seq, read_options, callback, is_blob_index)) { @@ -3527,7 +3533,7 @@ if (UNLIKELY(!g_MultiGetUseFiber)) { bool* value_found = nullptr; bool get_value = true; sv->current->Get( - 
read_options, ctx_vec[i].lkey, &values[i], columns, + read_options, ctx_vec[i].pikey, &values[i], columns, timestamp, &statuses[i], &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr, value_found, @@ -6254,7 +6260,7 @@ Status DBImpl::GetLatestSequenceForKey( #if !defined(NDEBUG) constexpr size_t ts_sz = 0; #endif - LookupKey lkey(key, current_seq); + ParsedInternalKey lkey(key, current_seq, kValueTypeForSeek); #endif *seq = kMaxSequenceNumber; diff --git a/db/db_impl/db_impl_readonly.cc b/db/db_impl/db_impl_readonly.cc index 83e7a9a3bb..6607fc7063 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -94,10 +94,12 @@ Status DBImplReadOnly::GetImpl(const ReadOptions& read_options, return s; } } + LookupKey lkey(key, snapshot, read_options.timestamp); + #else + ParsedInternalKey lkey(key, snapshot, kValueTypeForSeek); #endif MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; - LookupKey lkey(key, snapshot, read_options.timestamp); PERF_TIMER_STOP(get_snapshot_time); // Look up starts here diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 99422b4eb1..a7f64cb74c 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -424,7 +424,7 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options, #if defined(TOPLINGDB_WITH_TIMESTAMP) LookupKey lkey(key, snapshot, read_options.timestamp); #else - LookupKey lkey(key, snapshot); + ParsedInternalKey lkey(key, snapshot, kValueTypeForSeek); #endif PERF_TIMER_STOP(get_snapshot_time); bool done = false; diff --git a/db/db_memtable_test.cc b/db/db_memtable_test.cc index 2942379729..bd17b7c047 100644 --- a/db/db_memtable_test.cc +++ b/db/db_memtable_test.cc @@ -302,7 +302,7 @@ TEST_F(DBMemTableTest, ConcurrentMergeWrite) { Status status; ReadOptions roptions; SequenceNumber max_covering_tombstone_seq = 0; - LookupKey lkey("key", kMaxSequenceNumber); + ParsedInternalKey lkey("key", 
kMaxSequenceNumber, kValueTypeForSeek); PinnableSlice pin; bool res = mem->Get(lkey, &pin, /*columns=*/nullptr, /*timestamp=*/nullptr, &status, &merge_context, &max_covering_tombstone_seq, diff --git a/db/dbformat.h b/db/dbformat.h index ee2db44b8f..c90b1b2cec 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -24,6 +24,8 @@ #include "util/user_comparator_wrapper.h" #include +#define TOPLINGDB_OMIT_LOOKUP_KEY 1 + namespace ROCKSDB_NAMESPACE { // The file declares data structures and functions that deal with internal diff --git a/db/flush_job.cc b/db/flush_job.cc index 9da817ac56..1adb729d23 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -748,7 +748,7 @@ bool FlushJob::MemPurgeDecider(double threshold) { // Count entry bytes as payload. payload += entry_size; - LookupKey lkey(res.user_key, kMaxSequenceNumber); + ParsedInternalKey lkey(res.user_key, kMaxSequenceNumber, kValueTypeForSeek); // Paranoia: zero out these values just in case. max_covering_tombstone_seq = 0; diff --git a/db/lookup_key.h b/db/lookup_key.h index ee9c889c37..133f150778 100644 --- a/db/lookup_key.h +++ b/db/lookup_key.h @@ -14,6 +14,7 @@ #include "rocksdb/slice.h" #include "rocksdb/types.h" #include "port/likely.h" +#include "dbformat.h" namespace ROCKSDB_NAMESPACE { @@ -62,6 +63,10 @@ class LookupKey { return Slice(longstart_, klength_ - 8); } + operator ParsedInternalKey() const { + return ParsedInternalKey(internal_key()); + } + private: // We construct a char array of the form: // short keys: klength_ <= sizeof(space_) diff --git a/db/memtable.cc b/db/memtable.cc index a24d8d8443..6c8bbfab61 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -302,6 +302,26 @@ int MemTable::KeyComparator::operator()( return comparator.CompareKeySeq(a, key); } +int MemTable::KeyComparator::operator()(const char* prefix_len_key, + const ParsedInternalKey& b) const { + Slice a = GetLengthPrefixedSlice(prefix_len_key); + return comparator.Compare(a, b); +} + +int MemTable::KeyComparator::operator()(const 
ParsedInternalKey& a, + const char* prefix_len_key) const { + Slice b = GetLengthPrefixedSlice(prefix_len_key); + return comparator.Compare(a, b); +} + +void MemTableRep::GetPIK(const struct ReadOptions& ro, + const ParsedInternalKey& pik, void* callback_args, + bool (*callback_func)(void* arg, const KeyValuePair&)) +{ + LookupKey lk(pik.user_key, pik.sequence); + Get(ro, lk, callback_args, callback_func); +} + void MemTableRep::InsertConcurrently(KeyHandle /*handle*/) { throw std::runtime_error("concurrent insert not supported"); } @@ -928,7 +948,12 @@ namespace { struct Saver { Status* status; - const LookupKey* key; + struct LikeLookupKey : private Slice { + using Slice::operator=; + const Slice& user_key() const { return *this; } + const LikeLookupKey* operator->() const { return this; } + }; + LikeLookupKey key; PinnableSlice* value; PinnableWideColumns* columns; SequenceNumber seq; @@ -1312,7 +1337,7 @@ static bool SaveValue(void* arg, const MemTableRep::KeyValuePair& pair) { } ROCKSDB_FLATTEN -bool MemTable::Get(const LookupKey& key, PinnableSlice* value, +bool MemTable::Get(const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, @@ -1329,11 +1354,11 @@ bool MemTable::Get(const LookupKey& key, PinnableSlice* value, std::unique_ptr range_del_iter( NewRangeTombstoneIterator(read_opts, - GetInternalKeySeqno(key.internal_key()), + key.sequence, immutable_memtable)); if (range_del_iter != nullptr) { SequenceNumber covering_seq = - range_del_iter->MaxCoveringTombstoneSeqnum(key.user_key()); + range_del_iter->MaxCoveringTombstoneSeqnum(key.user_key); if (covering_seq > *max_covering_tombstone_seq) { *max_covering_tombstone_seq = covering_seq; if (timestamp) { @@ -1349,9 +1374,9 @@ bool MemTable::Get(const LookupKey& key, PinnableSlice* value, bool may_contain = true; #if defined(TOPLINGDB_WITH_TIMESTAMP) size_t ts_sz = 
GetInternalKeyComparator().user_comparator()->timestamp_size(); - Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz); + Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key, ts_sz); #else - Slice user_key_without_ts = key.user_key(); + Slice user_key_without_ts = key.user_key; #endif bool bloom_checked = false; // when both memtable_whole_key_filtering and prefix_extractor_ are set, @@ -1384,7 +1409,7 @@ bool MemTable::Get(const LookupKey& key, PinnableSlice* value, saver.status = s; saver.found_final_value = false; saver.merge_in_progress = s->IsMergeInProgress(); - saver.key = &key; + saver.key = key.user_key; saver.value = value; saver.columns = columns; saver.timestamp = timestamp; @@ -1406,7 +1431,7 @@ bool MemTable::Get(const LookupKey& key, PinnableSlice* value, if (LIKELY(value != nullptr)) { value->Reset(); } - table_->Get(read_opts, key, &saver, SaveValue); + table_->GetPIK(read_opts, key, &saver, SaveValue); *seq = saver.seq; // No change to value, since we have not yet found a Put/Delete @@ -1464,10 +1489,10 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range, if (!no_range_del) { std::unique_ptr range_del_iter( NewRangeTombstoneIteratorInternal( - read_options, GetInternalKeySeqno(iter->lkey->internal_key()), + read_options, iter->ikey.sequence, immutable_memtable)); SequenceNumber covering_seq = - range_del_iter->MaxCoveringTombstoneSeqnum(iter->lkey->user_key()); + range_del_iter->MaxCoveringTombstoneSeqnum(iter->ikey.user_key); if (covering_seq > iter->max_covering_tombstone_seq) { iter->max_covering_tombstone_seq = covering_seq; if (iter->timestamp) { @@ -1482,7 +1507,7 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range, saver.status = iter->s; saver.found_final_value = false; saver.merge_in_progress = iter->s->IsMergeInProgress(); - saver.key = iter->lkey; + saver.key = iter->ikey.user_key; saver.value = iter->value; // not null if (saver.value) 
saver.value->Reset(); @@ -1503,7 +1528,7 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range, saver.allow_data_in_errors = moptions_.allow_data_in_errors; saver.is_zero_copy = read_options.internal_is_in_pinning_section; saver.needs_user_key_cmp_in_get = needs_user_key_cmp_in_get_; - table_->Get(read_options, *(iter->lkey), &saver, SaveValue); + table_->GetPIK(read_options, iter->ikey, &saver, SaveValue); if (!saver.found_final_value && saver.merge_in_progress) { *(iter->s) = Status::MergeInProgress(); diff --git a/db/memtable.h b/db/memtable.h index 5cd0d56288..04549a3187 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -134,6 +134,10 @@ class MemTable : public CacheAlignedNewDelete { const char* prefix_len_key2) const override; virtual int operator()(const char* prefix_len_key, const DecodedType& key) const override; + virtual int operator()(const char* prefix_len_key, + const ParsedInternalKey&) const override; + virtual int operator()(const ParsedInternalKey&, + const char* prefix_len_key) const override; virtual const InternalKeyComparator* icomparator() const override; }; @@ -301,7 +305,7 @@ class MemTable : public CacheAlignedNewDelete { // @param immutable_memtable Whether this memtable is immutable. Used // internally by NewRangeTombstoneIterator(). See comment above // NewRangeTombstoneIterator() for more detail. 
- bool Get(const LookupKey& key, PinnableSlice* value, + bool Get(const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, @@ -309,7 +313,7 @@ class MemTable : public CacheAlignedNewDelete { ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, bool do_merge = true); - bool Get(const LookupKey& key, PinnableSlice* value, + bool Get(const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, diff --git a/db/memtable_list.cc b/db/memtable_list.cc index deeccae575..9eacca84e9 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -110,7 +110,7 @@ int MemTableList::NumFlushed() const { // Search all the memtables starting from the most recent one. // Return the most recent value found, if any. // Operands stores the list of merge operations to apply, so far. 
-bool MemTableListVersion::Get(const LookupKey& key, PinnableSlice* value, +bool MemTableListVersion::Get(const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, @@ -135,7 +135,7 @@ void MemTableListVersion::MultiGet(const ReadOptions& read_options, } bool MemTableListVersion::GetMergeOperands( - const LookupKey& key, Status* s, MergeContext* merge_context, + const ParsedInternalKey& key, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts) { for (MemTable* memtable : memlist_) { bool done = memtable->Get( @@ -150,7 +150,7 @@ bool MemTableListVersion::GetMergeOperands( } bool MemTableListVersion::GetFromHistory( - const LookupKey& key, PinnableSlice* value, PinnableWideColumns* columns, + const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, bool* is_blob_index) { @@ -160,7 +160,7 @@ bool MemTableListVersion::GetFromHistory( } bool MemTableListVersion::GetFromList( - std::list* list, const LookupKey& key, PinnableSlice* value, + std::list* list, const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback, diff --git a/db/memtable_list.h b/db/memtable_list.h index 328d160e83..6630046b68 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -57,14 +57,14 @@ class MemTableListVersion { // If any operation was found for this key, its most recent sequence number // will be stored in *seq on success (regardless of whether true/false is // returned). Otherwise, *seq will be set to kMaxSequenceNumber. 
- bool Get(const LookupKey& key, PinnableSlice* value, + bool Get(const ParsedInternalKey&, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr); - bool Get(const LookupKey& key, PinnableSlice* value, + bool Get(const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, @@ -83,7 +83,7 @@ class MemTableListVersion { // Returns all the merge operands corresponding to the key by searching all // memtables starting from the most recent one. - bool GetMergeOperands(const LookupKey& key, Status* s, + bool GetMergeOperands(const ParsedInternalKey&, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts); @@ -92,13 +92,13 @@ class MemTableListVersion { // have already been flushed. Should only be used from in-memory only // queries (such as Transaction validation) as the history may contain // writes that are also present in the SST files. 
- bool GetFromHistory(const LookupKey& key, PinnableSlice* value, + bool GetFromHistory(const ParsedInternalKey&, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, bool* is_blob_index = nullptr); - bool GetFromHistory(const LookupKey& key, PinnableSlice* value, + bool GetFromHistory(const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, @@ -162,7 +162,7 @@ class MemTableListVersion { // Return true if memtable is trimmed bool TrimHistory(autovector* to_delete, size_t usage); - bool GetFromList(std::list* list, const LookupKey& key, + bool GetFromList(std::list* list, const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* s, MergeContext* merge_context, diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index 1766456791..1314a7557c 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -34,6 +34,12 @@ static auto g_cspp_fac = []()-> std::shared_ptr { return nullptr; }(); +struct HideLookupKey : ParsedInternalKey { + HideLookupKey(Slice uk, uint64_t seq) : + ParsedInternalKey(uk, seq, kValueTypeForSeek) {} +}; +#define LookupKey HideLookupKey + class MemTableListTest : public testing::Test { public: std::string dbname; diff --git a/db/table_cache.cc b/db/table_cache.cc index 0dd427049e..3544cb0d61 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -439,7 +439,8 @@ bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, Status TableCache::GetWithRowCache( const ReadOptions& options, const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + const FileMetaData& file_meta, const 
ParsedInternalKey& pik, + GetContext* get_context, uint8_t block_protection_bytes_per_key, const std::shared_ptr& prefix_extractor, HistogramImpl* file_read_hist, bool skip_filters, int level, @@ -453,6 +454,8 @@ Status TableCache::GetWithRowCache( // Check row cache if enabled. // Reuse row_cache_key sequence number when row cache hits. if (ioptions_.row_cache && !get_context->NeedToReadSequence()) { + const auto ikbuf = pik.MakeInternalKeyBuf(); + const Slice k = ikbuf; auto user_key = ExtractUserKey(k); uint64_t cache_entry_seq_no = CreateRowCacheKeyPrefix(options, fd, k, get_context, row_cache_key); @@ -486,7 +489,7 @@ Status TableCache::GetWithRowCache( t->NewRangeTombstoneIterator(options)); if (range_del_iter != nullptr) { SequenceNumber seq = - range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k)); + range_del_iter->MaxCoveringTombstoneSeqnum(pik.user_key); if (seq > *max_covering_tombstone_seq) { *max_covering_tombstone_seq = seq; if (get_context->NeedTimestamp()) { @@ -498,7 +501,7 @@ Status TableCache::GetWithRowCache( } if (s.ok()) { get_context->SetReplayLog(row_cache_entry); // nullptr if no cache. 
- s = t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); + s = t->GetPIK(options, pik, get_context, prefix_extractor.get(), skip_filters); get_context->SetReplayLog(nullptr); } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) { // Couldn't find Table in cache but treat as kFound if no_io set diff --git a/db/table_cache.h b/db/table_cache.h index ee85f7ba4e..91b4d7d377 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -116,7 +116,7 @@ class TableCache { Status Get( const ReadOptions& options, const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + const FileMetaData& file_meta, const ParsedInternalKey& k, GetContext* get_context, uint8_t block_protection_bytes_per_key, const std::shared_ptr& prefix_extractor = nullptr, HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, @@ -136,7 +136,7 @@ class TableCache { Status GetWithRowCache( const ReadOptions& options, const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + const FileMetaData& file_meta, const ParsedInternalKey&, GetContext*, uint8_t block_protection_bytes_per_key, const std::shared_ptr& prefix_extractor, HistogramImpl* file_read_hist, bool skip_filters, @@ -144,7 +144,7 @@ class TableCache { Status GetNoneRowCache( const ReadOptions& options, const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + const FileMetaData& file_meta, const ParsedInternalKey&, GetContext*, uint8_t block_protection_bytes_per_key, const std::shared_ptr& prefix_extractor, HistogramImpl* file_read_hist, bool skip_filters, @@ -329,7 +329,8 @@ __always_inline Status TableCache::GetNoneRowCache( const ReadOptions& options, const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + const FileMetaData& 
file_meta, const ParsedInternalKey& pik, + GetContext* get_context, uint8_t block_protection_bytes_per_key, const std::shared_ptr& prefix_extractor, HistogramImpl* file_read_hist, bool skip_filters, int level, @@ -355,7 +356,7 @@ Status TableCache::GetNoneRowCache( std::unique_ptr range_del_iter( t->NewRangeTombstoneIterator(options)); if (range_del_iter != nullptr) { - auto seq = range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k)); + auto seq = range_del_iter->MaxCoveringTombstoneSeqnum(pik.user_key); if (seq > *max_covering_tombstone_seq) { *max_covering_tombstone_seq = seq; if (get_context->NeedTimestamp()) { @@ -365,9 +366,9 @@ Status TableCache::GetNoneRowCache( } } if (LIKELY(handle == nullptr)) { // optimize for compiler tail call - return t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); + return t->GetPIK(options, pik, get_context, prefix_extractor.get(), skip_filters); } else { - Status s = t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); + Status s = t->GetPIK(options, pik, get_context, prefix_extractor.get(), skip_filters); cache_.Release(handle); return s; } diff --git a/db/table_cache_sync_and_async.h b/db/table_cache_sync_and_async.h index 8ff03ec501..16b11094c0 100644 --- a/db/table_cache_sync_and_async.h +++ b/db/table_cache_sync_and_async.h @@ -40,7 +40,8 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet) // sequence numbers, we cannot use it if we need to fetch the sequence. 
if (lookup_row_cache) { GetContext* first_context = first_key.get_context; - CreateRowCacheKeyPrefix(options, fd, first_key.ikey, first_context, + const auto first_ik = first_key.InternalKeyBuf(); + CreateRowCacheKeyPrefix(options, fd, first_ik, first_context, row_cache_key); row_cache_key_prefix_size = row_cache_key.Size(); diff --git a/db/version_set.cc b/db/version_set.cc index 310415f6c7..93c36b6506 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -109,9 +109,28 @@ namespace { #define __builtin_prefetch(ptr) #endif +inline uint64_t HostPrefixCache(const ParsedInternalKey& ikey) { + if (LIKELY(ikey.user_key.size_ >= 8)) { + uint64_t data = GetUnalignedU64(ikey.user_key.data_); + return NativeOfBigEndian64(data); + } else { + #if defined(__AVX512VL__) && defined(__AVX512BW__) + //#pragma message "__AVX512VL__ && __AVX512BW__, use _mm_maskz_loadu_epi8" + // load 128 bits, keep low 64 bits, discard high 64 bits + auto mask = _bzhi_u32(-1, uint32_t(ikey.user_key.size_)); + auto m128 = _mm_maskz_loadu_epi8(mask, ikey.user_key.data_); + uint64_t data = (uint64_t)_mm_extract_epi64(m128, 0); + #else + uint64_t data = 0; + memcpy(&data, ikey.user_key.data_, ikey.user_key.size_); + #endif + return NativeOfBigEndian64(data); + } +} + template size_t FindFileInRangeTmpl(Cmp cmp, const LevelFilesBrief& brief, - Slice key, size_t lo, size_t hi) { + const ParsedInternalKey& key, size_t lo, size_t hi) { const uint64_t* pxcache = brief.prefix_cache; const uint64_t key_prefix = HostPrefixCache(key); const FdWithKeyRange* a = brief.files; @@ -141,7 +160,7 @@ size_t FindFileInRangeTmpl(Cmp cmp, const LevelFilesBrief& brief, static size_t FindFileInRangeTmpl(FallbackVirtCmp cmp, const LevelFilesBrief& brief, - Slice key, size_t lo, size_t hi) { + const ParsedInternalKey& key, size_t lo, size_t hi) { const FdWithKeyRange* a = brief.files; while (lo < hi) { size_t mid = (lo + hi) / 2; @@ -157,7 +176,7 @@ template static ROCKSDB_FLATTEN int FindFileInRangeInst(const 
InternalKeyComparator* icmp, const LevelFilesBrief& brief, - Slice key, size_t lo, size_t hi) { + const ParsedInternalKey& key, size_t lo, size_t hi) { return (int)FindFileInRangeTmpl(Cmp{icmp}, brief, key, lo, hi); } @@ -167,7 +186,7 @@ int FindFileInRangeInst(const InternalKeyComparator* icmp, __attribute_noinline__ #endif int FindFileInRange(const InternalKeyComparator& icmp, - const LevelFilesBrief& file_level, const Slice& key, + const LevelFilesBrief& file_level, const ParsedInternalKey& key, uint32_t left, uint32_t right) { #ifdef TOPLINGDB_NO_OPT_FindFileInRange #pragma message "TOPLINGDB_NO_OPT_FindFileInRange is defined, intended for benchmark baseline" @@ -240,12 +259,12 @@ template class FilePicker { __always_inline int FindFileInRange(const InternalKeyComparator& icmp, - const LevelFilesBrief& file_level, const Slice& key, + const LevelFilesBrief& file_level, const ParsedInternalKey& key, size_t left, size_t right) { return (int)FindFileInRangeTmpl(IKCmp{&icmp}, file_level, key, left, right); } public: - FilePicker(const Slice& user_key, const Slice& ikey, + FilePicker(const Slice& user_key, const ParsedInternalKey& ikey, autovector* file_levels, unsigned int num_levels, FileIndexer* file_indexer, const Comparator* user_comparator, const InternalKeyComparator* internal_comparator) @@ -270,7 +289,7 @@ class FilePicker { for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) { auto* r = (*level_files_brief_)[0].files[i].fd.table_reader; if (r) { - r->Prepare(ikey); + r->PreparePIK(ikey); } } } @@ -367,7 +386,7 @@ class FilePicker { unsigned int curr_index_in_curr_level_; unsigned int start_index_in_curr_level_; Slice user_key_; - Slice ikey_; + ParsedInternalKey ikey_; FileIndexer* file_indexer_; const Comparator* user_comparator_; const InternalKeyComparator* internal_comparator_; @@ -494,7 +513,7 @@ class FilePickerMultiGet { auto* r = (*level_files_brief_)[0].files[i].fd.table_reader; if (r) { for (auto iter = range_.begin(); iter != 
range_.end(); ++iter) { - r->Prepare(iter->ikey); + r->PreparePIK(iter->ikey); } } } @@ -947,6 +966,12 @@ Version::~Version() { int FindFile(const InternalKeyComparator& icmp, const LevelFilesBrief& file_level, const Slice& key) { + return FindFileInRange(icmp, file_level, ParsedInternalKey(key), 0, + static_cast(file_level.num_files)); +} + +int FindFile(const InternalKeyComparator& icmp, + const LevelFilesBrief& file_level, const ParsedInternalKey& key) { return FindFileInRange(icmp, file_level, key, 0, static_cast(file_level.num_files)); } @@ -2660,7 +2685,7 @@ void Version::MultiGetBlob( template ROCKSDB_FLATTEN -void Version::GetInst(const ReadOptions& read_options, const LookupKey& k, +void Version::GetInst(const ReadOptions& read_options, const ParsedInternalKey& ikey, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* status, MergeContext* merge_context, @@ -2668,8 +2693,7 @@ void Version::GetInst(const ReadOptions& read_options, const LookupKey& k, PinnedIteratorsManager* pinned_iters_mgr, bool* value_found, bool* key_exists, SequenceNumber* seq, ReadCallback* callback, bool* is_blob, bool do_merge) { - Slice ikey = k.internal_key(); - Slice user_key = k.user_key(); + const Slice& user_key = ikey.user_key; assert(status->ok() || status->IsMergeInProgress()); @@ -3079,7 +3103,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, for (auto iter = range->begin(); s.ok() && iter != range->end(); ++iter) { GetContext& get_context = *iter->get_context; Status* status = iter->s; - Slice user_key = iter->lkey->user_key(); + const Slice& user_key = iter->ikey.user_key; if (db_statistics_ != nullptr) { get_context.ReportCounters(); @@ -4909,7 +4933,7 @@ uint64_t VersionStorageInfo::NumLevelRawKV(int level) const { int VersionStorageInfo::FindFileInRange(int level, const Slice& key, uint32_t left, uint32_t right) const { return ROCKSDB_NAMESPACE::FindFileInRange(*internal_comparator_, - 
level_files_brief_[level], key, left, right); + level_files_brief_[level], ParsedInternalKey(key), left, right); } const char* VersionStorageInfo::LevelSummary( @@ -7133,7 +7157,7 @@ VersionSet::ApproximateSizeTmpl(const SizeApproximationOptions& options, // identify the file position for start key const int idx_start = - (int)FindFileInRangeTmpl(cmp, files_brief, start, 0, + (int)FindFileInRangeTmpl(cmp, files_brief, ParsedInternalKey(start), 0, static_cast(files_brief.num_files - 1)); assert(static_cast(idx_start) < files_brief.num_files); @@ -7141,7 +7165,7 @@ VersionSet::ApproximateSizeTmpl(const SizeApproximationOptions& options, int idx_end = idx_start; if (cmp(files_brief.files[idx_end].largest_key, end)) { idx_end = - (int)FindFileInRangeTmpl(cmp, files_brief, end, idx_start, + (int)FindFileInRangeTmpl(cmp, files_brief, ParsedInternalKey(end), idx_start, static_cast(files_brief.num_files - 1)); } assert(idx_end >= idx_start && diff --git a/db/version_set.h b/db/version_set.h index 647ae00fbf..d05e70c845 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -101,6 +101,9 @@ using VersionEditParams = VersionEdit; extern int FindFile(const InternalKeyComparator& icmp, const LevelFilesBrief& file_level, const Slice& key); +extern int FindFile(const InternalKeyComparator& icmp, + const LevelFilesBrief& file_level, const ParsedInternalKey&); + // Returns true iff some file in "files" overlaps the user key range // [*smallest,*largest]. // smallest==nullptr represents a key smaller than all keys in the DB. 
@@ -894,7 +897,7 @@ class Version { // merge_context.operands_list and don't merge the operands // REQUIRES: lock is not held // REQUIRES: pinned_iters_mgr != nullptr - void Get(const ReadOptions& ro, const LookupKey& key, PinnableSlice* value, + void Get(const ReadOptions& ro, const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* status, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, @@ -910,7 +913,7 @@ class Version { private: template - void GetInst(const ReadOptions&, const LookupKey& key, PinnableSlice* value, + void GetInst(const ReadOptions&, const ParsedInternalKey&, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* status, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, @@ -920,7 +923,7 @@ class Version { bool* is_blob, bool do_merge); void (*m_get)(Version*, - const ReadOptions&, const LookupKey& key, PinnableSlice* value, + const ReadOptions&, const ParsedInternalKey&, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* status, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, diff --git a/db/version_set_sync_and_async.h b/db/version_set_sync_and_async.h index 75776b620c..ec107252c3 100644 --- a/db/version_set_sync_and_async.h +++ b/db/version_set_sync_and_async.h @@ -145,7 +145,7 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST) continue; case GetContext::kCorrupt: *status = - Status::Corruption("corrupted key for ", iter->lkey->user_key()); + Status::Corruption("corrupted key for ", iter->ikey.user_key); file_range.MarkKeyDone(iter); continue; case GetContext::kUnexpectedBlobIndex: diff --git a/include/rocksdb/memtablerep.h b/include/rocksdb/memtablerep.h index 36888a9bd8..5ddfc6d76a 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -83,6 +83,14 @@ class MemTableRep : public CacheAlignedNewDelete { virtual int 
operator()(const char* prefix_len_key, const Slice& key) const = 0; + // Compare prefix_len_key (encoded internal key) with user_key + tag + virtual int operator()(const char* prefix_len_key, + const struct ParsedInternalKey&) const = 0; + + // Compare user_key + tag with prefix_len_key (encoded internal key) + virtual int operator()(const struct ParsedInternalKey&, + const char* prefix_len_key) const = 0; + virtual const InternalKeyComparator* icomparator() const = 0; virtual ~KeyComparator() {} @@ -242,6 +250,10 @@ class MemTableRep : public CacheAlignedNewDelete { const LookupKey&, void* callback_args, bool (*callback_func)(void* arg, const KeyValuePair&)) = 0; + virtual void GetPIK(const struct ReadOptions&, + const struct ParsedInternalKey&, void* callback_args, + bool (*callback_func)(void* arg, const KeyValuePair&)); + virtual uint64_t ApproximateNumEntries(const Slice& /*start_ikey*/, const Slice& /*end_key*/) { return 0; diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index e7621909cc..0cbe31b7e4 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -408,8 +408,9 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) for (auto miter = data_block_range.begin(); miter != data_block_range.end(); ++miter) { - const Slice& key = miter->ikey; - iiter->Seek(miter->ikey); + const auto ikbuf = miter->InternalKeyBuf(); + const Slice key = ikbuf; + iiter->Seek(key); IndexValue v; if (iiter->Valid()) { @@ -580,7 +581,8 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) ++miter) { Status s; GetContext* get_context = miter->get_context; - const Slice& key = miter->ikey; + const auto ikbuf = miter->InternalKeyBuf(); + const Slice key = ikbuf; bool matched = false; // if such user key matched a key in SST bool done = false; bool first_block = true; diff --git 
a/table/block_based/filter_block.h b/table/block_based/filter_block.h index b14858c020..6e400f25a4 100644 --- a/table/block_based/filter_block.h +++ b/table/block_based/filter_block.h @@ -120,7 +120,8 @@ class FilterBlockReader { const ReadOptions& read_options) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey_without_ts = iter->ukey_without_ts; - const Slice ikey = iter->ikey; + const auto ikbuf = iter->InternalKeyBuf(); + const Slice ikey = ikbuf; // convert from named ikbuf GetContext* const get_context = iter->get_context; if (!KeyMayMatch(ukey_without_ts, no_io, &ikey, get_context, lookup_context, read_options)) { @@ -145,7 +146,8 @@ class FilterBlockReader { const ReadOptions& read_options) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey_without_ts = iter->ukey_without_ts; - const Slice ikey = iter->ikey; + const auto ikbuf = iter->InternalKeyBuf(); + const Slice ikey = ikbuf; // convert from named ikbuf GetContext* const get_context = iter->get_context; if (prefix_extractor->InDomain(ukey_without_ts) && !PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts), no_io, diff --git a/table/block_based/partitioned_filter_block.h b/table/block_based/partitioned_filter_block.h index 817fe94245..565ca6da82 100644 --- a/table/block_based/partitioned_filter_block.h +++ b/table/block_based/partitioned_filter_block.h @@ -138,6 +138,12 @@ class PartitionedFilterBlockReader BlockHandle GetFilterPartitionHandle( const CachableEntry& filter_block, const Slice& entry) const; + BlockHandle GetFilterPartitionHandle( + const CachableEntry& filter_block, + const ParsedInternalKey& entry) const { + // overload this function can minimize diff, the caller need not change + return GetFilterPartitionHandle(filter_block, entry.MakeInternalKeyBuf()); + } Status GetFilterPartitionBlock( FilePrefetchBuffer* prefetch_buffer, const BlockHandle& handle, bool no_io, GetContext* get_context, diff --git 
a/table/multiget_context.h b/table/multiget_context.h index 8c7beb2c11..0aafd460b3 100644 --- a/table/multiget_context.h +++ b/table/multiget_context.h @@ -25,11 +25,25 @@ class GetContext; class PinnableWideColumns; struct KeyContext { +#if defined(TOPLINGDB_WITH_TIMESTAMP) const Slice* key; LookupKey* lkey; - Slice ukey_with_ts; + union { + ParsedInternalKey ikey; + Slice ukey_with_ts; // at ikey.user_key + }; Slice ukey_without_ts; - Slice ikey; + // long live & fast + auto InternalKeyBuf() const { return lkey->internal_key(); } +#else + union { + ParsedInternalKey ikey; + Slice ukey_with_ts; // at ikey.user_key + Slice ukey_without_ts; // at ikey.user_key + }; + // temporary & slow + auto InternalKeyBuf() const { return ikey.MakeInternalKeyBuf(); } +#endif ColumnFamilyHandle* column_family; Status* s; MergeContext merge_context; @@ -45,8 +59,13 @@ struct KeyContext { KeyContext(ColumnFamilyHandle* col_family, const Slice& user_key, PinnableSlice* val, PinnableWideColumns* cols, std::string* ts, Status* stat) +#if defined(TOPLINGDB_WITH_TIMESTAMP) : key(&user_key), lkey(nullptr), + ukey_without_ts(user_key), // must init +#else + : ukey_without_ts(user_key), // keep ikey.tag raw mem +#endif column_family(col_family), s(stat), max_covering_tombstone_seq(0), @@ -113,8 +132,11 @@ class MultiGetContext { Statistics* stats) : num_keys_(num_keys), value_mask_(0), - value_size_(0), + value_size_(0) +#if defined(TOPLINGDB_WITH_TIMESTAMP) + , lookup_key_ptr_(reinterpret_cast(lookup_key_stack_buf)) +#endif #if USE_COROUTINES , reader_(fs, stats), @@ -124,41 +146,44 @@ class MultiGetContext { (void)fs; (void)stats; assert(num_keys <= MAX_BATCH_SIZE); +#if defined(TOPLINGDB_WITH_TIMESTAMP) if (num_keys > MAX_LOOKUP_KEYS_ON_STACK) { lookup_key_heap_buf.reset(new char[sizeof(LookupKey) * num_keys]); lookup_key_ptr_ = reinterpret_cast(lookup_key_heap_buf.get()); } +#endif + ROCKSDB_ASSERT_LE(begin + num_keys, sorted_keys->size()); for (size_t iter = 0; iter != num_keys_; 
++iter) { // autovector may not be contiguous storage, so make a copy sorted_keys_[iter] = (*sorted_keys)[begin + iter]; + #if defined(TOPLINGDB_WITH_TIMESTAMP) sorted_keys_[iter]->lkey = new (&lookup_key_ptr_[iter]) LookupKey(*sorted_keys_[iter]->key, snapshot, read_opts.timestamp); sorted_keys_[iter]->ukey_with_ts = sorted_keys_[iter]->lkey->user_key(); - #if defined(TOPLINGDB_WITH_TIMESTAMP) sorted_keys_[iter]->ukey_without_ts = StripTimestampFromUserKey( sorted_keys_[iter]->lkey->user_key(), read_opts.timestamp == nullptr ? 0 : read_opts.timestamp->size()); - #else - sorted_keys_[iter]->ukey_without_ts = sorted_keys_[iter]->lkey->user_key(); - #endif - - sorted_keys_[iter]->ikey = sorted_keys_[iter]->lkey->internal_key(); - - #if defined(TOPLINGDB_WITH_TIMESTAMP) sorted_keys_[iter]->timestamp = (*sorted_keys)[begin + iter]->timestamp; + #else + static_assert(offsetof(KeyContext, ikey.user_key) == offsetof(KeyContext, ukey_without_ts)); #endif + static_assert(offsetof(KeyContext, ikey.user_key) == offsetof(KeyContext, ukey_with_ts)); + sorted_keys_[iter]->ikey.sequence = snapshot; + sorted_keys_[iter]->ikey.type = kValueTypeForSeek; sorted_keys_[iter]->get_context = (*sorted_keys)[begin + iter]->get_context; } } +#if defined(TOPLINGDB_WITH_TIMESTAMP) ~MultiGetContext() { for (size_t i = 0; i < num_keys_; ++i) { lookup_key_ptr_[i].~LookupKey(); } } +#endif #if USE_COROUTINES SingleThreadExecutor& executor() { return executor_; } @@ -168,15 +193,19 @@ class MultiGetContext { private: static const int MAX_LOOKUP_KEYS_ON_STACK = 16; +#if defined(TOPLINGDB_WITH_TIMESTAMP) alignas( alignof(LookupKey)) char lookup_key_stack_buf[sizeof(LookupKey) * MAX_LOOKUP_KEYS_ON_STACK]; +#endif std::array sorted_keys_; size_t num_keys_; Mask value_mask_; uint64_t value_size_; +#if defined(TOPLINGDB_WITH_TIMESTAMP) std::unique_ptr lookup_key_heap_buf; LookupKey* lookup_key_ptr_; +#endif #if USE_COROUTINES AsyncFileReader reader_; SingleThreadExecutor executor_; diff --git 
a/table/table_reader.h b/table/table_reader.h index 3a2c46c62e..8db7ade1d9 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -113,6 +113,9 @@ class TableReader : public CacheAlignedNewDelete { // Prepare work that can be done before the real Get() virtual void Prepare(const Slice& /*target*/) {} + virtual void PreparePIK(const ParsedInternalKey& pik) { + Prepare(pik.MakeInternalKeyBuf()); + } // Report an approximation of how much memory has been used. virtual size_t ApproximateMemoryUsage() const = 0; @@ -133,6 +136,14 @@ class TableReader : public CacheAlignedNewDelete { const SliceTransform* prefix_extractor, bool skip_filters = false) = 0; + virtual Status GetPIK(const ReadOptions& ro, const ParsedInternalKey& pik, + GetContext* get_context, + const SliceTransform* prefix_extractor, + bool skip_filters = false) { + auto ikbuf = pik.MakeInternalKeyBuf(); + return Get(ro, ikbuf, get_context, prefix_extractor, skip_filters); + } + // Use bloom filters in the table file, if present, to filter out keys. The // mget_range will be updated to skip keys that get a negative result from // the filter lookup. 
@@ -147,7 +158,7 @@ class TableReader : public CacheAlignedNewDelete { const SliceTransform* prefix_extractor, bool skip_filters = false) { for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) { - *iter->s = Get(readOptions, iter->ikey, iter->get_context, + *iter->s = GetPIK(readOptions, iter->ikey, iter->get_context, prefix_extractor, skip_filters); } } diff --git a/table/table_test.cc b/table/table_test.cc index 33d823920d..56f60f94d2 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -3751,14 +3751,14 @@ TEST_P(BlockBasedTableTest, TracingMultiGetTest) { /*PinnableWideColumns omitted*/ nullptr, /*timestamp omitted*/ nullptr, statuses.data()); key_context[0].ukey_without_ts = ukeys[0]; - key_context[0].ikey = encoded_keys[0]; + key_context[0].ikey = ParsedInternalKey(encoded_keys[0]); key_context[0].get_context = get_contexts.data(); key_context.emplace_back(/*ColumnFamilyHandle omitted*/ nullptr, ukeys[1], &values[1], /*PinnableWideColumns omitted*/ nullptr, /*timestamp omitted*/ nullptr, &statuses[1]); key_context[1].ukey_without_ts = ukeys[1]; - key_context[1].ikey = encoded_keys[1]; + key_context[1].ikey = ParsedInternalKey(encoded_keys[1]); key_context[1].get_context = &get_contexts[1]; autovector sorted_keys; sorted_keys.push_back(&key_context[0]); From 026bd0710bd0aba873ab7b0951751d3b6ae4bdbc Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 11 Apr 2026 23:19:41 +0800 Subject: [PATCH 062/102] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index c24deef3fb..362c7d3cf3 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit c24deef3fb6b6200918ccd76afa4f73ae2570d0b +Subproject commit 362c7d3cf3961b13ea38a19f293b0d6929bc0e0d From c4e2f0b81ddbf4d2dbf6d715c071e49e0994cdc8 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 11 Apr 2026 23:20:02 +0800 Subject: [PATCH 063/102] add compile.sh 
for claude code checking correctness --- compile.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 compile.sh diff --git a/compile.sh b/compile.sh new file mode 100644 index 0000000000..21a9416435 --- /dev/null +++ b/compile.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +export SANDCASTLE=1 +export BUILD_PREFIX=../build-toplingdb/ +export CXX=clang++ +export CC=clang + +ROCKSDB_VERSION=`build_tools/version.sh full` +TOPLING_CORE_DIR=sideplugin/topling-zip +COMPILER=`bash ${TOPLING_CORE_DIR}/get-compiler-name.sh` +WITH_BMI2=`bash ${TOPLING_CORE_DIR}/cpu_has_bmi2.sh` +UNAME_MachineSystem=`uname -m -s | sed 's:[ /]:-:g'` +BUILD_NAME=${UNAME_MachineSystem}-${COMPILER}-bmi2-${WITH_BMI2} +BUILD_ROOT=build/${BUILD_NAME} + +dir=${BUILD_PREFIX}build/${BUILD_NAME}/dbg/v${ROCKSDB_VERSION} +dir_ut=${BUILD_PREFIX}build-ut/${BUILD_NAME}/dbg/v${ROCKSDB_VERSION} +DEBUG_LEVEL=2 +function map() { + if [[ $1 == *test*.o ]]; then + echo $dir_ut/$1 + elif [[ $1 == *.o ]]; then + echo $dir/$1 + else + echo $1 + fi +} +targets=(`for i in $@;do map $i;done`) + +make PREFIX=/opt UPDATE_REPO=0 -j`nproc` ${targets[@]} From 69622883d64a5719e1d234ae8e338d763e2a623d Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 12 Apr 2026 16:40:36 +0800 Subject: [PATCH 064/102] version_set.cc: FilePicker: remove redundant field `user_key_` --- db/version_set.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 93c36b6506..4561d4357c 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -264,7 +264,7 @@ class FilePicker { return (int)FindFileInRangeTmpl(IKCmp{&icmp}, file_level, key, left, right); } public: - FilePicker(const Slice& user_key, const ParsedInternalKey& ikey, + FilePicker(const ParsedInternalKey& ikey, autovector* file_levels, unsigned int num_levels, FileIndexer* file_indexer, const Comparator* user_comparator, const InternalKeyComparator* internal_comparator) @@ -277,7 +277,6 @@ class 
FilePicker { level_files_brief_(file_levels), is_hit_file_last_in_level_(false), curr_file_level_(nullptr), - user_key_(user_key), ikey_(ikey), file_indexer_(file_indexer), user_comparator_(user_comparator), @@ -322,11 +321,11 @@ class FilePicker { // range. assert(curr_level_ == 0 || curr_index_in_curr_level_ == start_index_in_curr_level_ || - cmp(user_key_, ExtractUserKey(f->smallest_key)) <= 0); + cmp(ikey_.user_key, ExtractUserKey(f->smallest_key)) <= 0); - int cmp_smallest = cmp(user_key_, ExtractUserKey(f->smallest_key)); + int cmp_smallest = cmp(ikey_.user_key, ExtractUserKey(f->smallest_key)); if (cmp_smallest >= 0) { - cmp_largest = cmp(user_key_, ExtractUserKey(f->largest_key)); + cmp_largest = cmp(ikey_.user_key, ExtractUserKey(f->largest_key)); } // Setup file search bound for the next level based on the @@ -385,7 +384,6 @@ class FilePicker { LevelFilesBrief* curr_file_level_; unsigned int curr_index_in_curr_level_; unsigned int start_index_in_curr_level_; - Slice user_key_; ParsedInternalKey ikey_; FileIndexer* file_indexer_; const Comparator* user_comparator_; @@ -2732,7 +2730,7 @@ void Version::GetInst(const ReadOptions& read_options, const ParsedInternalKey& pinned_iters_mgr->StartPinning(); } - FilePicker fp(user_key, ikey, &storage_info_.level_files_brief_, + FilePicker fp(ikey, &storage_info_.level_files_brief_, storage_info_.num_non_empty_levels_, &storage_info_.file_indexer_, user_comparator(), internal_comparator()); From 8f629adbcb33f506fd543d4b3253a17e2ece66a4 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 12 Apr 2026 16:56:18 +0800 Subject: [PATCH 065/102] version_set.cc: GetInst: tell compiler more truth --- db/version_set.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/db/version_set.cc b/db/version_set.cc index 4561d4357c..ea5911fa0f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2707,6 +2707,12 @@ void Version::GetInst(const ReadOptions& read_options, const ParsedInternalKey& tracing_get_id = 
vset_->block_cache_tracer_->NextGetId(); } #endif +#if !defined(TOPLINGDB_WITH_TIMESTAMP) + timestamp = nullptr; // tell compiler it is always null +#endif +#if !defined(TOPLINGDB_WITH_WIDE_COLUMNS) + columns = nullptr; // tell compiler it is always null +#endif // Note: the old StackableDB-based BlobDB passes in // GetImplOptions::is_blob_index; for the integrated BlobDB implementation, we From 5669b357a6f45f3ef70a596b7db0efa3fca1236e Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 14 Apr 2026 00:18:34 +0800 Subject: [PATCH 066/102] version_set.cc: Remove the negative FilePicker refactoring The FilePicker refactoring was introduced on 2014-07-16 (0418e66e2a7cb96924455b12032cdb954576f4ec); it harms performance and makes the code complicated. This commit makes the code simpler and faster. ToplingDB is more than 10x faster than RocksDB; the improvement from this change is significant: the latency is reduced from 260ns to 234ns on E5 2682 v4 in wsl2 with -benchmarks=fillseq,compact,readrandom -value_size=15 -num=100000000. In readrandom, DBImpl::Get consumes 73% and the rest is the db_bench framework, so DBImpl::Get itself takes 234*73% = 170ns --- db/version_set.cc | 388 +++++++++++++++++++--------------------------- 1 file changed, 162 insertions(+), 226 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index ea5911fa0f..6d9c177bdb 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -255,215 +255,6 @@ Status OverlapWithIterator(const Comparator* ucmp, // levels. Therefore we are guaranteed that if we find data // in a smaller level, later levels are irrelevant (unless we // are MergeInProgress). 
-template -class FilePicker { - __always_inline - int FindFileInRange(const InternalKeyComparator& icmp, - const LevelFilesBrief& file_level, const ParsedInternalKey& key, - size_t left, size_t right) { - return (int)FindFileInRangeTmpl(IKCmp{&icmp}, file_level, key, left, right); - } - public: - FilePicker(const ParsedInternalKey& ikey, - autovector* file_levels, unsigned int num_levels, - FileIndexer* file_indexer, const Comparator* user_comparator, - const InternalKeyComparator* internal_comparator) - : num_levels_(num_levels), - curr_level_(static_cast(-1)), - returned_file_level_(static_cast(-1)), - hit_file_level_(static_cast(-1)), - search_left_bound_(0), - search_right_bound_(FileIndexer::kLevelMaxIndex), - level_files_brief_(file_levels), - is_hit_file_last_in_level_(false), - curr_file_level_(nullptr), - ikey_(ikey), - file_indexer_(file_indexer), - user_comparator_(user_comparator), - internal_comparator_(internal_comparator) { - // Setup member variables to search first level. - search_ended_ = !PrepareNextLevel(); - if (!search_ended_) { - // Prefetch Level 0 table data to avoid cache miss if possible. - for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) { - auto* r = (*level_files_brief_)[0].files[i].fd.table_reader; - if (r) { - r->PreparePIK(ikey); - } - } - } - } - - int GetCurrentLevel() const { return curr_level_; } - - FdWithKeyRange* GetNextFile() { - UKCmp cmp{user_comparator_}; - while (!search_ended_) { // Loops over different levels. - while (curr_index_in_curr_level_ < curr_file_level_->num_files) { - // Loops over all files in current level. 
- FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_]; - hit_file_level_ = curr_level_; - is_hit_file_last_in_level_ = - curr_index_in_curr_level_ == curr_file_level_->num_files - 1; - int cmp_largest = -1; - - // Do key range filtering of files or/and fractional cascading if: - // (1) not all the files are in level 0, or - // (2) there are more than 3 current level files - // If there are only 3 or less current level files in the system, we - // skip the key range filtering. In this case, more likely, the system - // is highly tuned to minimize number of tables queried by each query, - // so it is unlikely that key range filtering is more efficient than - // querying the files. - if (num_levels_ > 1 || curr_file_level_->num_files > 3) { - // Check if key is within a file's range. If search left bound and - // right bound point to the same find, we are sure key falls in - // range. - assert(curr_level_ == 0 || - curr_index_in_curr_level_ == start_index_in_curr_level_ || - cmp(ikey_.user_key, ExtractUserKey(f->smallest_key)) <= 0); - - int cmp_smallest = cmp(ikey_.user_key, ExtractUserKey(f->smallest_key)); - if (cmp_smallest >= 0) { - cmp_largest = cmp(ikey_.user_key, ExtractUserKey(f->largest_key)); - } - - // Setup file search bound for the next level based on the - // comparison results - if (curr_level_ > 0) { - file_indexer_->GetNextLevelIndex( - curr_level_, curr_index_in_curr_level_, cmp_smallest, - cmp_largest, &search_left_bound_, &search_right_bound_); - } - // Key falls out of current file's range - if (cmp_smallest < 0 || cmp_largest > 0) { - if (curr_level_ == 0) { - ++curr_index_in_curr_level_; - continue; - } else { - // Search next level. - break; - } - } - } - - returned_file_level_ = curr_level_; - if (curr_level_ > 0 && cmp_largest < 0) { - // No more files to search in this level. - search_ended_ = !PrepareNextLevel(); - } else { - ++curr_index_in_curr_level_; - } - return f; - } - // Start searching next level. 
- search_ended_ = !PrepareNextLevel(); - } - // Search ended. - return nullptr; - } - - // getter for current file level - // for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts - unsigned int GetHitFileLevel() { return hit_file_level_; } - - // Returns true if the most recent "hit file" (i.e., one returned by - // GetNextFile()) is at the last index in its level. - bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; } - - private: - unsigned int num_levels_; - unsigned int curr_level_; - unsigned int returned_file_level_; - unsigned int hit_file_level_; - int32_t search_left_bound_; - int32_t search_right_bound_; - autovector* level_files_brief_; - bool search_ended_; - bool is_hit_file_last_in_level_; - LevelFilesBrief* curr_file_level_; - unsigned int curr_index_in_curr_level_; - unsigned int start_index_in_curr_level_; - ParsedInternalKey ikey_; - FileIndexer* file_indexer_; - const Comparator* user_comparator_; - const InternalKeyComparator* internal_comparator_; - - // Setup local variables to search next level. - // Returns false if there are no more levels to search. - bool PrepareNextLevel() { - curr_level_++; - while (curr_level_ < num_levels_) { - curr_file_level_ = &(*level_files_brief_)[curr_level_]; - if (curr_file_level_->num_files == 0) { - // When current level is empty, the search bound generated from upper - // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is - // also empty. - assert(search_left_bound_ == 0); - assert(search_right_bound_ == -1 || - search_right_bound_ == FileIndexer::kLevelMaxIndex); - // Since current level is empty, it will need to search all files in - // the next level - search_left_bound_ = 0; - search_right_bound_ = FileIndexer::kLevelMaxIndex; - curr_level_++; - continue; - } - - // Some files may overlap each other. We find - // all files that overlap user_key and process them in order from - // newest to oldest. In the context of merge-operator, this can occur at - // any level. 
Otherwise, it only occurs at Level-0 (since Put/Deletes - // are always compacted into a single entry). - int32_t start_index; - if (curr_level_ == 0) { - // On Level-0, we read through all files to check for overlap. - start_index = 0; - } else { - // On Level-n (n>=1), files are sorted. Binary search to find the - // earliest file whose largest key >= ikey. Search left bound and - // right bound are used to narrow the range. - if (search_left_bound_ <= search_right_bound_) { - if (search_right_bound_ == FileIndexer::kLevelMaxIndex) { - search_right_bound_ = - static_cast(curr_file_level_->num_files) - 1; - } - // `search_right_bound_` is an inclusive upper-bound, but since it was - // determined based on user key, it is still possible the lookup key - // falls to the right of `search_right_bound_`'s corresponding file. - // So, pass a limit one higher, which allows us to detect this case. - start_index = - FindFileInRange(*internal_comparator_, *curr_file_level_, ikey_, - static_cast(search_left_bound_), - static_cast(search_right_bound_) + 1); - if (start_index == search_right_bound_ + 1) { - // `ikey_` comes after `search_right_bound_`. The lookup key does - // not exist on this level, so let's skip this level and do a full - // binary search on the next level. - search_left_bound_ = 0; - search_right_bound_ = FileIndexer::kLevelMaxIndex; - curr_level_++; - continue; - } - } else { - // search_left_bound > search_right_bound, key does not exist in - // this level. Since no comparison is done in this level, it will - // need to search all files in the next level. - search_left_bound_ = 0; - search_right_bound_ = FileIndexer::kLevelMaxIndex; - curr_level_++; - continue; - } - } - start_index_in_curr_level_ = start_index; - curr_index_in_curr_level_ = start_index; - - return true; - } - // curr_level_ = num_levels_. So, no more levels to search. 
- return false; - } -}; } // anonymous namespace class FilePickerMultiGet { @@ -2736,17 +2527,154 @@ void Version::GetInst(const ReadOptions& read_options, const ParsedInternalKey& pinned_iters_mgr->StartPinning(); } - FilePicker fp(ikey, &storage_info_.level_files_brief_, - storage_info_.num_non_empty_levels_, - &storage_info_.file_indexer_, user_comparator(), - internal_comparator()); - FdWithKeyRange* f = fp.GetNextFile(); +// FilePicker is a negative optimization, revert it! - while (f != nullptr) { +#if defined(ROCKSDB_UNIT_TEST) +// Prefetch Level 0 table data to avoid cache miss if possible. +if (storage_info_.num_non_empty_levels_ > 0 && + storage_info_.level_files_brief_[0].num_files > 0) { + for (size_t i = 0; i < storage_info_.level_files_brief_[0].num_files; ++i) { + if (auto r = storage_info_.level_files_brief_[0].files[i].fd.table_reader) { + r->PreparePIK(ikey); + } + } +} +#endif +int curr_level = -1; +int32_t search_left_bound = 0; +int32_t search_right_bound = FileIndexer::kLevelMaxIndex; +LevelFilesBrief* curr_file_level = nullptr; +unsigned int curr_index_in_curr_level = 0; +unsigned int start_index_in_curr_level = 0; + +auto prepare_next_level = [&]() -> bool { + curr_level++; + while (curr_level < storage_info_.num_non_empty_levels_) { + curr_file_level = &storage_info_.level_files_brief_[curr_level]; + if (curr_file_level->num_files == 0) { + // When current level is empty, the search bound generated from upper + // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is + // also empty. + assert(search_left_bound == 0); + assert(search_right_bound == -1 || + search_right_bound == FileIndexer::kLevelMaxIndex); + // Since current level is empty, it will need to search all files in + // the next level + search_left_bound = 0; + search_right_bound = FileIndexer::kLevelMaxIndex; + curr_level++; + continue; + } + + // Some files may overlap each other. 
We find + // all files that overlap user_key and process them in order from + // newest to oldest. In the context of merge-operator, this can occur at + // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes + // are always compacted into a single entry). + int32_t start_index; + if (curr_level == 0) { + // On Level-0, we read through all files to check for overlap. + start_index = 0; + } else { + // On Level-n (n>=1), files are sorted. Binary search to find the + // earliest file whose largest key >= ikey. Search left bound and + // right bound are used to narrow the range. + if (search_left_bound <= search_right_bound) { + if (search_right_bound == FileIndexer::kLevelMaxIndex) { + search_right_bound = + static_cast(curr_file_level->num_files) - 1; + } + // `search_right_bound` is an inclusive upper-bound, but since it was + // determined based on user key, it is still possible the lookup key + // falls to the right of `search_right_bound`'s corresponding file. + // So, pass a limit one higher, which allows us to detect this case. + start_index = static_cast(FindFileInRangeTmpl( + IKCmp{internal_comparator()}, *curr_file_level, ikey, + static_cast(search_left_bound), + static_cast(search_right_bound) + 1)); + if (start_index == search_right_bound + 1) { + // `ikey` comes after `search_right_bound`. The lookup key does + // not exist on this level, so let's skip this level and do a full + // binary search on the next level. + search_left_bound = 0; + search_right_bound = FileIndexer::kLevelMaxIndex; + curr_level++; + continue; + } + } else { + // search_left_bound > search_right_bound, key does not exist in + // this level. Since no comparison is done in this level, it will + // need to search all files in the next level. 
+ search_left_bound = 0; + search_right_bound = FileIndexer::kLevelMaxIndex; + curr_level++; + continue; + } + } + start_index_in_curr_level = start_index; + curr_index_in_curr_level = start_index; + + return true; + } + // curr_level = num_non_empty_levels_. So, no more levels to search. + return false; +}; + +while (prepare_next_level()) { + while (curr_index_in_curr_level < curr_file_level->num_files) { + FdWithKeyRange* f = &curr_file_level->files[curr_index_in_curr_level]; + int hit_file_level = curr_level; + bool is_hit_file_last_in_level = + curr_index_in_curr_level == curr_file_level->num_files - 1; + (void)is_hit_file_last_in_level; + int cmp_largest = -1; + + // Do key range filtering of files or/and fractional cascading if: + // (1) not all the files are in level 0, or + // (2) there are more than 3 current level files + // If there are only 3 or less current level files in the system, we + // skip the key range filtering. In this case, more likely, the system + // is highly tuned to minimize number of tables queried by each query, + // so it is unlikely that key range filtering is more efficient than + // querying the files. + if (storage_info_.num_non_empty_levels_ > 1 || curr_file_level->num_files > 3) { + // Check if key is within a file's range. If search left bound and + // right bound point to the same find, we are sure key falls in + // range. 
+ UKCmp ucmp{user_comparator()}; + assert(curr_level == 0 || + curr_index_in_curr_level == start_index_in_curr_level || + ucmp(ikey.user_key, ExtractUserKey(f->smallest_key)) <= 0); + + int cmp_smallest = ucmp(ikey.user_key, ExtractUserKey(f->smallest_key)); + if (cmp_smallest >= 0) { + cmp_largest = ucmp(ikey.user_key, ExtractUserKey(f->largest_key)); + } + + // Setup file search bound for the next level based on the + // comparison results + if (curr_level > 0) { + storage_info_.file_indexer_.GetNextLevelIndex( + curr_level, curr_index_in_curr_level, cmp_smallest, + cmp_largest, &search_left_bound, &search_right_bound); + } + // Key falls out of current file's range + if (cmp_smallest < 0 || cmp_largest > 0) { + if (curr_level == 0) { + ++curr_index_in_curr_level; + continue; + } else { + // Search next level. + break; + } + } + } + + // File passed filtering, process it if (*max_covering_tombstone_seq > 0) { // The remaining files we look at will only contain covered keys, so we // stop here. 
- break; + goto search_complete; } if (get_context.sample()) { sample_file_read_inc(f->file_metadata); @@ -2761,18 +2689,18 @@ void Version::GetInst(const ReadOptions& read_options, const ParsedInternalKey& &get_context, mutable_cf_options_.block_protection_bytes_per_key, mutable_cf_options_.prefix_extractor, #if defined(TOPLINGDB_WITH_FABRICATED_COMPLEXITY) - cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), - IsFilterSkipped(static_cast(fp.GetHitFileLevel()), - fp.IsHitFileLastInLevel()), + cfd_->internal_stats()->GetFileReadHist(hit_file_level), + IsFilterSkipped(static_cast(hit_file_level), + is_hit_file_last_in_level), #else nullptr, false, #endif - fp.GetHitFileLevel(), max_file_size_for_l0_meta_pin_); + hit_file_level, max_file_size_for_l0_meta_pin_); // TODO: examine the behavior for corrupted key if (timer_enabled) { PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(), - fp.GetHitFileLevel()); + hit_file_level); } if (UNLIKELY(!s2.ok())) { *status = std::move(s2); @@ -2797,16 +2725,16 @@ void Version::GetInst(const ReadOptions& read_options, const ParsedInternalKey& // TODO: update per-level perfcontext user_key_return_count for kMerge break; case GetContext::kFound: - if (fp.GetHitFileLevel() == 0) { + if (hit_file_level == 0) { RecordTick(db_statistics_, GET_HIT_L0); - } else if (fp.GetHitFileLevel() == 1) { + } else if (hit_file_level == 1) { RecordTick(db_statistics_, GET_HIT_L1); - } else if (fp.GetHitFileLevel() >= 2) { + } else if (hit_file_level >= 2) { RecordTick(db_statistics_, GET_HIT_L2_AND_UP); } PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1, - fp.GetHitFileLevel()); + hit_file_level); if (is_blob_index && do_merge && (value || columns)) { Slice blob_index = @@ -2857,8 +2785,16 @@ void Version::GetInst(const ReadOptions& read_options, const ParsedInternalKey& *status = Status::Corruption(Status::SubCode::kMergeOperatorFailed); return; } - f = fp.GetNextFile(); + + // Move to next file or level + if 
(curr_level > 0 && cmp_largest < 0) { + // No more files to search in this level. + break; + } + ++curr_index_in_curr_level; } +} +search_complete: if (db_statistics_ != nullptr) { get_context.ReportCounters(); } From 3396bce66aa5bb3cd918ef1e478a4e1de33f7dee Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 14 Apr 2026 05:07:50 +0800 Subject: [PATCH 067/102] version_set.cc: GetInst manually inline `prepare_next_level` This change makes the code simpler and faster. It now takes 230ns, a reduction of 4ns. --- db/version_set.cc | 126 ++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 72 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 6d9c177bdb..f0f0c8cf0d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2540,87 +2540,69 @@ if (storage_info_.num_non_empty_levels_ > 0 && } } #endif -int curr_level = -1; int32_t search_left_bound = 0; int32_t search_right_bound = FileIndexer::kLevelMaxIndex; -LevelFilesBrief* curr_file_level = nullptr; -unsigned int curr_index_in_curr_level = 0; -unsigned int start_index_in_curr_level = 0; - -auto prepare_next_level = [&]() -> bool { - curr_level++; - while (curr_level < storage_info_.num_non_empty_levels_) { - curr_file_level = &storage_info_.level_files_brief_[curr_level]; - if (curr_file_level->num_files == 0) { - // When current level is empty, the search bound generated from upper - // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is - // also empty. - assert(search_left_bound == 0); - assert(search_right_bound == -1 || - search_right_bound == FileIndexer::kLevelMaxIndex); - // Since current level is empty, it will need to search all files in - // the next level - search_left_bound = 0; - search_right_bound = FileIndexer::kLevelMaxIndex; - curr_level++; - continue; - } - - // Some files may overlap each other. We find - // all files that overlap user_key and process them in order from - // newest to oldest. 
In the context of merge-operator, this can occur at - // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes - // are always compacted into a single entry). - int32_t start_index; - if (curr_level == 0) { - // On Level-0, we read through all files to check for overlap. +for (int curr_level = 0; curr_level < storage_info_.num_non_empty_levels_; curr_level++) { + LevelFilesBrief* curr_file_level = &storage_info_.level_files_brief_[curr_level]; + if (curr_file_level->num_files == 0) { + // When current level is empty, the search bound generated from upper + // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is + // also empty. + assert(search_left_bound == 0); + assert(search_right_bound == -1 || + search_right_bound == FileIndexer::kLevelMaxIndex); + // Since current level is empty, it will need to search all files in + // the next level + search_left_bound = 0; + search_right_bound = FileIndexer::kLevelMaxIndex; + continue; + } + + // Some files may overlap each other. We find + // all files that overlap user_key and process them in order from + // newest to oldest. In the context of merge-operator, this can occur at + // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes + // are always compacted into a single entry). + int32_t start_index; + if (curr_level == 0) { + // On Level-0, we read through all files to check for overlap. start_index = 0; - } else { - // On Level-n (n>=1), files are sorted. Binary search to find the - // earliest file whose largest key >= ikey. Search left bound and - // right bound are used to narrow the range. 
- if (search_left_bound <= search_right_bound) { - if (search_right_bound == FileIndexer::kLevelMaxIndex) { - search_right_bound = - static_cast(curr_file_level->num_files) - 1; - } - // `search_right_bound` is an inclusive upper-bound, but since it was - // determined based on user key, it is still possible the lookup key - // falls to the right of `search_right_bound`'s corresponding file. - // So, pass a limit one higher, which allows us to detect this case. - start_index = static_cast(FindFileInRangeTmpl( - IKCmp{internal_comparator()}, *curr_file_level, ikey, - static_cast(search_left_bound), - static_cast(search_right_bound) + 1)); - if (start_index == search_right_bound + 1) { - // `ikey` comes after `search_right_bound`. The lookup key does - // not exist on this level, so let's skip this level and do a full - // binary search on the next level. - search_left_bound = 0; - search_right_bound = FileIndexer::kLevelMaxIndex; - curr_level++; - continue; - } - } else { - // search_left_bound > search_right_bound, key does not exist in - // this level. Since no comparison is done in this level, it will - // need to search all files in the next level. + } else { + // On Level-n (n>=1), files are sorted. Binary search to find the + // earliest file whose largest key >= ikey. Search left bound and + // right bound are used to narrow the range. + if (search_left_bound <= search_right_bound) { + if (search_right_bound == FileIndexer::kLevelMaxIndex) { + search_right_bound = + static_cast(curr_file_level->num_files) - 1; + } + // `search_right_bound` is an inclusive upper-bound, but since it was + // determined based on user key, it is still possible the lookup key + // falls to the right of `search_right_bound`'s corresponding file. + // So, pass a limit one higher, which allows us to detect this case. 
+ start_index = static_cast(FindFileInRangeTmpl( + IKCmp{internal_comparator()}, *curr_file_level, ikey, + static_cast(search_left_bound), + static_cast(search_right_bound) + 1)); + if (start_index == search_right_bound + 1) { + // `ikey` comes after `search_right_bound`. The lookup key does + // not exist on this level, so let's skip this level and do a full + // binary search on the next level. search_left_bound = 0; search_right_bound = FileIndexer::kLevelMaxIndex; - curr_level++; continue; } + } else { + // search_left_bound > search_right_bound, key does not exist in + // this level. Since no comparison is done in this level, it will + // need to search all files in the next level. + search_left_bound = 0; + search_right_bound = FileIndexer::kLevelMaxIndex; + continue; } - start_index_in_curr_level = start_index; - curr_index_in_curr_level = start_index; - - return true; } - // curr_level = num_non_empty_levels_. So, no more levels to search. - return false; -}; - -while (prepare_next_level()) { + unsigned int start_index_in_curr_level __attribute__((unused)) = start_index; + unsigned int curr_index_in_curr_level = start_index; while (curr_index_in_curr_level < curr_file_level->num_files) { FdWithKeyRange* f = &curr_file_level->files[curr_index_in_curr_level]; int hit_file_level = curr_level; From 563d9245f2484489e6a8cc6dfe0e10140bd5f6f6 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 14 Apr 2026 17:59:05 +0800 Subject: [PATCH 068/102] TableCache::GetNoneRowCache: check TOPLINGDB_WITH_TIMESTAMP --- db/table_cache.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/db/table_cache.h b/db/table_cache.h index 91b4d7d377..fc97e10ff3 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -359,9 +359,11 @@ Status TableCache::GetNoneRowCache( auto seq = range_del_iter->MaxCoveringTombstoneSeqnum(pik.user_key); if (seq > *max_covering_tombstone_seq) { *max_covering_tombstone_seq = seq; + #if defined(TOPLINGDB_WITH_TIMESTAMP) if (get_context->NeedTimestamp()) { 
get_context->SetTimestampFromRangeTombstone(range_del_iter->timestamp()); } + #endif } } } From c172d98ba935f4dde2208d519eee66bb1866e6ac Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 14 Apr 2026 18:12:30 +0800 Subject: [PATCH 069/102] version_set: add m_get_no_watch Use a template trick to propagate the template params PerfStepTimer and StopWatchNano; this saves 3ns, about 1.5%. --- db/db_impl/db_impl.cc | 2 +- db/version_set.cc | 8 +++++++- db/version_set.h | 11 +++++++++-- util/stop_watch.h | 5 +++++ 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index f7634407c9..7ed7dbbe73 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2517,7 +2517,7 @@ Status DBImpl::GetInst(const ReadOptions& read_options, const Slice& key, PinnedIteratorsManager pinned_iters_mgr; if (!done) { PERF_TIMER_GUARD(get_from_output_files_time); - sv->current->Get( + sv->current->template Get( read_options, lkey, get_impl_options.value, get_impl_options.columns, timestamp, &s, &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr, diff --git a/db/version_set.cc b/db/version_set.cc index f0f0c8cf0d..573c57e970 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2331,14 +2331,19 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset, if (IsForwardBytewiseComparator(user_comparator())) { m_get = ExtractFuncPtr(this, &Version::GetInst ); + m_get_no_watch = ExtractFuncPtr(this, &Version::GetInst + ); } else if (IsReverseBytewiseComparator(user_comparator())) { m_get = ExtractFuncPtr(this, &Version::GetInst ); + m_get_no_watch = ExtractFuncPtr(this, &Version::GetInst + ); } else { m_get = ExtractFuncPtr(this, &Version::GetInst ); + m_get_no_watch = m_get; // do not instantiate more } } } @@ -2472,7 +2477,7 @@ void Version::MultiGetBlob( } } -template +template ROCKSDB_FLATTEN void Version::GetInst(const ReadOptions& read_options, const ParsedInternalKey& ikey, PinnableSlice* value, 
PinnableWideColumns* columns, @@ -2663,6 +2668,7 @@ for (int curr_level = 0; curr_level < storage_info_.num_non_empty_levels_; curr_ } bool timer_enabled = + !std::is_same_v && perf_level >= PerfLevel::kEnableTimeExceptForMutex && get_perf_context()->per_level_perf_context_enabled; StopWatchNano timer(clock_, timer_enabled /* auto_start */); diff --git a/db/version_set.h b/db/version_set.h index d05e70c845..2c4b3bf7e5 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -897,6 +897,8 @@ class Version { // merge_context.operands_list and don't merge the operands // REQUIRES: lock is not held // REQUIRES: pinned_iters_mgr != nullptr + template void Get(const ReadOptions& ro, const ParsedInternalKey& key, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* status, MergeContext* merge_context, @@ -906,13 +908,17 @@ class Version { SequenceNumber* seq = nullptr, ReadCallback* callback = nullptr, bool* is_blob = nullptr, bool do_merge = true) { - return m_get(this, ro, key, value, columns, timestamp, status, + auto f_get = std::is_same_v + ? m_get_no_watch : m_get; + return f_get(this, ro, key, value, columns, timestamp, status, merge_context, max_covering_tombstone_seq, pinned_iters_mgr, value_found, key_exists, seq, callback, is_blob, do_merge); } private: - template + template void GetInst(const ReadOptions&, const ParsedInternalKey&, PinnableSlice* value, PinnableWideColumns* columns, std::string* timestamp, Status* status, MergeContext* merge_context, @@ -931,6 +937,7 @@ class Version { bool* value_found, bool* key_exists, SequenceNumber* seq, ReadCallback* callback, bool* is_blob, bool do_merge); + decltype(m_get) m_get_no_watch; public: diff --git a/util/stop_watch.h b/util/stop_watch.h index 105a99c1fa..8d2ea93e14 100644 --- a/util/stop_watch.h +++ b/util/stop_watch.h @@ -15,6 +15,9 @@ #endif namespace ROCKSDB_NAMESPACE { + +class StopWatchNano; + // Auto-scoped. 
// When statistics is not nullptr, records the measured time into any enabled // histograms supplied to the constructor. A histogram argument may be omitted @@ -23,6 +26,7 @@ namespace ROCKSDB_NAMESPACE { // added to *elapsed if overwrite is false. class StopWatch { public: + typedef StopWatchNano WatchNano; inline StopWatch(SystemClock* clock, Statistics* statistics, const uint32_t hist_type) noexcept : @@ -223,6 +227,7 @@ class StopWatchNano { }; struct FakeStopWatch { + typedef FakeStopWatch WatchNano; FakeStopWatch(...) {} void DelayStart() {} void DelayStop() {} From 71194bd5edfdff9ad8ec7e90f7c97083b7819452 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 20 Apr 2026 22:57:29 +0800 Subject: [PATCH 070/102] db_bench.sh: convenient fillseq,compact,readrandom --- db_bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db_bench.sh b/db_bench.sh index 9fd9e122e8..4e0fbc5244 100644 --- a/db_bench.sh +++ b/db_bench.sh @@ -18,7 +18,7 @@ args=( -batch_size=100 #-benchmarks=fillseq,compact,nextwithkey,nextwithkey,nextwithkey,nextwithkey,nextwithkey,readseq,readseq,readseq,readseq,readseq -benchmarks=fillrandom,readrandom - #-benchmarks=fillseq,compact + #-benchmarks=fillseq,compact,readrandom # rand DB::Get < 100 nanosec #-benchmarks=compact #-benchmarks=readrandom #-benchmarks=readseq From 5e3a0390e2380dfd9d1c62be14c8884a430191fd Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 20 Apr 2026 22:58:05 +0800 Subject: [PATCH 071/102] compile.sh: export PATH=/usr/local/bin:$PATH --- compile.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/compile.sh b/compile.sh index 21a9416435..b50a735c9f 100644 --- a/compile.sh +++ b/compile.sh @@ -2,6 +2,7 @@ export SANDCASTLE=1 export BUILD_PREFIX=../build-toplingdb/ +export PATH=/usr/local/bin:$PATH export CXX=clang++ export CC=clang From c78a8929c3831a19d32560802367356cf6dca296 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 20 Apr 2026 23:09:38 +0800 Subject: [PATCH 072/102] slice.h: 
Slice::value_type/iterator/const_iterator --- include/rocksdb/slice.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/rocksdb/slice.h b/include/rocksdb/slice.h index 01f892f81c..5395e66697 100644 --- a/include/rocksdb/slice.h +++ b/include/rocksdb/slice.h @@ -33,6 +33,10 @@ namespace ROCKSDB_NAMESPACE { class Slice { public: + typedef char value_type; + typedef const char &const_reference, &reference; + typedef const char *const_iterator, *iterator; + // Create an empty slice. Slice() : data_(""), size_(0) {} From ef5952c2efc4d9925025a1abe9cc826e35fe6ee1 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 24 Apr 2026 22:00:26 +0800 Subject: [PATCH 073/102] db_impl.cc: DBImpl::GetAndRefSuperVersion(cfd,ro) add likely/unlikely --- db/db_impl/db_impl.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 7ed7dbbe73..46c79846c8 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -5151,7 +5151,7 @@ ReadOptions::~ReadOptions() { SuperVersion* DBImpl::GetAndRefSuperVersion(ColumnFamilyData* cfd, const ReadOptions* ro) { - if (!ro->internal_is_in_pinning_section) { + if (UNLIKELY(!ro->internal_is_in_pinning_section)) { // do not use zero copy, same as old behavior return GetAndRefSuperVersion(cfd); } @@ -5159,7 +5159,7 @@ DBImpl::GetAndRefSuperVersion(ColumnFamilyData* cfd, const ReadOptions* ro) { ROCKSDB_ASSERT_EQ(tls->thread_id, ThisThreadID()); size_t cfid = cfd->GetID(); SuperVersion*& sv = tls->GetSuperVersionRef(cfid); - if (sv) { + if (LIKELY(sv != nullptr)) { if (LIKELY(sv->version_number == cfd->GetSuperVersionNumberNoAtomic())) { ROCKSDB_ASSERT_EQ(sv->cfd, cfd); return sv; From bc70eb4130e7380157d637bbaa2db9efddcd1950 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 25 Apr 2026 12:20:39 +0800 Subject: [PATCH 074/102] db_impl.h: Add several `final` onto virtual func --- db/db_impl/db_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 97bc9c3e2e..d706c1db20 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -448,7 +448,7 @@ class DBImpl : public DB { virtual Status LockWAL() override; virtual Status UnlockWAL() override; - virtual SequenceNumber GetLatestSequenceNumber() const override; + virtual SequenceNumber GetLatestSequenceNumber() const override final; // IncreaseFullHistoryTsLow(ColumnFamilyHandle*, std::string) will acquire // and release db_mutex @@ -466,7 +466,7 @@ class DBImpl : public DB { virtual Status GetDbSessionId(std::string& session_id) const override; - ColumnFamilyHandle* DefaultColumnFamily() const override; + ColumnFamilyHandle* DefaultColumnFamily() const override final; ColumnFamilyHandle* PersistentStatsColumnFamily() const; @@ -677,7 +677,7 @@ class DBImpl : public DB { bool expose_blob_index = false, bool allow_refresh = true); - virtual SequenceNumber GetLastPublishedSequence() const { + virtual SequenceNumber GetLastPublishedSequence() const final { if (last_seq_same_as_publish_seq_) { return versions_->LastSequence(); } else { @@ -687,7 +687,7 @@ class DBImpl : public DB { // REQUIRES: joined the main write queue if two_write_queues is disabled, and // the second write queue otherwise. - virtual void SetLastPublishedSequence(SequenceNumber seq); + virtual void SetLastPublishedSequence(SequenceNumber seq) final; // Returns LastSequence in last_seq_same_as_publish_seq_ // mode and LastAllocatedSequence otherwise. This is useful when visiblility // depends also on data written to the WAL but not to the memtable. 
From 9f0964f149a4dbdc11cbae1a523fb7d54550d9aa Mon Sep 17 00:00:00 2001 From: leipeng Date: Fri, 1 May 2026 01:02:04 +0800 Subject: [PATCH 075/102] c: add C FFI for registering side plugin comparator, filter, merge_op, slice_transform, and filter_policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide extern "C" registration functions enabling C-language plugin authors to register their custom plugins with the side plugin framework. Five plugin types are covered: - comparator: bare const pointer (PluginFactory) - merge_operator: shared_ptr (PluginFactory>) - compaction_filter_factory: shared_ptr (ditto for CompactionFilterFactory) - slicetransform: shared_ptr - filterpolicy: shared_ptr Each has a register/unregister pair. Two internal template helpers bridge the C/C++ boundary: - side_plugin_register_raw_ptr_plugin wraps a C creator (returning FFI_Object*) into a PluginFactory AcqFunc that returns Object*. Used for comparator, which the framework stores as bare const pointer. - side_plugin_register_shared_ptr_plugin likewise wraps a C creator, then wraps the returned pointer in a shared_ptr. The C bridge objects (rocksdb_mergeoperator_t, etc.) have proper destructors that invoke the user-provided cleanup callback, so normal shared_ptr deletion is safe — no intentional leak needed. Each wrapper lambda converts the C++ SidePluginRepo reference back to a C side_plugin_repo_t* (validated by static_assert that the C struct has the C++ object as its first member via offsetof == 0), and serializes the C++ json to a C string at the FFI boundary. 
--- db/c.cc | 86 ++++++++++++++++++++++++++++++++++++++++++++- include/rocksdb/c.h | 68 +++++++++++++++++++++++++++++++++++ sideplugin/rockside | 2 +- 3 files changed, 154 insertions(+), 2 deletions(-) diff --git a/db/c.cc b/db/c.cc index 407316f7ca..be6fa3ef6b 100644 --- a/db/c.cc +++ b/db/c.cc @@ -50,7 +50,7 @@ #include "rocksdb/write_buffer_manager.h" #include "util/stderr_logger.h" #include "utilities/merge_operators.h" -#include "topling/side_plugin_repo.h" +#include "topling/side_plugin_factory.h" using ROCKSDB_NAMESPACE::BackupEngine; using ROCKSDB_NAMESPACE::BackupEngineOptions; @@ -7079,4 +7079,88 @@ const char* rocksdb_get_name(rocksdb_t* p) { return p->rep->GetName().c_str(); } +} // end extern "C" + +using ROCKSDB_NAMESPACE::SidePluginRepo; +using ROCKSDB_NAMESPACE::PluginFactory; +using ROCKSDB_NAMESPACE::json; + +template +static void side_plugin_register_raw_ptr_plugin +(const char* name, FFI_Object*(*creator)(const char* strjson, const side_plugin_repo_t*)) +{ + auto cxx_creator = [creator](const json& js, const SidePluginRepo& repo) { + std::string strjson = js.dump(); + static_assert(offsetof(side_plugin_repo_t, repo) == 0); + Object* ptr = creator(strjson.c_str(), (const side_plugin_repo_t*)(&repo)); + return ptr; + }; + PluginFactory::DoReg(name, cxx_creator, __FILE__, __LINE__); +} + +template +static void side_plugin_register_shared_ptr_plugin +(const char* name, FFI_Object*(*creator)(const char* strjson, const side_plugin_repo_t*)) +{ + auto cxx_creator = [creator](const json& js, const SidePluginRepo& repo) { + std::string strjson = js.dump(); + static_assert(offsetof(side_plugin_repo_t, repo) == 0); + Object* ptr = creator(strjson.c_str(), (const side_plugin_repo_t*)(&repo)); + return std::shared_ptr(ptr); + }; + PluginFactory >::DoReg(name, cxx_creator, __FILE__, __LINE__); +} + +extern "C" { + +void side_plugin_register_comparator +(const char* name, rocksdb_comparator_creator_t creator) { + side_plugin_register_raw_ptr_plugin(name, 
creator); +} +void side_plugin_unregister_comparator(const char* name) { + PluginFactory::UnReg(name); +} + +void side_plugin_register_compaction_filter_factory +(const char* name, rocksdb_compactionfilterfactory_creator_t creator) { + side_plugin_register_shared_ptr_plugin(name, creator); +} +void side_plugin_unregister_compaction_filter_factory(const char* name) { + PluginFactory >::UnReg(name); +} + +void side_plugin_register_merge_operator +(const char* name, rocksdb_mergeoperator_creator_t creator) { + side_plugin_register_shared_ptr_plugin(name, creator); +} +void side_plugin_unregister_merge_operator(const char* name) { + PluginFactory >::UnReg(name); +} + +void side_plugin_register_slicetransform +(const char* name, rocksdb_slicetransform_creator_t creator) { + side_plugin_register_shared_ptr_plugin(name, creator); +} +void side_plugin_unregister_slicetransform(const char* name) { + PluginFactory >::UnReg(name); +} + +void side_plugin_register_filterpolicy +(const char* name, rocksdb_filterpolicy_creator_t creator) { + side_plugin_register_shared_ptr_plugin(name, creator); +} +void side_plugin_unregister_filterpolicy(const char* name) { + PluginFactory >::UnReg(name); +} + +#if 0 // rocksdb c api does not support custom rate limiter +void side_plugin_register_ratelimiter +(const char* name, rocksdb_ratelimiter_creator_t creator) { + side_plugin_register_shared_ptr_plugin(name, creator); +} +void side_plugin_unregister_ratelimiter(const char* name) { + PluginFactory >::UnReg(name); +} +#endif + } // end extern "C" diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 263b40491a..a38fb95c05 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -3106,6 +3106,74 @@ extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_close_all(side_plugin_repo extern ROCKSDB_LIBRARY_API_WEAK const char* rocksdb_get_name(rocksdb_t*); +typedef const rocksdb_comparator_t* +(*rocksdb_comparator_creator_t) +(const char* strjson, const side_plugin_repo_t* repo); + 
+extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_register_comparator +(const char* name, rocksdb_comparator_creator_t); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_unregister_comparator(const char* name); + +typedef rocksdb_mergeoperator_t* +(*rocksdb_mergeoperator_creator_t) +(const char* strjson, const side_plugin_repo_t* repo); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_register_merge_operator +(const char* name, rocksdb_mergeoperator_creator_t); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_unregister_merge_operator(const char* name); + +typedef rocksdb_compactionfilterfactory_t* +(*rocksdb_compactionfilterfactory_creator_t) +(const char* strjson, const side_plugin_repo_t* repo); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_register_compaction_filter_factory +(const char* name, rocksdb_compactionfilterfactory_creator_t); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_unregister_compaction_filter_factory(const char* name); + +typedef rocksdb_slicetransform_t* +(*rocksdb_slicetransform_creator_t) +(const char* strjson, const side_plugin_repo_t* repo); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_register_slicetransform +(const char* name, rocksdb_slicetransform_creator_t); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_unregister_slicetransform(const char* name); + +typedef rocksdb_filterpolicy_t* +(*rocksdb_filterpolicy_creator_t) +(const char* strjson, const side_plugin_repo_t* repo); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_register_filterpolicy +(const char* name, rocksdb_filterpolicy_creator_t); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_unregister_filterpolicy(const char* name); + +#if 0 // rocksdb c api does not support custom rate limiter +typedef rocksdb_ratelimiter_t* +(*rocksdb_ratelimiter_creator_t) +(const char* strjson, const side_plugin_repo_t* repo); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_register_ratelimiter +(const char* name, 
rocksdb_ratelimiter_creator_t); + +extern ROCKSDB_LIBRARY_API_WEAK +void side_plugin_unregister_ratelimiter(const char* name); +#endif + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/sideplugin/rockside b/sideplugin/rockside index 362c7d3cf3..ac58fb36cc 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 362c7d3cf3961b13ea38a19f293b0d6929bc0e0d +Subproject commit ac58fb36cc810ac30d6998d900bdc30f0d520929 From 70f61c4ce907663816f4696dcd19bf0bd441e0ab Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 2 May 2026 15:36:54 +0800 Subject: [PATCH 076/102] chore: update submodule rockside to 5cdbb9b Include side_plugin_tpl_inst.cc: add explicit_instantiate_serde(FilterPolicy) --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index ac58fb36cc..5cdbb9bdfd 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit ac58fb36cc810ac30d6998d900bdc30f0d520929 +Subproject commit 5cdbb9bdfdae0454a1a1377c5c8e9871ebb35c99 From 15409cd9a1b9babf74c4d2cbc6c9c1bc03784cdd Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 2 May 2026 15:37:18 +0800 Subject: [PATCH 077/102] c: rename FFI_Object to FFI_BridgeObject in register templates --- db/c.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/db/c.cc b/db/c.cc index be6fa3ef6b..a40ea680fe 100644 --- a/db/c.cc +++ b/db/c.cc @@ -7085,9 +7085,9 @@ using ROCKSDB_NAMESPACE::SidePluginRepo; using ROCKSDB_NAMESPACE::PluginFactory; using ROCKSDB_NAMESPACE::json; -template +template static void side_plugin_register_raw_ptr_plugin -(const char* name, FFI_Object*(*creator)(const char* strjson, const side_plugin_repo_t*)) +(const char* name, FFI_BridgeObject*(*creator)(const char* strjson, const side_plugin_repo_t*)) { auto cxx_creator = [creator](const json& js, const SidePluginRepo& repo) { std::string strjson = js.dump(); @@ -7098,9 +7098,9 @@ static void 
side_plugin_register_raw_ptr_plugin PluginFactory::DoReg(name, cxx_creator, __FILE__, __LINE__); } -template +template static void side_plugin_register_shared_ptr_plugin -(const char* name, FFI_Object*(*creator)(const char* strjson, const side_plugin_repo_t*)) +(const char* name, FFI_BridgeObject*(*creator)(const char* strjson, const side_plugin_repo_t*)) { auto cxx_creator = [creator](const json& js, const SidePluginRepo& repo) { std::string strjson = js.dump(); From 6ffacb8a57dc5b0bd6338e23d21276bd9f3352ba Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 2 May 2026 17:43:25 +0800 Subject: [PATCH 078/102] c: add rocksdb_stdstr_t type with create/destroy API --- db/c.cc | 10 ++++++++++ include/rocksdb/c.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/db/c.cc b/db/c.cc index a40ea680fe..f9d9497efc 100644 --- a/db/c.cc +++ b/db/c.cc @@ -6754,6 +6754,16 @@ const char* rocksdb_pinnableslice_value(const rocksdb_pinnableslice_t* v, return v->rep.data(); } +struct rocksdb_stdstr_t { + std::string rep; +}; + +rocksdb_stdstr_t* rocksdb_stdstr_create(const char* str, size_t len) { + return new rocksdb_stdstr_t{std::string(str, len)}; +} + +void rocksdb_stdstr_destroy(rocksdb_stdstr_t* v) { delete v; } + // container to keep databases and caches in order to use // ROCKSDB_NAMESPACE::MemoryUtil struct rocksdb_memory_consumers_t { diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index a38fb95c05..4303a3f1ed 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -143,6 +143,7 @@ typedef struct rocksdb_statistics_histogram_data_t rocksdb_statistics_histogram_data_t; typedef struct rocksdb_wait_for_compact_options_t rocksdb_wait_for_compact_options_t; +typedef struct rocksdb_stdstr_t rocksdb_stdstr_t; #if !defined(ROCKSDB_C_API_IMPLEMENTATION) struct rocksdb_slice_t { @@ -2970,6 +2971,9 @@ extern ROCKSDB_LIBRARY_API void rocksdb_pinnableslice_destroy( extern ROCKSDB_LIBRARY_API const char* rocksdb_pinnableslice_value( const rocksdb_pinnableslice_t* t, 
size_t* vlen); +extern ROCKSDB_LIBRARY_API rocksdb_stdstr_t* rocksdb_stdstr_create(const char* str, size_t len); +extern ROCKSDB_LIBRARY_API void rocksdb_stdstr_destroy(rocksdb_stdstr_t* v); + extern ROCKSDB_LIBRARY_API rocksdb_memory_consumers_t* rocksdb_memory_consumers_create(void); extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_add_db( From b891be2c320b0de3f38649dd5d03d5ec187b7e81 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 2 May 2026 17:49:54 +0800 Subject: [PATCH 079/102] c: add FFI_SerDe, FFI_WebManip and side_plugin_register_ex Add FFI_SerDe/FFI_WebManip for distributed compaction serde and webview bridge. Introduce side_plugin_ex_vtab_t for C-side vtable. Update all register/unregister functions to support extended vtab (serde + web). --- db/c.cc | 183 ++++++++++++++++++++++++++++++++++++++++---- include/rocksdb/c.h | 29 +++++-- 2 files changed, 192 insertions(+), 20 deletions(-) diff --git a/db/c.cc b/db/c.cc index f9d9497efc..21ddfcbac7 100644 --- a/db/c.cc +++ b/db/c.cc @@ -51,6 +51,8 @@ #include "util/stderr_logger.h" #include "utilities/merge_operators.h" #include "topling/side_plugin_factory.h" +#include "db/compaction/compaction_executor.h" +#include "logging/logging.h" using ROCKSDB_NAMESPACE::BackupEngine; using ROCKSDB_NAMESPACE::BackupEngineOptions; @@ -7091,13 +7093,154 @@ const char* rocksdb_get_name(rocksdb_t* p) { } // end extern "C" +#define DoPrintLog(...) \ + info_log ? ROCKS_LOG_INFO(info_log, __VA_ARGS__) \ + : (void)fprintf(stderr, __VA_ARGS__) +#define PrintLog(level, fmt, ...) \ + do { if (SidePluginRepo::DebugLevel() >= level) \ + DoPrintLog("%s: " fmt "\n", \ + TERARK_PP_SmartForPrintf(rocksdb::StrDateTimeNow(), ## __VA_ARGS__)); \ + } while (0) +#define TRAC(...) PrintLog(4, "TRAC: " __VA_ARGS__) +#define DEBG(...) PrintLog(3, "DEBG: " __VA_ARGS__) +#define INFO(...) PrintLog(2, "INFO: " __VA_ARGS__) +#define WARN(...) 
PrintLog(1, "WARN: " __VA_ARGS__) + +namespace ROCKSDB_NAMESPACE { + +template +static void* get_ffi_obj(const FFI_BridgeObject* bridge) { + // existing rocksdb bridge classes consistently name ffi_obj as state_. + // bridge itself is always const. + return bridge->state_; +} + +using terark::llong; +template +struct FFI_SerDe : public DcompactSerDeFunc { + virtual void SerializeRequest(FILE* fp, const Object& obj) const final { + ROCKSDB_VERIFY(!IsCompactionWorker()); // phase 1, DB Side + auto bridge = dynamic_cast(&obj); + ROCKSDB_VERIFY(nullptr != bridge); + DEBG("job-%05d cf-%d %s::SerializeRequest: job raw = %.3f GB, zip = %.3f GB, smallest_seqno = %lld", + job_id, m_cp->cf_id, m_name, rawzip[0]/1e9, rawzip[1]/1e9, (llong)m_cp->smallest_seqno); + ffi_vtab.serialize_request(fp, get_ffi_obj(bridge)); + } + virtual void DeSerializeRequest(FILE* fp, Object* obj) const final { + ROCKSDB_VERIFY(IsCompactionWorker()); // phase 2, compact worker side + DEBG("job-%05d cf-%d %s::DeSerializeRequest: job raw = %.3f GB, zip = %.3f GB, smallest_seqno = %lld", + job_id, m_cp->cf_id, m_name, rawzip[0]/1e9, rawzip[1]/1e9, (llong)m_cp->smallest_seqno); + auto bridge = dynamic_cast(obj); + ROCKSDB_VERIFY(nullptr != bridge); + ffi_vtab.deserialize_request(fp, get_ffi_obj(bridge)); + } + virtual void SerializeResponse(FILE* fp, const Object& obj) const final { + ROCKSDB_VERIFY(IsCompactionWorker()); // phase 3, compact worker side + auto bridge = dynamic_cast(&obj); + ROCKSDB_VERIFY(nullptr != bridge); + ffi_vtab.serialize_response(fp, get_ffi_obj(bridge)); + } + virtual void DeSerializeResponse(FILE* fp, Object* obj) const final { + ROCKSDB_VERIFY(!IsCompactionWorker()); // phase 4, DB side + auto bridge = dynamic_cast(obj); + ROCKSDB_VERIFY(nullptr != bridge); + ffi_vtab.deserialize_response(fp, get_ffi_obj(bridge)); + } + FFI_SerDe(const json& js, const SidePluginRepo& repo, + const std::string& name, const side_plugin_ex_vtab_t& vtab) + : m_name(name) + { + ffi_vtab = vtab; + 
auto cp = m_cp = JS_CompactionParamsDecodePtr(js); + info_log = cp->info_log; + const auto& smallest_user_key = Slice(cp->smallest_user_key).ToString(true/*hex*/); + const auto& largest_user_key = Slice(cp->largest_user_key).ToString(true/*hex*/); + job_id = cp->job_id; + cp->InputBytes(rawzip); + TRAC("job-%05d cf-%d %s::FFI_SerDe: smallest_user_key = %s, largest_user_key = %s, job raw = %.3f GB, zip = %.3f GB", + cp->job_id, cp->cf_id, name, smallest_user_key.c_str(), largest_user_key.c_str(), rawzip[0]/1e9, rawzip[1]/1e9); + } + std::string m_name; + side_plugin_ex_vtab_t ffi_vtab; + const CompactionParams* m_cp; + rocksdb::Logger* info_log; + int job_id; + size_t rawzip[2]; +}; + +template +struct FFI_WebManip : public PluginManipFunc { + virtual void Update(Object* obj, const json& query, const json& body, + const SidePluginRepo& repo) const { + std::string str_qry = query.dump(); + std::string str_body = body.dump(); + auto bridge = dynamic_cast(obj); + ROCKSDB_VERIFY(nullptr != bridge); + auto ffi_repo = (const side_plugin_repo_t*)(&repo); + if (m_ffi_vtab.web_update) { + m_ffi_vtab.web_update(get_ffi_obj(bridge), str_qry.c_str(), str_body.c_str(), ffi_repo); + } + } + virtual std::string ToString(const Object& obj, const json& query, + const SidePluginRepo& repo) const { + std::string str_qry = query.dump(); + auto bridge = dynamic_cast(&obj); + ROCKSDB_VERIFY(nullptr != bridge); + auto ffi_repo = (const side_plugin_repo_t*)(&repo); + rocksdb_stdstr_t* result = m_ffi_vtab.web_view(get_ffi_obj(bridge), str_qry.c_str(), ffi_repo); + TERARK_VERIFY(nullptr != result); + TERARK_SCOPE_EXIT(rocksdb_stdstr_destroy(result)); + return std::move(result->rep); + } + FFI_WebManip(const side_plugin_ex_vtab_t& ffi_vtab) : m_ffi_vtab(ffi_vtab) {} + side_plugin_ex_vtab_t m_ffi_vtab; +}; + +template +static void side_plugin_register_ex(const char* name, const side_plugin_ex_vtab_t* ex_vtab) { + if (ex_vtab->serialize_request) { + ROCKSDB_VERIFY(nullptr != 
ex_vtab->deserialize_request ); + ROCKSDB_VERIFY(nullptr != ex_vtab-> serialize_response); + ROCKSDB_VERIFY(nullptr != ex_vtab->deserialize_response); + using NoConstObj = std::remove_const_t; + auto cxx_creator = [name=std::string(name), cp_vtab=*ex_vtab] + (const json& js, const SidePluginRepo& repo) -> std::shared_ptr > { + static_assert(offsetof(side_plugin_repo_t, repo) == 0); + return std::make_shared >(js, repo, name, cp_vtab); + }; + SerDeFactory::DoReg(name, cxx_creator, __FILE__, __LINE__); + } + if (ex_vtab->web_view) { + auto cxx_creator = [ + singleton=std::make_shared >(*ex_vtab) + ](const json&, const SidePluginRepo&) -> const PluginManipFunc* { + static_assert(offsetof(side_plugin_repo_t, repo) == 0); + return singleton.get(); + }; + PluginManip::DoReg(name, cxx_creator, __FILE__, __LINE__); + } +} + +} // ROCKSDB_NAMESPACE + using ROCKSDB_NAMESPACE::SidePluginRepo; using ROCKSDB_NAMESPACE::PluginFactory; using ROCKSDB_NAMESPACE::json; +using ROCKSDB_NAMESPACE::side_plugin_register_ex; +using ROCKSDB_NAMESPACE::SerDeFactory; +using ROCKSDB_NAMESPACE::PluginManip; + +template +static void side_plugin_unregister_ex(const char* name) { + using NoConstObj = std::remove_const_t; + SerDeFactory::UnReg(name); + PluginManip::UnReg(name); +} template static void side_plugin_register_raw_ptr_plugin -(const char* name, FFI_BridgeObject*(*creator)(const char* strjson, const side_plugin_repo_t*)) +(const char* name, FFI_BridgeObject*(*creator)(const char* strjson, const side_plugin_repo_t*), + const side_plugin_ex_vtab_t* ex_vtab) { auto cxx_creator = [creator](const json& js, const SidePluginRepo& repo) { std::string strjson = js.dump(); @@ -7106,11 +7249,15 @@ static void side_plugin_register_raw_ptr_plugin return ptr; }; PluginFactory::DoReg(name, cxx_creator, __FILE__, __LINE__); + if (ex_vtab) { + side_plugin_register_ex(name, ex_vtab); + } } template static void side_plugin_register_shared_ptr_plugin -(const char* name, FFI_BridgeObject*(*creator)(const 
char* strjson, const side_plugin_repo_t*)) +(const char* name, FFI_BridgeObject*(*creator)(const char* strjson, const side_plugin_repo_t*), + const side_plugin_ex_vtab_t* ex_vtab) { auto cxx_creator = [creator](const json& js, const SidePluginRepo& repo) { std::string strjson = js.dump(); @@ -7119,54 +7266,62 @@ static void side_plugin_register_shared_ptr_plugin return std::shared_ptr(ptr); }; PluginFactory >::DoReg(name, cxx_creator, __FILE__, __LINE__); + if (ex_vtab) { + side_plugin_register_ex(name, ex_vtab); + } } extern "C" { void side_plugin_register_comparator -(const char* name, rocksdb_comparator_creator_t creator) { - side_plugin_register_raw_ptr_plugin(name, creator); +(const char* name, rocksdb_comparator_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { + side_plugin_register_raw_ptr_plugin(name, creator, ex_vtab); } void side_plugin_unregister_comparator(const char* name) { PluginFactory::UnReg(name); + side_plugin_unregister_ex(name); } void side_plugin_register_compaction_filter_factory -(const char* name, rocksdb_compactionfilterfactory_creator_t creator) { - side_plugin_register_shared_ptr_plugin(name, creator); +(const char* name, rocksdb_compactionfilterfactory_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { + side_plugin_register_shared_ptr_plugin(name, creator, ex_vtab); } void side_plugin_unregister_compaction_filter_factory(const char* name) { PluginFactory >::UnReg(name); + side_plugin_unregister_ex(name); } void side_plugin_register_merge_operator -(const char* name, rocksdb_mergeoperator_creator_t creator) { - side_plugin_register_shared_ptr_plugin(name, creator); +(const char* name, rocksdb_mergeoperator_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { + side_plugin_register_shared_ptr_plugin(name, creator, ex_vtab); } void side_plugin_unregister_merge_operator(const char* name) { PluginFactory >::UnReg(name); + side_plugin_unregister_ex(name); } void side_plugin_register_slicetransform -(const char* name, 
rocksdb_slicetransform_creator_t creator) { - side_plugin_register_shared_ptr_plugin(name, creator); +(const char* name, rocksdb_slicetransform_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { + side_plugin_register_shared_ptr_plugin(name, creator, ex_vtab); } void side_plugin_unregister_slicetransform(const char* name) { PluginFactory >::UnReg(name); + side_plugin_unregister_ex(name); } void side_plugin_register_filterpolicy -(const char* name, rocksdb_filterpolicy_creator_t creator) { - side_plugin_register_shared_ptr_plugin(name, creator); +(const char* name, rocksdb_filterpolicy_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { + side_plugin_register_shared_ptr_plugin(name, creator, ex_vtab); } void side_plugin_unregister_filterpolicy(const char* name) { PluginFactory >::UnReg(name); + side_plugin_unregister_ex(name); } #if 0 // rocksdb c api does not support custom rate limiter void side_plugin_register_ratelimiter -(const char* name, rocksdb_ratelimiter_creator_t creator) { - side_plugin_register_shared_ptr_plugin(name, creator); +(const char* name, rocksdb_ratelimiter_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { + side_plugin_register_shared_ptr_plugin(name, creator, ex_vtab); } void side_plugin_unregister_ratelimiter(const char* name) { PluginFactory >::UnReg(name); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 4303a3f1ed..6de277db64 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -67,6 +67,7 @@ extern "C" { #include #include #include +#include /* Exported types */ @@ -3110,13 +3111,29 @@ extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_close_all(side_plugin_repo extern ROCKSDB_LIBRARY_API_WEAK const char* rocksdb_get_name(rocksdb_t*); +struct side_plugin_ex_vtab_t { + // serialize_request == NULL means serde(all the 4) are not supported + void (* serialize_request )(FILE*, const void* obj); + void (*deserialize_request )(FILE*, void* obj); + void (* serialize_response)(FILE*, const void* 
obj); + void (*deserialize_response)(FILE*, void* obj); + + // web_view == NULL means web view and update are not supported + // web_update == NULL means web only update is not supported + rocksdb_stdstr_t* (*web_view)(const void* obj, const char* dump_options_json, const side_plugin_repo_t*); + void (*web_update)(void* obj, const char* dump_options_json, const char* body_json, const side_plugin_repo_t*); +}; +#if !defined(__cplusplus) +typedef struct side_plugin_ex_vtab_t side_plugin_ex_vtab_t; +#endif + typedef const rocksdb_comparator_t* (*rocksdb_comparator_creator_t) (const char* strjson, const side_plugin_repo_t* repo); extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_register_comparator -(const char* name, rocksdb_comparator_creator_t); +(const char* name, rocksdb_comparator_creator_t, const side_plugin_ex_vtab_t*); extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_comparator(const char* name); @@ -3127,7 +3144,7 @@ typedef rocksdb_mergeoperator_t* extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_register_merge_operator -(const char* name, rocksdb_mergeoperator_creator_t); +(const char* name, rocksdb_mergeoperator_creator_t, const side_plugin_ex_vtab_t*); extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_merge_operator(const char* name); @@ -3138,7 +3155,7 @@ typedef rocksdb_compactionfilterfactory_t* extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_register_compaction_filter_factory -(const char* name, rocksdb_compactionfilterfactory_creator_t); +(const char* name, rocksdb_compactionfilterfactory_creator_t, const side_plugin_ex_vtab_t*); extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_compaction_filter_factory(const char* name); @@ -3149,7 +3166,7 @@ typedef rocksdb_slicetransform_t* extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_register_slicetransform -(const char* name, rocksdb_slicetransform_creator_t); +(const char* name, rocksdb_slicetransform_creator_t, const side_plugin_ex_vtab_t*); extern ROCKSDB_LIBRARY_API_WEAK 
void side_plugin_unregister_slicetransform(const char* name); @@ -3160,7 +3177,7 @@ typedef rocksdb_filterpolicy_t* extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_register_filterpolicy -(const char* name, rocksdb_filterpolicy_creator_t); +(const char* name, rocksdb_filterpolicy_creator_t, const side_plugin_ex_vtab_t*); extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_filterpolicy(const char* name); @@ -3172,7 +3189,7 @@ typedef rocksdb_ratelimiter_t* extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_register_ratelimiter -(const char* name, rocksdb_ratelimiter_creator_t); +(const char* name, rocksdb_ratelimiter_creator_t, const side_plugin_ex_vtab_t*); extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_ratelimiter(const char* name); From 207a070dc50dc2a0f6bbf0daef152fdcf308945d Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 2 May 2026 15:40:35 +0800 Subject: [PATCH 080/102] c: add side_plugin_repo_forget_db API Allow closing a single DB instance managed by the repo. 
--- db/c.cc | 4 ++++ include/rocksdb/c.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/db/c.cc b/db/c.cc index 21ddfcbac7..a46a7be606 100644 --- a/db/c.cc +++ b/db/c.cc @@ -7087,6 +7087,10 @@ void side_plugin_repo_close_all(side_plugin_repo_t* r) { delete r; } +void side_plugin_repo_forget_db(side_plugin_repo_t* r, rocksdb_t* db) { + r->repo.CloseOneDB(db->rep, false); +} + const char* rocksdb_get_name(rocksdb_t* p) { return p->rep->GetName().c_str(); } diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 6de277db64..5f801d0ca0 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -3109,6 +3109,8 @@ side_plugin_repo_put_cf_options(side_plugin_repo_t*, const char* name, rocksdb_o extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_close_all(side_plugin_repo_t*); +extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_forget_db(side_plugin_repo_t*, rocksdb_t*); + extern ROCKSDB_LIBRARY_API_WEAK const char* rocksdb_get_name(rocksdb_t*); struct side_plugin_ex_vtab_t { From 6ba2d57c105289543e24b76fa42168c75abc4ea8 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 3 May 2026 10:48:55 +0800 Subject: [PATCH 081/102] c: add side_plugin_repo_import(repo, json_str, errptr) --- db/c.cc | 6 ++++++ include/rocksdb/c.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/db/c.cc b/db/c.cc index a46a7be606..2da3355983 100644 --- a/db/c.cc +++ b/db/c.cc @@ -7017,6 +7017,12 @@ void side_plugin_repo_import_auto_file(side_plugin_repo_t* r, SaveError(errptr, s); } +void side_plugin_repo_import(side_plugin_repo_t* r, + const char* json_str, char** errptr) { + auto s = r->repo.Import(std::string(json_str)); + SaveError(errptr, s); +} + rocksdb_t* side_plugin_repo_open(side_plugin_repo_t* r, rocksdb_column_family_handle_t*** p_cfhs, size_t* num_cf, char** errptr) { if (p_cfhs) { // Open with column families diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 5f801d0ca0..a25f5f88f9 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -3088,6 
+3088,9 @@ extern ROCKSDB_LIBRARY_API_WEAK side_plugin_repo_t* side_plugin_repo_create(void extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_import_auto_file (side_plugin_repo_t*, const char* fname, char** errptr); +extern ROCKSDB_LIBRARY_API_WEAK void +side_plugin_repo_import(side_plugin_repo_t*, const char* json_str, char** errptr); + extern ROCKSDB_LIBRARY_API_WEAK rocksdb_t* side_plugin_repo_open(side_plugin_repo_t*, rocksdb_column_family_handle_t***, size_t* num_cf, char** errptr); From a1e611cb115f7e64f2f090a683a839064912f42a Mon Sep 17 00:00:00 2001 From: rockeet Date: Sun, 3 May 2026 21:47:57 +0800 Subject: [PATCH 082/102] c: add side_plugin_xxxx_get_state(const rocksdb_xxxx_t*) --- db/c.cc | 18 ++++++++++++++++++ include/rocksdb/c.h | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/db/c.cc b/db/c.cc index 2da3355983..5697754b77 100644 --- a/db/c.cc +++ b/db/c.cc @@ -7291,6 +7291,9 @@ void side_plugin_unregister_comparator(const char* name) { PluginFactory::UnReg(name); side_plugin_unregister_ex(name); } +void* side_plugin_comparator_get_state(const rocksdb_comparator_t* p) { + return p->state_; +} void side_plugin_register_compaction_filter_factory (const char* name, rocksdb_compactionfilterfactory_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { @@ -7300,6 +7303,9 @@ void side_plugin_unregister_compaction_filter_factory(const char* name) { PluginFactory >::UnReg(name); side_plugin_unregister_ex(name); } +void* side_plugin_compactionfilterfactory_get_state(const rocksdb_compactionfilterfactory_t* p) { + return p->state_; +} void side_plugin_register_merge_operator (const char* name, rocksdb_mergeoperator_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { @@ -7309,6 +7315,9 @@ void side_plugin_unregister_merge_operator(const char* name) { PluginFactory >::UnReg(name); side_plugin_unregister_ex(name); } +void* side_plugin_mergeoperator_get_state(const rocksdb_mergeoperator_t* p) { + return p->state_; +} void 
side_plugin_register_slicetransform (const char* name, rocksdb_slicetransform_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { @@ -7318,6 +7327,9 @@ void side_plugin_unregister_slicetransform(const char* name) { PluginFactory >::UnReg(name); side_plugin_unregister_ex(name); } +void* side_plugin_slicetransform_get_state(const rocksdb_slicetransform_t* p) { + return p->state_; +} void side_plugin_register_filterpolicy (const char* name, rocksdb_filterpolicy_creator_t creator, const side_plugin_ex_vtab_t* ex_vtab) { @@ -7327,6 +7339,9 @@ void side_plugin_unregister_filterpolicy(const char* name) { PluginFactory >::UnReg(name); side_plugin_unregister_ex(name); } +void* side_plugin_filterpolicy_get_state(const rocksdb_filterpolicy_t* p) { + return p->state_; +} #if 0 // rocksdb c api does not support custom rate limiter void side_plugin_register_ratelimiter @@ -7336,6 +7351,9 @@ void side_plugin_register_ratelimiter void side_plugin_unregister_ratelimiter(const char* name) { PluginFactory >::UnReg(name); } +void* side_plugin_ratelimiter_get_state(const rocksdb_ratelimiter_creator_t* p) { + return p->state_; +} #endif } // end extern "C" diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index a25f5f88f9..52132cf5ff 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -3143,6 +3143,9 @@ void side_plugin_register_comparator extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_comparator(const char* name); +extern ROCKSDB_LIBRARY_API_WEAK +void* side_plugin_comparator_get_state(const rocksdb_comparator_t*); + typedef rocksdb_mergeoperator_t* (*rocksdb_mergeoperator_creator_t) (const char* strjson, const side_plugin_repo_t* repo); @@ -3154,6 +3157,9 @@ void side_plugin_register_merge_operator extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_merge_operator(const char* name); +extern ROCKSDB_LIBRARY_API_WEAK +void* side_plugin_mergeoperator_get_state(const rocksdb_mergeoperator_t*); + typedef rocksdb_compactionfilterfactory_t* 
(*rocksdb_compactionfilterfactory_creator_t) (const char* strjson, const side_plugin_repo_t* repo); @@ -3165,6 +3171,9 @@ void side_plugin_register_compaction_filter_factory extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_compaction_filter_factory(const char* name); +extern ROCKSDB_LIBRARY_API_WEAK +void* side_plugin_compactionfilterfactory_get_state(const rocksdb_compactionfilterfactory_t*); + typedef rocksdb_slicetransform_t* (*rocksdb_slicetransform_creator_t) (const char* strjson, const side_plugin_repo_t* repo); @@ -3176,6 +3185,9 @@ void side_plugin_register_slicetransform extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_slicetransform(const char* name); +extern ROCKSDB_LIBRARY_API_WEAK +void* side_plugin_slicetransform_get_state(const rocksdb_slicetransform_t*); + typedef rocksdb_filterpolicy_t* (*rocksdb_filterpolicy_creator_t) (const char* strjson, const side_plugin_repo_t* repo); @@ -3187,6 +3199,9 @@ void side_plugin_register_filterpolicy extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_filterpolicy(const char* name); +extern ROCKSDB_LIBRARY_API_WEAK +void* side_plugin_filterpolicy_get_state(const rocksdb_filterpolicy_t*); + #if 0 // rocksdb c api does not support custom rate limiter typedef rocksdb_ratelimiter_t* (*rocksdb_ratelimiter_creator_t) @@ -3198,6 +3213,9 @@ void side_plugin_register_ratelimiter extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_unregister_ratelimiter(const char* name); + +extern ROCKSDB_LIBRARY_API_WEAK +void* side_plugin_ratelimiter_get_state(const rocksdb_ratelimiter_t*); #endif #ifdef __cplusplus From 65ce0e1e56670b2e5c246440f79fda3a69623337 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 4 May 2026 13:27:38 +0800 Subject: [PATCH 083/102] submodule rockside: add sample-conf/mytopling{,-2nd}.json --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 5cdbb9bdfd..8f91c1465e 160000 --- a/sideplugin/rockside +++ 
b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 5cdbb9bdfdae0454a1a1377c5c8e9871ebb35c99 +Subproject commit 8f91c1465ecea9e073b4c5981a4344f3e8fe2b3c From bf54b9c3d4482826c9dd2160d9cc8659ac5f2268 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 4 May 2026 16:18:50 +0800 Subject: [PATCH 084/102] c: add side_plugin_{db,cf}_options_update_from These two methods let users keep their existing DBOptions and CFOptions construction code while selectively applying options from a SidePluginRepo YAML/JSON config. Users load a config file into a SidePluginRepo, then call side_plugin_db_options_update_from(opt, repo, "name"); side_plugin_cf_options_update_from(opt, repo, "name"); to overlay the repo's named options onto hand-crafted Options objects. This migration path preserves all ToplingDB engine optimizations (MemTable, SST formats, etc.) but foregoes SidePlugin features that depend on full SidePluginRepo-managed lifecycle: Web UI observability, Prometheus integration, online config hot-update, and distributed compaction. 
--- db/c.cc | 8 ++++++++ include/rocksdb/c.h | 6 ++++++ sideplugin/rockside | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/db/c.cc b/db/c.cc index 5697754b77..259a0905b2 100644 --- a/db/c.cc +++ b/db/c.cc @@ -7088,6 +7088,14 @@ void side_plugin_repo_put_cf_options(side_plugin_repo_t* r, const char* name, r->repo.Put(name, std::make_shared(opt->rep)); } +bool side_plugin_db_options_update_from(rocksdb_options_t* opt, const side_plugin_repo_t* r, const char* name) { + return r->repo.DBOptionsUpdateFrom(&opt->rep, name); +} + +bool side_plugin_cf_options_update_from(rocksdb_options_t* opt, const side_plugin_repo_t* r, const char* name) { + return r->repo.CFOptionsUpdateFrom(&opt->rep, name); +} + void side_plugin_repo_close_all(side_plugin_repo_t* r) { r->repo.CloseAllDB(false); // also close http delete r; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 52132cf5ff..9f62ec5510 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -3110,6 +3110,12 @@ side_plugin_repo_get_cf_options(side_plugin_repo_t*, const char* name, char** er extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_put_cf_options(side_plugin_repo_t*, const char* name, rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API_WEAK bool +side_plugin_db_options_update_from(rocksdb_options_t*, const side_plugin_repo_t*, const char* name); + +extern ROCKSDB_LIBRARY_API_WEAK bool +side_plugin_cf_options_update_from(rocksdb_options_t*, const side_plugin_repo_t*, const char* name); + extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_close_all(side_plugin_repo_t*); extern ROCKSDB_LIBRARY_API_WEAK void side_plugin_repo_forget_db(side_plugin_repo_t*, rocksdb_t*); diff --git a/sideplugin/rockside b/sideplugin/rockside index 8f91c1465e..eb090247f2 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 8f91c1465ecea9e073b4c5981a4344f3e8fe2b3c +Subproject commit eb090247f2cbeed1887387e16ceb9651bc076833 From 
9f52c958909d8ba8f542f1281d094df40c65d9f4 Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 4 May 2026 19:50:30 +0800 Subject: [PATCH 085/102] Override {DB,CF}Options in DB Open on env TOPLINGDB_EASY_MIGRATE_CONF Also override CFOptions in CreateColumnFamily, override options through MaybeOptionsUpdateFrom() If importing the conf file fails, the process dies immediately --- db/db_impl/db_impl.cc | 1 + db/db_impl/db_impl.h | 4 ++++ db/db_impl/db_impl_open.cc | 3 +++ db/db_impl/db_impl_readonly.cc | 4 ++++ db/db_impl/db_impl_secondary.cc | 3 +++ sideplugin/rockside | 2 +- utilities/blob_db/blob_db.cc | 3 +++ 7 files changed, 19 insertions(+), 1 deletion(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 46c79846c8..501f7b97a5 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -4028,6 +4028,7 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, Status s; *handle = nullptr; + MaybeCFOptionsUpdateFrom(const_cast(&cf_options), column_family_name); DBOptions db_options = BuildDBOptions(immutable_db_options_, mutable_db_options_); s = ColumnFamilyData::ValidateOptions(db_options, cf_options); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index d706c1db20..54e3d32766 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2805,6 +2805,10 @@ class GetWithTimestampReadCallback : public ReadCallback { } }; +extern bool MaybeCFOptionsUpdateFrom(ColumnFamilyOptions*, const std::string& src_name); +extern bool MaybeOptionsUpdateFrom +(DBOptions*, std::vector*, const std::string& src_dbo_name); + extern Options SanitizeOptions(const std::string& db, const Options& src, bool read_only = false, Status* logger_creation_s = nullptr); diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 476ea032fa..9f397d860e 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1980,6 +1980,9 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, const
std::vector& column_families, std::vector* handles, DB** dbptr, const bool seq_per_batch, const bool batch_per_txn) { + MaybeOptionsUpdateFrom(const_cast(&db_options), + const_cast*>(&column_families), + "default"); Status s = ValidateOptionsByTable(db_options, column_families); if (!s.ok()) { return s; diff --git a/db/db_impl/db_impl_readonly.cc b/db/db_impl/db_impl_readonly.cc index 6607fc7063..491e9f109a 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -347,6 +347,10 @@ Status DBImplReadOnly::OpenForReadOnlyWithoutCheck( *dbptr = nullptr; handles->clear(); + MaybeOptionsUpdateFrom(const_cast(&db_options), + const_cast*>(&column_families), + "default"); + SuperVersionContext sv_context(/* create_superversion */ true); DBImplReadOnly* impl = new DBImplReadOnly(db_options, dbname); impl->mutex_.Lock(); diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index a7f64cb74c..995bf23212 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -801,6 +801,9 @@ Status DB::OpenAsSecondary( *dbptr = nullptr; DBOptions tmp_opts(db_options); + MaybeOptionsUpdateFrom(&tmp_opts, + const_cast*>(&column_families), + "default"); Status s; if (nullptr == tmp_opts.info_log) { s = CreateLoggerFromOptions(secondary_path, tmp_opts, &tmp_opts.info_log); diff --git a/sideplugin/rockside b/sideplugin/rockside index eb090247f2..07e1013ab6 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit eb090247f2cbeed1887387e16ceb9651bc076833 +Subproject commit 07e1013ab69b2d0d7440a98f65c8e1d6aebc6e93 diff --git a/utilities/blob_db/blob_db.cc b/utilities/blob_db/blob_db.cc index b6fe039036..ba99a81dd7 100644 --- a/utilities/blob_db/blob_db.cc +++ b/utilities/blob_db/blob_db.cc @@ -46,6 +46,9 @@ Status BlobDB::Open(const DBOptions& db_options, return Status::NotSupported( "Blob DB doesn't support non-default column family."); } + MaybeOptionsUpdateFrom(const_cast(&db_options), 
+ const_cast*>(&column_families), + "default"); BlobDBImpl* blob_db_impl = new BlobDBImpl(dbname, bdb_options, db_options, column_families[0].options); From 7b9e3a8b4f16ab898abd51d5e34ef83843a91b3e Mon Sep 17 00:00:00 2001 From: rockeet Date: Mon, 4 May 2026 21:32:15 +0800 Subject: [PATCH 086/102] submodule rockside: UpdateFromName: warn not found only on DebugLevel >= 1 --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 07e1013ab6..3a5c4cb612 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 07e1013ab69b2d0d7440a98f65c8e1d6aebc6e93 +Subproject commit 3a5c4cb612bf225adf906d03b16f178f27f6123f From 18084abb6589dce0d7d8376de2d172cbf08106f0 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 5 May 2026 20:36:00 +0800 Subject: [PATCH 087/102] Easy Migrate: DBOptions & CFOptions lookup by using dbpath as namespace Under TOPLINGDB_EASY_MIGRATE_CONF, db-path components are treated as a namespace hierarchy for config lookup, from most specific (full path) to most general ("default"). CFOptions now follow the same path-prefix walk with bare-name and "default" fallbacks that DBOptions always had. 
Design rationale: https://github.com/topling/rockside/wiki/Easy-Migrate-Without-Code-Change --- db/db_impl/db_impl.h | 2 +- db/db_impl/db_impl_open.cc | 2 +- db/db_impl/db_impl_readonly.cc | 2 +- db/db_impl/db_impl_secondary.cc | 2 +- sideplugin/rockside | 2 +- utilities/blob_db/blob_db.cc | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 54e3d32766..9116277a8f 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2807,7 +2807,7 @@ class GetWithTimestampReadCallback : public ReadCallback { extern bool MaybeCFOptionsUpdateFrom(ColumnFamilyOptions*, const std::string& src_name); extern bool MaybeOptionsUpdateFrom -(DBOptions*, std::vector*, const std::string& src_dbo_name); +(DBOptions*, std::vector*, const std::string& dbpath); extern Options SanitizeOptions(const std::string& db, const Options& src, bool read_only = false, diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 9f397d860e..a3a9faef4c 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1982,7 +1982,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, const bool seq_per_batch, const bool batch_per_txn) { MaybeOptionsUpdateFrom(const_cast(&db_options), const_cast*>(&column_families), - "default"); + dbname); Status s = ValidateOptionsByTable(db_options, column_families); if (!s.ok()) { return s; diff --git a/db/db_impl/db_impl_readonly.cc b/db/db_impl/db_impl_readonly.cc index 491e9f109a..32706c34da 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -349,7 +349,7 @@ Status DBImplReadOnly::OpenForReadOnlyWithoutCheck( MaybeOptionsUpdateFrom(const_cast(&db_options), const_cast*>(&column_families), - "default"); + dbname); SuperVersionContext sv_context(/* create_superversion */ true); DBImplReadOnly* impl = new DBImplReadOnly(db_options, dbname); diff --git a/db/db_impl/db_impl_secondary.cc 
b/db/db_impl/db_impl_secondary.cc index 995bf23212..8325e0788a 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -803,7 +803,7 @@ Status DB::OpenAsSecondary( DBOptions tmp_opts(db_options); MaybeOptionsUpdateFrom(&tmp_opts, const_cast*>(&column_families), - "default"); + dbname); Status s; if (nullptr == tmp_opts.info_log) { s = CreateLoggerFromOptions(secondary_path, tmp_opts, &tmp_opts.info_log); diff --git a/sideplugin/rockside b/sideplugin/rockside index 3a5c4cb612..7428da13b8 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 3a5c4cb612bf225adf906d03b16f178f27f6123f +Subproject commit 7428da13b8ddef617e87d547189e3a964d5fc161 diff --git a/utilities/blob_db/blob_db.cc b/utilities/blob_db/blob_db.cc index ba99a81dd7..1bcf605884 100644 --- a/utilities/blob_db/blob_db.cc +++ b/utilities/blob_db/blob_db.cc @@ -48,7 +48,7 @@ Status BlobDB::Open(const DBOptions& db_options, } MaybeOptionsUpdateFrom(const_cast(&db_options), const_cast*>(&column_families), - "default"); + dbname); BlobDBImpl* blob_db_impl = new BlobDBImpl(dbname, bdb_options, db_options, column_families[0].options); From 078dd256823ab7ae4b7a0ea981017f8b3a825fe2 Mon Sep 17 00:00:00 2001 From: leipeng Date: Wed, 6 May 2026 14:59:50 +0800 Subject: [PATCH 088/102] DB::CreateColumnFamily use new MaybeCFOptionsUpdateFrom(cf_options, cfname, dbpath) --- db/db_impl/db_impl.cc | 3 ++- db/db_impl/db_impl.h | 3 ++- sideplugin/rockside | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 501f7b97a5..c6352472f1 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -4028,7 +4028,8 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, Status s; *handle = nullptr; - MaybeCFOptionsUpdateFrom(const_cast(&cf_options), column_family_name); + MaybeCFOptionsUpdateFrom(const_cast(&cf_options), + column_family_name, dbname_); DBOptions db_options = 
BuildDBOptions(immutable_db_options_, mutable_db_options_); s = ColumnFamilyData::ValidateOptions(db_options, cf_options); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 9116277a8f..2ef7e4daa5 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2805,7 +2805,8 @@ class GetWithTimestampReadCallback : public ReadCallback { } }; -extern bool MaybeCFOptionsUpdateFrom(ColumnFamilyOptions*, const std::string& src_name); +extern bool MaybeCFOptionsUpdateFrom +(ColumnFamilyOptions*, const std::string& cfname, const std::string& dbpath); extern bool MaybeOptionsUpdateFrom (DBOptions*, std::vector*, const std::string& dbpath); diff --git a/sideplugin/rockside b/sideplugin/rockside index 7428da13b8..4cd091c29a 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 7428da13b8ddef617e87d547189e3a964d5fc161 +Subproject commit 4cd091c29aab15aed44405a08c75af2183159645 From fcc800a5d9adc00c170bed56ea7f8e3bf9265462 Mon Sep 17 00:00:00 2001 From: leipeng Date: Wed, 6 May 2026 22:58:56 +0800 Subject: [PATCH 089/102] feat(db): register DB with EasyMigrate on open and drop on destroy - Call MaybeRetainDB via ROCKSDB_SCOPE_EXIT after successful DBImpl::Open, ReadOnly, and Secondary opens. - Call MaybeForgetDB at the start of ~DBImpl. - Clear *dbptr before attaching scope exit to avoid retaining on early validation failure; setting *dbptr = nullptr here also fixes an obscure upstream bug where the output pointer could be left uncleared when validation fails before the later assignment. - Bump rockside for EasyMigrate HTTP gating and retain/forget implementation. 
--- db/db_impl/db_impl.cc | 2 ++ db/db_impl/db_impl.h | 2 ++ db/db_impl/db_impl_open.cc | 3 +++ db/db_impl/db_impl_readonly.cc | 1 + db/db_impl/db_impl_secondary.cc | 1 + sideplugin/rockside | 2 +- 6 files changed, 10 insertions(+), 1 deletion(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index c6352472f1..a6de3f7dca 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -834,6 +834,8 @@ Status DBImpl::CloseHelper() { Status DBImpl::CloseImpl() { return CloseHelper(); } DBImpl::~DBImpl() { + MaybeForgetDB(this); + // TODO: remove this. init_logger_creation_s_.PermitUncheckedError(); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 2ef7e4daa5..758c8e737e 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2809,6 +2809,8 @@ extern bool MaybeCFOptionsUpdateFrom (ColumnFamilyOptions*, const std::string& cfname, const std::string& dbpath); extern bool MaybeOptionsUpdateFrom (DBOptions*, std::vector*, const std::string& dbpath); +extern void MaybeRetainDB(DB*, const std::vector&); +extern void MaybeForgetDB(DB*); extern Options SanitizeOptions(const std::string& db, const Options& src, bool read_only = false, diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index a3a9faef4c..85fad0b46e 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1983,6 +1983,9 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, MaybeOptionsUpdateFrom(const_cast(&db_options), const_cast*>(&column_families), dbname); + *dbptr = nullptr; + ROCKSDB_SCOPE_EXIT(MaybeRetainDB(*dbptr, *handles)); + Status s = ValidateOptionsByTable(db_options, column_families); if (!s.ok()) { return s; diff --git a/db/db_impl/db_impl_readonly.cc b/db/db_impl/db_impl_readonly.cc index 32706c34da..2cf43b8f07 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -350,6 +350,7 @@ Status DBImplReadOnly::OpenForReadOnlyWithoutCheck( 
MaybeOptionsUpdateFrom(const_cast(&db_options), const_cast*>(&column_families), dbname); + ROCKSDB_SCOPE_EXIT(MaybeRetainDB(*dbptr, *handles)); SuperVersionContext sv_context(/* create_superversion */ true); DBImplReadOnly* impl = new DBImplReadOnly(db_options, dbname); diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 8325e0788a..98634cc51a 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -804,6 +804,7 @@ Status DB::OpenAsSecondary( MaybeOptionsUpdateFrom(&tmp_opts, const_cast*>(&column_families), dbname); + ROCKSDB_SCOPE_EXIT(MaybeRetainDB(*dbptr, *handles)); Status s; if (nullptr == tmp_opts.info_log) { s = CreateLoggerFromOptions(secondary_path, tmp_opts, &tmp_opts.info_log); diff --git a/sideplugin/rockside b/sideplugin/rockside index 4cd091c29a..e872b93494 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 4cd091c29aab15aed44405a08c75af2183159645 +Subproject commit e872b934946f8a0cbe588c4f4cc9c46293f740a3 From 992698709af9e8505378cc2d8ec3a9c745cecf9d Mon Sep 17 00:00:00 2001 From: leipeng Date: Wed, 6 May 2026 22:58:56 +0800 Subject: [PATCH 090/102] chore: add easy_db_bench.sh and bump rockside sample-conf --- easy_db_bench.sh | 34 ++++++++++++++++++++++++++++++++++ sideplugin/rockside | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 easy_db_bench.sh diff --git a/easy_db_bench.sh b/easy_db_bench.sh new file mode 100644 index 0000000000..e24cbfe743 --- /dev/null +++ b/easy_db_bench.sh @@ -0,0 +1,34 @@ +#!/bin/bash -ex + +#rm -rf /dev/shm/db_bench_enterprise +#rm -rf /tmp/db_bench_enterprise +mkdir -p /dev/shm/db_bench_enterprise +#mkdir -p /tmp/db_bench_enterprise +cp sideplugin/rockside/src/topling/web/{index.html,style.css} /dev/shm/db_bench_enterprise/ + +export TOPLINGDB_GetContext_sampling=kNone +export ROCKSDB_KICK_OUT_OPTIONS_FILE=1 +export 
TOPLINGDB_EASY_MIGRATE_CONF=sideplugin/rockside/sample-conf/db_bench_enterprise.yaml +#export PRINT_NOT_FOUND=true +ulimit -n 100000 +args=( + #-num=10000000 + -key_size=8 + #-value_size=2000 + -batch_size=100 + #-benchmarks=fillseq,compact,nextwithkey,nextwithkey,nextwithkey,nextwithkey,nextwithkey,readseq,readseq,readseq,readseq,readseq + -benchmarks=fillrandom,readrandom + #-benchmarks=fillseq,compact,readrandom # rand DB::Get < 100 nanosec + #-benchmarks=compact + #-benchmarks=readrandom + #-benchmarks=readseq + #-benchmarks=nextwithkey + #-wkey_file=${HOME}/wikipedia-title-seq.txt + #-rkey_file=${HOME}/wikipedia-title-seq.txt + #-threads=8 + #-use_existing_db + -scan_omit_key + -scan_omit_value + -enable_zero_copy # ToplingDB specific, for point search by Get/MultiGet +) +./db_bench ${args[@]} "$@" diff --git a/sideplugin/rockside b/sideplugin/rockside index e872b93494..fdd3762c2b 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit e872b934946f8a0cbe588c4f4cc9c46293f740a3 +Subproject commit fdd3762c2bf8db0c1630dea20164e2ff33c1ab25 From c8505008ad83e56bbbc97e87fed092a47fdf834a Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 8 May 2026 00:38:16 +0800 Subject: [PATCH 091/102] fix(db): expose persist_stats_cf_handle for submodule handle fix The MaybeRetainDB fix in the rockside submodule needs to read persist_stats_cf_handle_ from a DB* pointer, but it is a private member with no accessor. Add a public accessor and a wrapper function, and update the submodule to use them. The submodule fix replaces dangling default_cfh and stat_cfh handles with live pointers before storing them in the sideplugin repo, since RocksDB deletes the original handles on simple Open. 
--- db/db_impl/db_impl.cc | 4 ++++ db/db_impl/db_impl.h | 2 ++ sideplugin/rockside | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index a6de3f7dca..d363e94965 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -4021,6 +4021,10 @@ void DB_UpdateMaxColumnFamily(DB* db, uint32_t max_cf_id) { cfset->UpdateMaxColumnFamily(max_cf_id); } +ColumnFamilyHandle* DB_persist_stats_cf_handle(const DB* db) { + return static_cast_with_check(db)->persist_stats_cf_handle(); +} + Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, const std::string& column_family_name, ColumnFamilyHandle** handle) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 758c8e737e..f8550d727a 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2241,6 +2241,8 @@ class DBImpl : public DB { SnapshotImpl* GetSnapshotImpl(SequenceNumber snapshot_seq, bool is_write_conflict_boundary, bool lock = true); + ColumnFamilyHandle* persist_stats_cf_handle() const { return persist_stats_cf_handle_; } + protected: // If snapshot_seq != kMaxSequenceNumber, then this function can only be diff --git a/sideplugin/rockside b/sideplugin/rockside index fdd3762c2b..859cd3ceac 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit fdd3762c2bf8db0c1630dea20164e2ff33c1ab25 +Subproject commit 859cd3ceacf6c7e9c1481ebe3eecec41f332cde6 From 88b5ef52cadf058e4d95009a19dad283fead14bf Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 8 May 2026 00:51:55 +0800 Subject: [PATCH 092/102] feat(db): add MaybeRetainCF/MaybeForgetCF call sites Track column family lifecycle in the sideplugin repo during easy migrate: register CF in CreateColumnFamily via scope-exit guard, and unregister in DropColumnFamily before the handle becomes invalid. 
--- db/db_impl/db_impl.cc | 3 +++ db/db_impl/db_impl.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index d363e94965..57ce84a663 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -4036,6 +4036,7 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, MaybeCFOptionsUpdateFrom(const_cast(&cf_options), column_family_name, dbname_); + ROCKSDB_SCOPE_EXIT(MaybeRetainCF(this, *handle)); DBOptions db_options = BuildDBOptions(immutable_db_options_, mutable_db_options_); s = ColumnFamilyData::ValidateOptions(db_options, cf_options); @@ -4161,6 +4162,8 @@ Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) { return Status::InvalidArgument("Can't drop default column family"); } + MaybeForgetCF(this, column_family); + bool cf_support_snapshot = cfd->mem()->IsSnapshotSupported(); VersionEdit edit; diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index f8550d727a..e26709b39b 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2813,6 +2813,8 @@ extern bool MaybeOptionsUpdateFrom (DBOptions*, std::vector*, const std::string& dbpath); extern void MaybeRetainDB(DB*, const std::vector&); extern void MaybeForgetDB(DB*); +extern void MaybeRetainCF(DB*, ColumnFamilyHandle*); +extern void MaybeForgetCF(DB*, ColumnFamilyHandle*); extern Options SanitizeOptions(const std::string& db, const Options& src, bool read_only = false, From ef3c0888feff9246ea91761c9a83d2400d4c6e87 Mon Sep 17 00:00:00 2001 From: rockeet Date: Fri, 8 May 2026 00:52:00 +0800 Subject: [PATCH 093/102] chore: update rockside submodule Pulls in the yaml config change and the MaybeRetainCF/MaybeForgetCF implementation in the submodule. 
--- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 859cd3ceac..0388c6f0dd 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 859cd3ceacf6c7e9c1481ebe3eecec41f332cde6 +Subproject commit 0388c6f0dd547388e72225ce257f5ebaf6c4a779 From 529d779f8b08de83c863cbe74619f4152b2ac37b Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 9 May 2026 14:56:30 +0800 Subject: [PATCH 094/102] feat(c-api): add rocksdb_readoptions_is_in_pinning_section Expose ReadOptions::internal_is_in_pinning_section through the C API, needed by Rust bindings for ReadOptionsScopePinIfNotPinned guard. --- db/c.cc | 4 ++++ include/rocksdb/c.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/db/c.cc b/db/c.cc index 259a0905b2..d62bfbfd71 100644 --- a/db/c.cc +++ b/db/c.cc @@ -4546,6 +4546,10 @@ void rocksdb_readoptions_finish_pin(rocksdb_readoptions_t* opt) { return opt->rep.FinishPin(); } +unsigned char rocksdb_readoptions_is_in_pinning_section(rocksdb_readoptions_t* opt) { + return opt->rep.internal_is_in_pinning_section ? 
1 : 0; +} + void rocksdb_readoptions_set_async_queue_depth(rocksdb_readoptions_t* opt, size_t v) { v = std::min(v, (size_t)1024); opt->rep.async_queue_depth = v; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 9f62ec5510..2ed5667240 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1943,6 +1943,8 @@ extern ROCKSDB_LIBRARY_API_WEAK void rocksdb_readoptions_start_pin( rocksdb_readoptions_t*); extern ROCKSDB_LIBRARY_API_WEAK void rocksdb_readoptions_finish_pin( rocksdb_readoptions_t*); +extern ROCKSDB_LIBRARY_API_WEAK unsigned char +rocksdb_readoptions_is_in_pinning_section(rocksdb_readoptions_t*); extern ROCKSDB_LIBRARY_API_WEAK void rocksdb_readoptions_set_async_queue_depth( rocksdb_readoptions_t*, size_t); extern ROCKSDB_LIBRARY_API_WEAK size_t rocksdb_readoptions_get_async_queue_depth( From 5d68e7eb9207f9c348f4ec8cc4521d2ccfcf5428 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 16 May 2026 15:13:04 +0800 Subject: [PATCH 095/102] update submodule rockside --- sideplugin/rockside | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sideplugin/rockside b/sideplugin/rockside index 0388c6f0dd..66c22863ec 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 0388c6f0dd547388e72225ce257f5ebaf6c4a779 +Subproject commit 66c22863ec4b01b77db6c937005070c5bad52b28 From 146594c010fd3ce1640ac668d337871a53201ea3 Mon Sep 17 00:00:00 2001 From: leipeng Date: Mon, 18 May 2026 23:19:33 +0800 Subject: [PATCH 096/102] Add Status::ToSSO() --- include/rocksdb/status.h | 2 ++ util/status.cc | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/include/rocksdb/status.h b/include/rocksdb/status.h index e6afa6fa09..02d6fa49c4 100644 --- a/include/rocksdb/status.h +++ b/include/rocksdb/status.h @@ -29,6 +29,7 @@ #endif #include "rocksdb/slice.h" +#include namespace ROCKSDB_NAMESPACE { @@ -495,6 +496,7 @@ class Status { // Return a string representation of this status suitable for printing. 
// Returns the string "OK" for success. std::string ToString() const; + terark::minimal_sso<32> ToSSO() const; void swap(Status& y) { static_assert(sizeof(Status) == 2*sizeof(uint64_t)); diff --git a/util/status.cc b/util/status.cc index 160755d54d..978f315b13 100644 --- a/util/status.cc +++ b/util/status.cc @@ -160,4 +160,8 @@ std::string Status::ToString() const { return result; } +terark::minimal_sso<32> Status::ToSSO() const { + return terark::minimal_sso<32>{ToString()}; +} + } // namespace ROCKSDB_NAMESPACE From 04772815a9a1f51d416382e428a124e37570d7e6 Mon Sep 17 00:00:00 2001 From: leipeng Date: Mon, 18 May 2026 23:46:40 +0800 Subject: [PATCH 097/102] fix(db): auto-detect WAL format instead of trusting memtable_as_log_index option When DB Open replays WAL files, the reader was unconditionally configured based on the current memtable_as_log_index option. If the option changed between runs (e.g. config file or env var), the WAL records would be parsed with the wrong header format, causing CRC mismatch and recovery failure. Now IsMemTableAsLogIndexFile() probes the actual on-disk format of each WAL file before replay, and the reader uses the detected format regardless of the current option setting. 
Co-Authored-By: Claude Opus 4.7 --- db/db_impl/db_impl_open.cc | 12 ++++++++++-- db/db_impl/db_impl_secondary.cc | 8 +++++++- db/repair.cc | 7 ++++++- db/transaction_log_impl.cc | 8 +++++++- db/wal_manager.cc | 7 ++++++- tools/ldb_cmd.cc | 14 +++++++++++++- 6 files changed, 49 insertions(+), 7 deletions(-) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 85fad0b46e..0b9cb5669e 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1167,6 +1167,14 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, logFileDropped(); continue; } + bool wal_memtable_format = false; + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*fs_, fname, &wal_memtable_format); !ios.ok()) { + auto info_log = immutable_db_options_.info_log.get(); + ROCKS_LOG_WARN(info_log, "%s: %s", fname.c_str(), *ios.ToSSO()); + logFileDropped(); + continue; + } std::unique_ptr file_reader; { @@ -1207,7 +1215,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, log::Reader reader(immutable_db_options_.info_log, std::move(file_reader), &reporter, true /*checksum*/, wal_number); boost::intrusive_ptr fmap; - if (immutable_db_options_.memtable_as_log_index) { + if (wal_memtable_format) { reader.InitSetMemTableAsLogIndex(*fs_); IOStatus ios = ReadonlyFileMmap::New(&fmap, *fs_, wal_number, fname); if (!ios.ok() && ios.ToString() != "Invalid argument: Empty File") @@ -1255,7 +1263,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, if (!status.ok()) { return status; } - if (new_batch && immutable_db_options_.memtable_as_log_index) { + if (new_batch && wal_memtable_format) { return Status::NotSupported("memtable_as_log_index", "WriteBatchTimestampSizeDifference"); } diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 98634cc51a..7feb7dfb4a 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -167,11 +167,17 @@ Status 
DBImplSecondary::MaybeInitLogReader( io_tracer_)); } + bool wal_memtable_format = false; + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*fs_, fname, &wal_memtable_format); !ios.ok()) { + return Status(ios); + } + // Create the log reader. LogReaderContainer* log_reader_container = new LogReaderContainer( env_, immutable_db_options_.info_log, fname, std::move(file_reader), log_number); - if (immutable_db_options_.memtable_as_log_index) { + if (wal_memtable_format) { // will tailing log Reader, so must preserve mmap size auto mmap_size = GetMaxTotalWalSize() + 8*1024*1024; if (mmap_size > (1ull << 40)) { diff --git a/db/repair.cc b/db/repair.cc index 69c662d22c..34aa64fcb0 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -401,7 +401,12 @@ class Repairer { std::string scratch; Slice record; WriteBatch batch; - if (db_options_.memtable_as_log_index) { + bool wal_memtable_format = false; + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*fs, logname, &wal_memtable_format); !ios.ok()) { + return Status(ios); + } + if (wal_memtable_format) { reader.InitSetMemTableAsLogIndex(*fs); auto [fmap, ios] = ReadonlyFileMmap::New(*fs, log, logname); if (!ios.ok() && ios.ToString() != "Invalid argument: Empty File") diff --git a/db/transaction_log_impl.cc b/db/transaction_log_impl.cc index 729420a8ec..26db569c76 100644 --- a/db/transaction_log_impl.cc +++ b/db/transaction_log_impl.cc @@ -288,10 +288,16 @@ Status TransactionLogIteratorImpl::OpenLogReader(const LogFile* log_file) { return s; } assert(file); + const std::string& fname = file->file_name(); + bool wal_memtable_format = false; + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*options_->fs, fname, &wal_memtable_format); !ios.ok()) { + return Status(ios); + } current_log_reader_.reset( new log::Reader(options_->info_log, std::move(file), &reporter_, read_options_.verify_checksums_, log_file->LogNumber())); - if (options_->memtable_as_log_index) { + if (wal_memtable_format) { 
current_log_reader_->InitSetMemTableAsLogIndex(*options_->fs); } return Status::OK(); diff --git a/db/wal_manager.cc b/db/wal_manager.cc index f3ff882325..d1c3ca7319 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -485,9 +485,14 @@ Status WalManager::ReadFirstLine(const std::string& fname, reporter.fname = fname.c_str(); reporter.status = &status; reporter.ignore_error = !db_options_.paranoid_checks; + bool wal_memtable_format = false; + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*db_options_.fs, fname, &wal_memtable_format); !ios.ok()) { + return Status(ios); + } log::Reader reader(db_options_.info_log, std::move(file_reader), &reporter, true /*checksum*/, number); - if (db_options_.memtable_as_log_index) { + if (wal_memtable_format) { reader.InitSetMemTableAsLogIndex(*db_options_.fs); } std::string scratch; diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 5fe196c7dc..78feae033d 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -2749,9 +2749,21 @@ void DumpWalFile(Options options, std::string wal_file, bool print_header, // bogus input, carry on as best we can log_number = 0; } + bool wal_memtable_format = false; + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*fs, wal_file, &wal_memtable_format); !ios.ok()) { + if (exec_state) { + *exec_state = LDBCommandExecuteResult::Failed( + "Failed to detect WAL format " + ios.ToString()); + } else { + std::cerr << "Error: Failed to detect WAL format " + << ios.ToString() << std::endl; + } + return; + } log::Reader reader(options.info_log, std::move(wal_file_reader), &reporter, true /* checksum */, log_number); - if (options.memtable_as_log_index) { + if (wal_memtable_format) { reader.InitSetMemTableAsLogIndex(*fs); } std::string scratch; From 8fa8e5dd1e0bced93097b0663da571248a9ef39c Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 19 May 2026 00:30:25 +0800 Subject: [PATCH 098/102] fix(db): add check_wal_format option to gate WAL format auto-detection MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously WAL format was detected unconditionally via CRC32 self- consistency on every DB open, which carries a 1/2^32 false-positive risk per file. This risk is only relevant when memtable_as_log_index was actually changed between runs — an extremely rare event. Gate the detection behind DBOptions::check_wal_format (default false) so users only pay the risk when they know a format switch occurred. This reduces false-positive exposure by orders of magnitude without requiring any WAL format changes. Co-Authored-By: Claude Opus 4.7 --- db/db_impl/db_impl_open.cc | 16 +++++++++------- db/db_impl/db_impl_secondary.cc | 10 ++++++---- db/repair.cc | 10 ++++++---- db/transaction_log_impl.cc | 10 ++++++---- db/wal_manager.cc | 10 ++++++---- include/rocksdb/options.h | 12 ++++++++++++ options/db_options.cc | 7 +++++++ options/db_options.h | 1 + options/options_settable_test.cc | 1 + options/options_test.cc | 4 ++++ sideplugin/rockside | 2 +- tools/ldb_cmd.cc | 22 ++++++++++++---------- 12 files changed, 71 insertions(+), 34 deletions(-) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 0b9cb5669e..204a9fb98f 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1167,13 +1167,15 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, logFileDropped(); continue; } - bool wal_memtable_format = false; - if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile - (*fs_, fname, &wal_memtable_format); !ios.ok()) { - auto info_log = immutable_db_options_.info_log.get(); - ROCKS_LOG_WARN(info_log, "%s: %s", fname.c_str(), *ios.ToSSO()); - logFileDropped(); - continue; + bool wal_memtable_format = immutable_db_options_.memtable_as_log_index; + if (immutable_db_options_.check_wal_format) { + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*fs_, fname, &wal_memtable_format); !ios.ok()) { + auto info_log = immutable_db_options_.info_log.get(); + 
ROCKS_LOG_WARN(info_log, "%s: %s", fname.c_str(), *ios.ToSSO()); + logFileDropped(); + continue; + } } std::unique_ptr file_reader; diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 7feb7dfb4a..20750f683a 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -167,10 +167,12 @@ Status DBImplSecondary::MaybeInitLogReader( io_tracer_)); } - bool wal_memtable_format = false; - if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile - (*fs_, fname, &wal_memtable_format); !ios.ok()) { - return Status(ios); + bool wal_memtable_format = immutable_db_options_.memtable_as_log_index; + if (immutable_db_options_.check_wal_format) { + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*fs_, fname, &wal_memtable_format); !ios.ok()) { + return Status(ios); + } } // Create the log reader. diff --git a/db/repair.cc b/db/repair.cc index 34aa64fcb0..f1b2b8cc62 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -401,10 +401,12 @@ class Repairer { std::string scratch; Slice record; WriteBatch batch; - bool wal_memtable_format = false; - if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile - (*fs, logname, &wal_memtable_format); !ios.ok()) { - return Status(ios); + bool wal_memtable_format = db_options_.memtable_as_log_index; + if (db_options_.check_wal_format) { + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*fs, logname, &wal_memtable_format); !ios.ok()) { + return Status(ios); + } } if (wal_memtable_format) { reader.InitSetMemTableAsLogIndex(*fs); diff --git a/db/transaction_log_impl.cc b/db/transaction_log_impl.cc index 26db569c76..7ad930da01 100644 --- a/db/transaction_log_impl.cc +++ b/db/transaction_log_impl.cc @@ -289,10 +289,12 @@ Status TransactionLogIteratorImpl::OpenLogReader(const LogFile* log_file) { } assert(file); const std::string& fname = file->file_name(); - bool wal_memtable_format = false; - if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile - (*options_->fs, fname, 
&wal_memtable_format); !ios.ok()) { - return Status(ios); + bool wal_memtable_format = options_->memtable_as_log_index; + if (options_->check_wal_format) { + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*options_->fs, fname, &wal_memtable_format); !ios.ok()) { + return Status(ios); + } } current_log_reader_.reset( new log::Reader(options_->info_log, std::move(file), &reporter_, diff --git a/db/wal_manager.cc b/db/wal_manager.cc index d1c3ca7319..024991544f 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -485,10 +485,12 @@ Status WalManager::ReadFirstLine(const std::string& fname, reporter.fname = fname.c_str(); reporter.status = &status; reporter.ignore_error = !db_options_.paranoid_checks; - bool wal_memtable_format = false; - if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile - (*db_options_.fs, fname, &wal_memtable_format); !ios.ok()) { - return Status(ios); + bool wal_memtable_format = db_options_.memtable_as_log_index; + if (db_options_.check_wal_format) { + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*db_options_.fs, fname, &wal_memtable_format); !ios.ok()) { + return Status(ios); + } } log::Reader reader(db_options_.info_log, std::move(file_reader), &reporter, true /*checksum*/, number); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 739724d8ba..8f75cd2106 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -925,6 +925,18 @@ struct DBOptions { bool memtable_as_log_index = false; + // If true, each WAL file is probed on DB open to auto-detect its on-disk + // format, so recovery works even when memtable_as_log_index was changed + // between runs. + // + // Defaults to false because the probe relies on CRC32 self-consistency + // rather than a magic number to distinguish the two formats, which carries + // a 1/2^32 false-positive risk per file. 
Always probing would expose this + // risk on every open; turning this on only when needed reduces the + // exposure by orders of magnitude (only when a format switch actually + // occurred). + bool check_wal_format = false; + // if not zero, periodically take stats snapshots and store in memory, the // memory size for stats snapshots is capped at stats_history_buffer_size // Default: 1MB diff --git a/options/db_options.cc b/options/db_options.cc index 4a1663e4eb..dc17320e4e 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -328,6 +328,10 @@ static std::unordered_map {offsetof(struct ImmutableDBOptions, memtable_as_log_index), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"check_wal_format", + {offsetof(struct ImmutableDBOptions, check_wal_format), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, {"fail_if_options_file_error", {offsetof(struct ImmutableDBOptions, fail_if_options_file_error), OptionType::kBoolean, OptionVerificationType::kNormal, @@ -764,6 +768,7 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io), persist_stats_to_disk(options.persist_stats_to_disk), memtable_as_log_index(options.memtable_as_log_index), + check_wal_format(options.check_wal_format), write_dbid_to_manifest(options.write_dbid_to_manifest), log_readahead_size(options.log_readahead_size), file_checksum_gen_factory(options.file_checksum_gen_factory), @@ -933,6 +938,8 @@ void ImmutableDBOptions::Dump(Logger* log) const { persist_stats_to_disk); ROCKS_LOG_HEADER(log, " Options.memtable_as_log_index: %u", memtable_as_log_index); + ROCKS_LOG_HEADER(log, " Options.check_wal_format: %u", + check_wal_format); ROCKS_LOG_HEADER(log, " Options.write_dbid_to_manifest: %d", write_dbid_to_manifest); ROCKS_LOG_HEADER( diff --git a/options/db_options.h b/options/db_options.h index 024af355a7..e0c618d637 100644 --- 
a/options/db_options.h +++ b/options/db_options.h @@ -90,6 +90,7 @@ struct ImmutableDBOptions { bool avoid_unnecessary_blocking_io; bool persist_stats_to_disk; bool memtable_as_log_index; + bool check_wal_format; bool write_dbid_to_manifest; size_t log_readahead_size; std::shared_ptr file_checksum_gen_factory; diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 42340073f9..2cd675a16a 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -338,6 +338,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { "stats_persist_period_sec=54321;" "persist_stats_to_disk=true;" "memtable_as_log_index=true;" + "check_wal_format=true;" "stats_history_buffer_size=14159;" "allow_fallocate=true;" "allow_mmap_reads=false;" diff --git a/options/options_test.cc b/options/options_test.cc index a92d8f844c..450f38c400 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -170,6 +170,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"stats_persist_period_sec", "57"}, {"persist_stats_to_disk", "false"}, {"memtable_as_log_index", "false"}, + {"check_wal_format", "false"}, {"stats_history_buffer_size", "69"}, {"advise_random_on_open", "true"}, {"use_adaptive_mutex", "false"}, @@ -354,6 +355,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_db_opt.stats_persist_period_sec, 57U); ASSERT_EQ(new_db_opt.persist_stats_to_disk, false); ASSERT_EQ(new_db_opt.memtable_as_log_index, false); + ASSERT_EQ(new_db_opt.check_wal_format, false); ASSERT_EQ(new_db_opt.stats_history_buffer_size, 69U); ASSERT_EQ(new_db_opt.advise_random_on_open, true); ASSERT_EQ(new_db_opt.use_adaptive_mutex, false); @@ -2392,6 +2394,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { {"stats_persist_period_sec", "57"}, {"persist_stats_to_disk", "false"}, {"memtable_as_log_index", "false"}, + {"check_wal_format", "false"}, {"stats_history_buffer_size", "69"}, {"advise_random_on_open", "true"}, {"use_adaptive_mutex", 
"false"}, @@ -2578,6 +2581,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_db_opt.stats_persist_period_sec, 57U); ASSERT_EQ(new_db_opt.persist_stats_to_disk, false); ASSERT_EQ(new_db_opt.memtable_as_log_index, false); + ASSERT_EQ(new_db_opt.check_wal_format, false); ASSERT_EQ(new_db_opt.stats_history_buffer_size, 69U); ASSERT_EQ(new_db_opt.advise_random_on_open, true); ASSERT_EQ(new_db_opt.use_adaptive_mutex, false); diff --git a/sideplugin/rockside b/sideplugin/rockside index 66c22863ec..345c8cf538 160000 --- a/sideplugin/rockside +++ b/sideplugin/rockside @@ -1 +1 @@ -Subproject commit 66c22863ec4b01b77db6c937005070c5bad52b28 +Subproject commit 345c8cf5382ccfe7be0665f8478aa1f96f36ed67 diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 78feae033d..33752d8b75 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -2749,17 +2749,19 @@ void DumpWalFile(Options options, std::string wal_file, bool print_header, // bogus input, carry on as best we can log_number = 0; } - bool wal_memtable_format = false; - if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile - (*fs, wal_file, &wal_memtable_format); !ios.ok()) { - if (exec_state) { - *exec_state = LDBCommandExecuteResult::Failed( - "Failed to detect WAL format " + ios.ToString()); - } else { - std::cerr << "Error: Failed to detect WAL format " - << ios.ToString() << std::endl; + bool wal_memtable_format = options.memtable_as_log_index; + if (options.check_wal_format) { + if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile + (*fs, wal_file, &wal_memtable_format); !ios.ok()) { + if (exec_state) { + *exec_state = LDBCommandExecuteResult::Failed( + "Failed to detect WAL format " + ios.ToString()); + } else { + std::cerr << "Error: Failed to detect WAL format " + << ios.ToString() << std::endl; + } + return; } - return; } log::Reader reader(options.info_log, std::move(wal_file_reader), &reporter, true /* checksum */, log_number); From ffc6b1146bed14247d9c1b7e9827f095309a337a Mon Sep 17 
00:00:00 2001 From: leipeng Date: Tue, 19 May 2026 01:01:40 +0800 Subject: [PATCH 099/102] fix(db): return error on WAL format detection failure when check_wal_format is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the user explicitly opts into check_wal_format, a detection I/O error should not be silently skipped — it must be surfaced so the user can correct the situation (e.g. revert memtable_as_log_index and retry). Co-Authored-By: Claude Opus 4.7 --- db/db_impl/db_impl_open.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 204a9fb98f..8c62470ac0 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1173,8 +1173,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, (*fs_, fname, &wal_memtable_format); !ios.ok()) { auto info_log = immutable_db_options_.info_log.get(); ROCKS_LOG_WARN(info_log, "%s: %s", fname.c_str(), *ios.ToSSO()); - logFileDropped(); - continue; + return Status(ios); } } From d4104a031458c04ebe06802b8656451c4f38cb90 Mon Sep 17 00:00:00 2001 From: leipeng Date: Tue, 19 May 2026 01:05:48 +0800 Subject: [PATCH 100/102] fix(db): add ROCKS_LOG_WARN before return on WAL format detection failure Ensures the log always records which file and what error caused the detection to fail, consistent across all call sites. 
Co-Authored-By: Claude Opus 4.7 --- db/db_impl/db_impl_secondary.cc | 2 ++ db/repair.cc | 4 +++- db/transaction_log_impl.cc | 2 ++ db/wal_manager.cc | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 20750f683a..8f4d2e6415 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -171,6 +171,8 @@ Status DBImplSecondary::MaybeInitLogReader( if (immutable_db_options_.check_wal_format) { if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile (*fs_, fname, &wal_memtable_format); !ios.ok()) { + auto info_log = immutable_db_options_.info_log.get(); + ROCKS_LOG_WARN(info_log, "%s: %s", fname.c_str(), *ios.ToSSO()); return Status(ios); } } diff --git a/db/repair.cc b/db/repair.cc index f1b2b8cc62..2fd9992b28 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -405,7 +405,9 @@ class Repairer { if (db_options_.check_wal_format) { if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile (*fs, logname, &wal_memtable_format); !ios.ok()) { - return Status(ios); + ROCKS_LOG_WARN(db_options_.info_log, "%s: %s", + logname.c_str(), *ios.ToSSO()); + return Status(ios); } } if (wal_memtable_format) { diff --git a/db/transaction_log_impl.cc b/db/transaction_log_impl.cc index 7ad930da01..7683c4b357 100644 --- a/db/transaction_log_impl.cc +++ b/db/transaction_log_impl.cc @@ -293,6 +293,8 @@ Status TransactionLogIteratorImpl::OpenLogReader(const LogFile* log_file) { if (options_->check_wal_format) { if (IOStatus ios = log::Reader::IsMemTableAsLogIndexFile (*options_->fs, fname, &wal_memtable_format); !ios.ok()) { + ROCKS_LOG_WARN(options_->info_log, "%s: %s", + fname.c_str(), *ios.ToSSO()); return Status(ios); } } diff --git a/db/wal_manager.cc b/db/wal_manager.cc index 024991544f..66e1fc7cde 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -489,6 +489,8 @@ Status WalManager::ReadFirstLine(const std::string& fname, if (db_options_.check_wal_format) { if (IOStatus ios 
= log::Reader::IsMemTableAsLogIndexFile (*db_options_.fs, fname, &wal_memtable_format); !ios.ok()) { + ROCKS_LOG_WARN(db_options_.info_log, "%s: %s", + fname.c_str(), *ios.ToSSO()); return Status(ios); } } From 15b70eb6a272a7e24a1e7d9df00a5a1f57558409 Mon Sep 17 00:00:00 2001 From: rockeet Date: Tue, 19 May 2026 23:07:17 +0800 Subject: [PATCH 101/102] DBIter::Seek() remove call to value() for more lazy load --- db/db_iter.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 000953df88..56ca16d42a 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -2008,9 +2008,9 @@ void DBIter::Seek(const Slice& target) { if (statistics_ != nullptr) { // Decrement since we don't want to count this key as skipped RecordTick(statistics_, NUMBER_DB_SEEK_FOUND); - RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); + RecordTick(statistics_, ITER_BYTES_READ, key().size()); } - PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size()); + PERF_COUNTER_ADD(iter_read_bytes, key().size()); //local_stats_.BumpGlobalStatistics(statistics_); } From fc79b16a7d8eaafda9c1c22fdf92ae011925b956 Mon Sep 17 00:00:00 2001 From: rockeet Date: Sat, 23 May 2026 06:43:13 +0800 Subject: [PATCH 102/102] ReadOptions::ReadOptions() set cache_sst_file_iter --- include/rocksdb/options.h | 2 +- options/options.cc | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 8f75cd2106..eb3f17909c 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1887,7 +1887,7 @@ struct ReadOptions { ~ScopePinIfNotPinned() { if (ro_) ro_->FinishPin(); } }; - ReadOptions() {} + ReadOptions(); ReadOptions(bool _verify_checksums, bool _fill_cache); explicit ReadOptions(Env::IOActivity _io_activity); ReadOptions(const ReadOptions&, BooleanDontCopyTrue/*dispatch_tag*/); diff --git a/options/options.cc b/options/options.cc index 05e800e5d8..ee7199e11d 100644 --- 
a/options/options.cc +++ b/options/options.cc @@ -718,6 +718,9 @@ DBOptions* DBOptions::IncreaseParallelism(int total_threads) { static const bool g_cache_sst_file_iter = terark::getEnvBool("TOPLINGDB_CACHE_SST_FILE_ITER", false); +ReadOptions::ReadOptions() { + cache_sst_file_iter = g_cache_sst_file_iter; +} ReadOptions::ReadOptions(bool _verify_checksums, bool _fill_cache) : verify_checksums(_verify_checksums), fill_cache(_fill_cache) { cache_sst_file_iter = g_cache_sst_file_iter;