From 19492c2175581dd1290cb40eba01dea6629a971f Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Sat, 4 Apr 2026 16:55:36 -0400 Subject: [PATCH 1/4] move code out of hpp --- include/common/types.hpp | 316 ++------------------------------------ include/core/edge.hpp | 110 +------------ include/core/node.hpp | 285 +++------------------------------- include/main/database.hpp | 233 ++++------------------------ src/common/types.cpp | 278 +++++++++++++++++++++++++++++++++ src/core/CMakeLists.txt | 2 + src/core/edge.cpp | 121 +++++++++++++++ src/core/node.cpp | 263 +++++++++++++++++++++++++++++++ src/main/database.cpp | 178 +++++++++++++++++++++ 9 files changed, 912 insertions(+), 874 deletions(-) create mode 100644 src/core/edge.cpp create mode 100644 src/core/node.cpp diff --git a/include/common/types.hpp b/include/common/types.hpp index 3925fb7..73657e9 100644 --- a/include/common/types.hpp +++ b/include/common/types.hpp @@ -127,157 +127,20 @@ class Value { } /// Append a single element (or splice another raw array) to this array. - arrow::Status append_element(Value element) { - if (element.holds_raw_array()) { - return append_all(std::move(element)); - } - if (type_ == ValueType::NA) { - type_ = ValueType::ARRAY; - data_ = std::vector{std::move(element)}; - return arrow::Status::OK(); - } - if (!holds_raw_array()) { - return arrow::Status::TypeError( - "APPEND: target value is not a raw array"); - } - as_raw_array_mut().push_back(std::move(element)); - return arrow::Status::OK(); - } - - /** Concatenate: [1,2] + [3,4] -> [1,2,3,4]. */ - arrow::Status append_all(Value array_value) { - if (!array_value.holds_raw_array()) { - return arrow::Status::TypeError( - "APPEND_ALL: source value is not a raw array"); - } - auto& src = array_value.as_raw_array_mut(); - if (type_ == ValueType::NA) { - type_ = ValueType::ARRAY; - data_ = std::move(src); - return arrow::Status::OK(); - } - if (!holds_raw_array()) { - return arrow::Status::TypeError( - "APPEND_ALL: target value is not a raw array"); - } - auto& dest = as_raw_array_mut(); - dest.reserve(dest.size() + src.size()); - for (auto& v : src) { - dest.push_back(std::move(v)); - } - return arrow::Status::OK(); - } + arrow::Status append_element(Value element); + /// Concatenate: [1,2] + [3,4] -> [1,2,3,4]. + arrow::Status append_all(Value array_value); /// Convert the value to a human-readable string (no quotes around strings). - [[nodiscard]] std::string to_string() const { - switch (type_) { - case ValueType::NA: - return ""; - case ValueType::INT32: - return std::to_string(as_int32()); - case ValueType::INT64: - return std::to_string(as_int64()); - case ValueType::DOUBLE: - return std::to_string(as_double()); - case ValueType::FIXED_STRING16: - case ValueType::FIXED_STRING32: - case ValueType::FIXED_STRING64: - case ValueType::STRING: - return as_string(); - case ValueType::BOOL: - return as_bool() ? "true" : "false"; - case ValueType::ARRAY: { - if (holds_array_ref()) { - const auto& arr = as_array_ref(); - std::string result = "["; - for (uint32_t i = 0; i < arr.length(); ++i) { - if (i > 0) result += ", "; - auto elem = Value::read_value_from_memory(arr.element_ptr(i), - arr.elem_type()); - result += elem.to_string(); - } - result += "]"; - return result; - } - return "[]"; - } - case ValueType::MAP: { - if (holds_map_ref()) { - const auto& m = as_map_ref(); - std::string result = "{"; - for (uint32_t i = 0; i < m.count(); ++i) { - if (i > 0) result += ", "; - const auto* e = m.entry_ptr(i); - result += e->key.to_string(); - result += ": "; - auto val = Value::read_value_from_memory( - e->value, static_cast(e->value_type)); - result += val.to_string(); - } - result += "}"; - return result; - } - if (holds_raw_map()) { - std::string result = "{"; - bool first = true; - for (const auto& [k, v] : as_raw_map()) { - if (!first) result += ", "; - first = false; - result += k; - result += ": "; - result += v.to_string(); - } - result += "}"; - return result; - } - return "{}"; - } - default: - return ""; - } - } + [[nodiscard]] std::string to_string() const; /// Reinterpret a raw memory pointer as a Value of the given type. - static Value read_value_from_memory(const char* ptr, const ValueType type) { - if (ptr == nullptr) { - return Value{}; - } - switch (type) { - case ValueType::INT64: - return Value{*reinterpret_cast(ptr)}; - case ValueType::INT32: - return Value{*reinterpret_cast(ptr)}; - case ValueType::DOUBLE: - return Value{*reinterpret_cast(ptr)}; - case ValueType::FLOAT: - return Value{*reinterpret_cast(ptr)}; - case ValueType::BOOL: - return Value{*reinterpret_cast(ptr)}; - case ValueType::STRING: - case ValueType::FIXED_STRING16: - case ValueType::FIXED_STRING32: - case ValueType::FIXED_STRING64: - // All string types stored as StringRef, but preserve the field's - // declared type - return Value{*reinterpret_cast(ptr), type}; - case ValueType::ARRAY: - return Value{*reinterpret_cast(ptr)}; - case ValueType::MAP: - return Value{*reinterpret_cast(ptr)}; - case ValueType::NA: - default: - return Value{}; - } - } + static Value read_value_from_memory(const char* ptr, ValueType type); - // Equality operator bool operator==(const Value& other) const { - if (type_ != other.type_) { - return false; - } + if (type_ != other.type_) return false; return data_ == other.data_; } - bool operator!=(const Value& other) const { return !(*this == other); } private: @@ -337,173 +200,14 @@ struct ValueRef { } /// Convert the referenced value to an Arrow Scalar for compute kernels. - arrow::Result> as_scalar() const { - switch (type) { - case ValueType::INT32: - return arrow::MakeScalar(as_int32()); - case ValueType::INT64: - return arrow::MakeScalar(as_int64()); - case ValueType::DOUBLE: - return arrow::MakeScalar(as_double()); - case ValueType::STRING: - return arrow::MakeScalar(as_string_ref().to_string()); - case ValueType::BOOL: - return arrow::MakeScalar(as_bool()); - case ValueType::NA: - return arrow::MakeNullScalar(arrow::null()); - case ValueType::ARRAY: - return arrow::Status::NotImplemented( - "Array scalar conversion not yet implemented"); - case ValueType::MAP: - return arrow::Status::NotImplemented( - "Map scalar conversion not yet implemented"); - default: - return arrow::Status::NotImplemented( - "Unsupported Value type for Arrow scalar conversion: ", - to_string(type)); - } - } - - bool operator==(const ValueRef& other) const { - if (type != other.type) { - std::cout << "different types. this: " << to_string(type) - << ", other: " << to_string(other.type) << std::endl; - return false; - } - - // Both null - if (data == nullptr && other.data == nullptr) { - return true; - } - - // One null, one not null - if (data == nullptr || other.data == nullptr) { - return false; - } - - // Compare values based on type - switch (type) { - case ValueType::NA: - return true; // Both are NA - - case ValueType::INT32: - return *reinterpret_cast(data) == - *reinterpret_cast(other.data); - - case ValueType::INT64: - return *reinterpret_cast(data) == - *reinterpret_cast(other.data); - - case ValueType::FLOAT: - return *reinterpret_cast(data) == - *reinterpret_cast(other.data); - - case ValueType::DOUBLE: - return *reinterpret_cast(data) == - *reinterpret_cast(other.data); - - case ValueType::BOOL: - return *reinterpret_cast(data) == - *reinterpret_cast(other.data); - - case ValueType::STRING: { - const StringRef& str1 = *reinterpret_cast(data); - const StringRef& str2 = *reinterpret_cast(other.data); - return str1 == str2; - } - - case ValueType::ARRAY: { - const ArrayRef& arr1 = *reinterpret_cast(data); - const ArrayRef& arr2 = *reinterpret_cast(other.data); - return arr1 == arr2; - } - - case ValueType::MAP: { - const MapRef& m1 = *reinterpret_cast(data); - const MapRef& m2 = *reinterpret_cast(other.data); - return m1 == m2; - } - - default: - return false; // Unknown type - } - } + arrow::Result> as_scalar() const; + bool operator==(const ValueRef& other) const; bool operator!=(const ValueRef& other) const { return !(*this == other); } - - [[nodiscard]] bool equals(const ValueRef& other) const { - return *this == other; - } + [[nodiscard]] bool equals(const ValueRef& other) const { return *this == other; } /// Human-readable string representation (strings are quoted). - std::string ToString() const { - if (data == nullptr) { - return "NULL"; - } - - switch (type) { - case ValueType::NA: - return "NULL"; - - case ValueType::INT32: - return std::to_string(as_int32()); - - case ValueType::INT64: - return std::to_string(as_int64()); - - case ValueType::FLOAT: - return std::to_string(as_float()); - - case ValueType::DOUBLE: - return std::to_string(as_double()); - - case ValueType::BOOL: - return as_bool() ? "true" : "false"; - - case ValueType::FIXED_STRING16: - case ValueType::FIXED_STRING32: - case ValueType::FIXED_STRING64: - case ValueType::STRING: { - const StringRef& str_ref = as_string_ref(); - if (str_ref.is_null()) { - return "NULL"; - } - // Use StringRef's to_string() method - return "\"" + str_ref.to_string() + "\""; - } - case ValueType::ARRAY: { - const ArrayRef& arr = as_array_ref(); - if (arr.is_null()) return "NULL"; - std::string result = "["; - for (uint32_t i = 0; i < arr.length(); ++i) { - if (i > 0) result += ", "; - auto elem = Value::read_value_from_memory(arr.element_ptr(i), - arr.elem_type()); - result += elem.to_string(); - } - result += "]"; - return result; - } - case ValueType::MAP: { - const MapRef& m = as_map_ref(); - if (m.is_null()) return "NULL"; - std::string result = "{"; - for (uint32_t i = 0; i < m.count(); ++i) { - if (i > 0) result += ", "; - const auto* e = m.entry_ptr(i); - result += e->key.to_string(); - result += ": "; - auto val = Value::read_value_from_memory( - e->value, static_cast(e->value_type)); - result += val.to_string(); - } - result += "}"; - return result; - } - default: - return "UNKNOWN_TYPE"; - } - } + std::string ToString() const; }; // Stream operator for ValueType diff --git a/include/core/edge.hpp b/include/core/edge.hpp index bc81b91..5ec795b 100644 --- a/include/core/edge.hpp +++ b/include/core/edge.hpp @@ -82,124 +82,24 @@ class Edge { [[nodiscard]] NodeArena* get_arena() const { return arena_.get(); } /// Read a field value by Field descriptor. - /// Structural fields (id, source_id, target_id, created_ts) are returned - /// directly; user-defined properties are read from the arena. [[nodiscard]] arrow::Result get_value( - const std::shared_ptr& field) const { - if (field && (field->name() == field_names::kId || - field->name() == field_names::kEdgeId)) { - return Value{id_}; - } - if (field && field->name() == field_names::kSourceId) - return Value{source_id_}; - if (field && field->name() == field_names::kTargetId) - return Value{target_id_}; - if (field && field->name() == field_names::kCreatedTs) - return Value{created_ts_}; - if (!arena_ || !handle_) { - return arrow::Status::Invalid( - "get_value requires arena-backed edge with valid handle"); - } - return NodeArena::get_value(*handle_, layout_, field); - } + const std::shared_ptr& field) const; /// Return a raw pointer to the field's in-memory representation. [[nodiscard]] arrow::Result get_value_ptr( - const std::shared_ptr& field) const { - if (!field) { - return arrow::Status::Invalid("Field is null"); - } - if (field->name() == field_names::kId || - field->name() == field_names::kEdgeId) { - return reinterpret_cast(&id_); - } - if (field->name() == field_names::kSourceId) - return reinterpret_cast(&source_id_); - if (field->name() == field_names::kTargetId) - return reinterpret_cast(&target_id_); - if (field->name() == field_names::kCreatedTs) - return reinterpret_cast(&created_ts_); - if (arena_ && handle_) { - return NodeArena::get_value_ptr(*handle_, layout_, field); - } - return arrow::Status::KeyError("Field not found: ", field->name()); - } + const std::shared_ptr& field) const; /// Apply a batch of field updates atomically (one new version). - arrow::Result update_fields(const std::vector& updates) { - if (!arena_ || !handle_) { - return arrow::Status::Invalid( - "update_fields requires arena-backed edge with valid handle"); - } - return arena_->apply_updates(*handle_, layout_, updates); - } + arrow::Result update_fields(const std::vector& updates); /// Update a single field (convenience wrapper around update_fields). arrow::Result update(const std::shared_ptr& field, Value value, - UpdateType update_type = UpdateType::SET) { - return update_fields({{field, std::move(value), update_type}}); - } + UpdateType update_type = UpdateType::SET); /// Create a point-in-time view of this edge. /// When @p ctx is nullptr the current (latest) version is used. - EdgeView view(TemporalContext* ctx = nullptr) { - if (!ctx) { - VersionInfo* vi = handle_ ? handle_->version_info_ : nullptr; - return {this, vi, layout_}; - } - VersionInfo* resolved = ctx->resolve_edge_version(id_, *handle_); - return {this, resolved, layout_}; - } + EdgeView view(TemporalContext* ctx = nullptr); }; -// ============================================================================ -// EdgeView inline implementations (after Edge is fully defined) -// ============================================================================ - -inline arrow::Result EdgeView::get_value( - const std::shared_ptr& field) const { - if (resolved_version_ == nullptr) { - return edge_->get_value(field); - } - const NodeHandle* handle = edge_->get_handle(); - assert(handle != nullptr && "Versioned edge must have a handle"); - return NodeArena::get_value_at_version(*handle, resolved_version_, layout_, - field); -} - -inline arrow::Result EdgeView::get_value_ptr( - const std::shared_ptr& field) const { - if (resolved_version_ == nullptr || !layout_) { - return edge_->get_value_ptr(field); - } - if (field && (field->name() == field_names::kId || - field->name() == field_names::kEdgeId || - field->name() == field_names::kSourceId || - field->name() == field_names::kTargetId || - field->name() == field_names::kCreatedTs)) { - return edge_->get_value_ptr(field); - } - const NodeHandle* handle = edge_->get_handle(); - if (!handle) { - return edge_->get_value_ptr(field); - } - return edge_->get_arena()->get_value_ptr_at_version( - *handle, resolved_version_, layout_, field); -} - -inline arrow::Result EdgeView::get_value_ref( - const std::shared_ptr& field) const { - ARROW_ASSIGN_OR_RAISE(const auto ptr, get_value_ptr(field)); - return ValueRef{ptr, field->type()}; -} - -inline bool EdgeView::is_visible() const { - const NodeHandle* handle = edge_->get_handle(); - if (!handle || !handle->is_versioned()) { - return true; - } - return resolved_version_ != nullptr; -} - } // namespace tundradb #endif // EDGE_HPP diff --git a/include/core/node.hpp b/include/core/node.hpp index acd5fe1..2e30a0f 100644 --- a/include/core/node.hpp +++ b/include/core/node.hpp @@ -48,76 +48,33 @@ class Node { /// Return a raw pointer to the field's in-memory representation. arrow::Result get_value_ptr( - const std::shared_ptr &field) const { - if (arena_ != nullptr) { - return arena_->get_value_ptr(*handle_, layout_, field); - } - return arrow::Status::NotImplemented(""); - } + const std::shared_ptr &field) const; /// Return a lightweight non-owning reference to the field value. [[nodiscard]] ValueRef get_value_ref( - const std::shared_ptr &field) const { - const char *ptr = arena_->get_value_ptr(*handle_, layout_, field); - return {ptr, field->type()}; - } + const std::shared_ptr &field) const; /// Read a field value by Field descriptor (returns a copy). - arrow::Result get_value(const std::shared_ptr &field) const { - if (!arena_ || !handle_) { - return arrow::Status::Invalid( - "get_value requires arena-backed node with valid handle"); - } - return NodeArena::get_value(*handle_, layout_, field); - } + arrow::Result get_value(const std::shared_ptr &field) const; [[nodiscard]] std::shared_ptr get_schema() const { return schema_; } [[nodiscard]] NodeHandle *get_handle() const { return handle_.get(); } [[nodiscard]] NodeArena *get_arena() const { return arena_.get(); } /// Apply a batch of field updates atomically (one new version). - arrow::Result update_fields(const std::vector &updates) { - if (!arena_ || !handle_) { - return arrow::Status::Invalid( - "update_fields requires arena-backed node with valid handle"); - } - return arena_->apply_updates(*handle_, layout_, updates); - } + arrow::Result update_fields(const std::vector &updates); /// Update a single field (convenience wrapper around update_fields). arrow::Result update(const std::shared_ptr &field, Value value, - UpdateType update_type = UpdateType::SET) { - return update_fields({{field, std::move(value), update_type}}); - } + UpdateType update_type = UpdateType::SET); /// Shorthand for update(field, value, UpdateType::SET). arrow::Result set_value(const std::shared_ptr &field, - const Value &value) { - return update(field, value, UpdateType::SET); - } + const Value &value); - /** - * Create a temporal view of this node. - * - * @param ctx TemporalContext with snapshot (valid_time, tx_time). - * If nullptr, returns view of current version (no time-travel). - * @return NodeView that resolves version once and caches it. - * - * Usage: - * TemporalContext ctx(TemporalSnapshot::as_of_valid(timestamp)); - * auto view = node.view(&ctx); - * auto age = view.get_value_ptr(age_field); - */ - NodeView view(TemporalContext *ctx = nullptr) { - if (!ctx) { - // No temporal context > use the current version - return {this, handle_->version_info_, arena_.get(), layout_}; - } - - // Resolve version using TemporalContext - VersionInfo *resolved = ctx->resolve_node_version(id, *handle_); - return {this, resolved, arena_.get(), layout_}; - } + /// Create a temporal view of this node. + /// @param ctx If nullptr, returns view of current version (no time-travel). + NodeView view(TemporalContext *ctx = nullptr); }; /// Owns the shared NodeArena and manages per-schema node collections. @@ -131,45 +88,18 @@ class NodeManager { /// @param use_node_arena Must be true (non-arena path is removed). /// @param enable_versioning Enable temporal version chains in the arena. explicit NodeManager(std::shared_ptr schema_registry, - const bool validation_enabled = true, - const bool use_node_arena = true, - const bool enable_versioning = false) { - validation_enabled_ = validation_enabled; - use_node_arena_ = use_node_arena; - schema_registry_ = std::move(schema_registry); - layout_registry_ = std::make_shared(); - // Create arena with versioning enabled if requested - node_arena_ = node_arena_factory::create_free_list_arena( - layout_registry_, NodeArena::kInitialSize, NodeArena::kMinFragmentSize, - enable_versioning); - } + bool validation_enabled = true, + bool use_node_arena = true, + bool enable_versioning = false); ~NodeManager() = default; /// Look up a node by schema name and ID. arrow::Result> get_node(const std::string &schema_name, - const int64_t id) { - auto schema_it = nodes_.find(schema_name); - if (schema_it == nodes_.end()) { - return arrow::Status::KeyError("Schema not found: ", schema_name); - } - - auto node_it = schema_it->second.find(id); - if (node_it == schema_it->second.end()) { - return arrow::Status::KeyError("Node not found: ", schema_name, ":", id); - } - - return node_it->second; - } + int64_t id); /// Remove a node from the in-memory index. Returns false if not found. - bool remove_node(const std::string &schema_name, const int64_t id) { - auto schema_it = nodes_.find(schema_name); - if (schema_it == nodes_.end()) { - return false; - } - return schema_it->second.erase(id) > 0; - } + bool remove_node(const std::string &schema_name, int64_t id); /// Create a new node, allocate arena storage, and populate initial fields. /// @param add When true, the caller supplies the "id" value (used during @@ -177,213 +107,40 @@ class NodeManager { arrow::Result> create_node( const std::string &schema_name, const std::unordered_map &data, - const bool add = false) { - if (schema_name.empty()) { - return arrow::Status::Invalid("Schema name cannot be empty"); - } - - init_schema(schema_name); - - // ARROW_ASSIGN_OR_RAISE(const auto schema, - // schema_registry_->get(schema_name)); - if (validation_enabled_) { - if (!add && data.contains("id")) { - return arrow::Status::Invalid("'id' column is auto generated"); - } - - if (add && !data.contains("id")) { - return arrow::Status::Invalid("'id' is missing"); - } - - for (const auto &field : schema_->fields()) { - // check required - if (field->name() != "id" && !field->nullable() && - (!data.contains(field->name()) || - data.find(field->name())->second.is_null())) { - return arrow::Status::Invalid("Field '", field->name(), - "' is required"); - } - - if (data.contains(field->name())) { - const auto value = data.find(field->name())->second; - if (field->type() != value.type()) { - return arrow::Status::Invalid( - "Type mismatch for field '", field->name(), "'. Expected ", - to_string(field->type()), " but got ", to_string(value.type())); - } - } - } - } - - int64_t id = 0; - if (!add) { - // Get or create per-schema ID counter - if (id_counters_.find(schema_name) == id_counters_.end()) { - id_counters_[schema_name].store(0); - } - id = id_counters_[schema_name].fetch_add(1); - } else { - id = data.at("id").as_int64(); - } - - if (!use_node_arena_) { - return arrow::Status::NotImplemented( - "NodeManager without arena is no longer supported"); - } - - NodeHandle node_handle = node_arena_->allocate_node(layout_); - - // Initial population of v0: write directly to base node - // Use set_field_value_v0 for all fields (doesn't create versions) - ARROW_RETURN_NOT_OK(node_arena_->set_field_value_v0( - node_handle, layout_, schema_->get_field(std::string(field_names::kId)), - Value{id})); - - for (const auto &field : schema_->fields()) { - if (field->name() == field_names::kId) continue; - - Value value; - if (data.contains(field->name())) { - value = data.find(field->name())->second; - } // else: Value() = NULL - - ARROW_RETURN_NOT_OK( - node_arena_->set_field_value_v0(node_handle, layout_, field, value)); - } - - auto node = std::make_shared( - id, schema_name, std::make_unique(std::move(node_handle)), - node_arena_, schema_, layout_); - nodes_[schema_name][id] = node; - return node; - } + bool add = false); /// Override the next-ID counter for a schema (used during restore). - void set_id_counter(const std::string &schema_name, const int64_t value) { - id_counters_[schema_name].store(value); - } + void set_id_counter(const std::string &schema_name, int64_t value); /// Return the current value of the per-schema ID counter. - int64_t get_id_counter(const std::string &schema_name) const { - auto it = id_counters_.find(schema_name); - if (it == id_counters_.end()) { - return 0; - } - return it->second.load(); - } + int64_t get_id_counter(const std::string &schema_name) const; /// Return all per-schema ID counters (for snapshot/manifest persistence). - std::unordered_map get_all_id_counters() const { - std::unordered_map result; - for (const auto &[schema_name, counter] : id_counters_) { - result[schema_name] = counter.load(); - } - return result; - } + std::unordered_map get_all_id_counters() const; /// Restore all per-schema ID counters from a snapshot/manifest. void set_all_id_counters( - const std::unordered_map &counters) { - for (const auto &[schema_name, value] : counters) { - id_counters_[schema_name].store(value); - } - } + const std::unordered_map &counters); private: - // Per-schema ID counters (schema_name -> counter) std::unordered_map> id_counters_; - - // Per-schema node storage (schema_name -> (node_id -> Node)) std::unordered_map>> nodes_; - std::shared_ptr schema_registry_; std::shared_ptr layout_registry_; std::shared_ptr node_arena_; bool validation_enabled_; bool use_node_arena_; - - // cache schema std::string schema_name_; std::shared_ptr schema_; - - // cache layout std::shared_ptr layout_; - // since node creation is single threaded, we can cache the layout - // w/o synchronization std::shared_ptr create_or_get_layout( - const std::string &schema_name) const { - if (layout_registry_->exists(schema_name)) { - return layout_registry_->get_layout(schema_name); - } - auto layout = layout_registry_->create_layout( - schema_registry_->get(schema_name).ValueOrDie()); - layout_registry_->register_layout(layout); - return layout; - } - - // since node creation is single threaded, we can cache the schema - // w/o synchronization - void init_schema(const std::string &schema_name) { - if (schema_name_ == schema_name) return; - schema_name_ = schema_name; - schema_ = schema_registry_->get(schema_name).ValueOrDie(); - layout_ = create_or_get_layout(schema_name); - } + const std::string &schema_name) const; + void init_schema(const std::string &schema_name); }; -// ============================================================================ -// NodeView inline implementations (after Node is fully defined) -// ============================================================================ - -inline arrow::Result NodeView::get_value_ptr( - const std::shared_ptr &field) const { - assert(arena_ != nullptr && "NodeView created with null arena"); - assert(node_ != nullptr && "NodeView created with null node"); - - if (resolved_version_ == nullptr) { - return node_->get_value_ptr(field); - } - - const NodeHandle *handle = node_->get_handle(); - assert(handle != nullptr && "Versioned node must have a handle"); - - return arena_->get_value_ptr_at_version(*handle, resolved_version_, layout_, - field); -} - -inline arrow::Result NodeView::get_value( - const std::shared_ptr &field) const { - assert(node_ != nullptr && "NodeView created with null node"); - - if (resolved_version_ == nullptr) { - return node_->get_value(field); - } - - const NodeHandle *handle = node_->get_handle(); - assert(handle != nullptr && "Versioned node must have a handle"); - - return NodeArena::get_value_at_version(*handle, resolved_version_, layout_, - field); -} - -inline bool NodeView::is_visible() const { - assert(arena_ != nullptr && "NodeView created with null arena"); - assert(node_ != nullptr && "NodeView created with null node"); - const NodeHandle *handle = node_->get_handle(); - assert(handle != nullptr && "Node must have a handle"); - - // Non-versioned nodes are always visible - if (!handle->is_versioned()) { - return true; - } - - // For versioned nodes, check if we found a visible version at the snapshot - return resolved_version_ != nullptr; -} - } // namespace tundradb #endif // NODE_HPP \ No newline at end of file diff --git a/include/main/database.hpp b/include/main/database.hpp index 0bd24a5..ca6e781 100644 --- a/include/main/database.hpp +++ b/include/main/database.hpp @@ -48,250 +48,85 @@ class Database { /// Construct a database from the given configuration. /// When persistence is enabled, the storage, metadata, and snapshot /// subsystems are initialised from `config.get_db_path()`. - explicit Database(const DatabaseConfig &config = DatabaseConfig()) - : schema_registry_(std::make_shared()), - shard_manager_( - std::make_shared(schema_registry_, config)), - node_manager_(std::make_shared( - schema_registry_, config.is_validation_enabled(), true, - config.is_versioning_enabled())), - config_(config), - persistence_enabled_(config.is_persistence_enabled()), - edge_store_(std::make_shared(0, config.get_chunk_size())) { - // Initialize Arrow Compute module early in database lifecycle - if (!initialize_arrow_compute()) { - log_error("Failed to initialize Arrow Compute module"); - // Continue anyway, some operations might still work - } - - if (persistence_enabled_) { - const std::string &db_path = config.get_db_path(); - if (db_path.empty()) { - log_error("Database path is empty but persistence is enabled"); - persistence_enabled_ = false; - return; - } - - std::string data_path = db_path + "/data"; - storage_ = std::make_shared( - std::move(data_path), schema_registry_, node_manager_, config); - metadata_manager_ = std::make_shared(db_path); - snapshot_manager_ = std::make_shared( - metadata_manager_, storage_, shard_manager_, edge_store_, - node_manager_, schema_registry_); - } - } + explicit Database(const DatabaseConfig &config = DatabaseConfig()); /// Return a copy of the configuration used to create this database. DatabaseConfig get_config() const { return config_; } - /// Return the schema registry (shared ownership). - std::shared_ptr get_schema_registry() { - return schema_registry_; - } - + std::shared_ptr get_schema_registry() { return schema_registry_; } /// Return the metadata manager (nullptr when persistence is disabled). - std::shared_ptr get_metadata_manager() { - return metadata_manager_; - } - + std::shared_ptr get_metadata_manager() { return metadata_manager_; } /// Return the node manager (shared ownership). std::shared_ptr get_node_manager() { return node_manager_; } + /// Return the edge store (shared ownership). + [[nodiscard]] std::shared_ptr get_edge_store() const { return edge_store_; } + /// Return the shard manager (shared ownership). + [[nodiscard]] std::shared_ptr get_shard_manager() const { return shard_manager_; } /// Initialise persistence subsystems (storage, metadata, snapshots). - /// Must be called once after construction when persistence is enabled. - /// Returns `true` on success or skips silently when persistence is off. - arrow::Result initialize() { - if (persistence_enabled_) { - auto storage_init = this->storage_->initialize(); - if (!storage_init.ok()) { - return storage_init.status(); - } - - auto metadata_init = this->metadata_manager_->initialize(); - if (!metadata_init.ok()) { - return metadata_init.status(); - } - - auto snapshot_init = this->snapshot_manager_->initialize(); - if (!snapshot_init.ok()) { - return snapshot_init.status(); - } - } - return true; - } + arrow::Result initialize(); /// Create a new node in the given schema with the supplied field values. - /// The node is assigned a unique auto-incremented ID and inserted into - /// the appropriate shard. arrow::Result> create_node( const std::string &schema_name, - const std::unordered_map &data) { - if (schema_name.empty()) { - return arrow::Status::Invalid("Schema name cannot be empty"); - } - ARROW_ASSIGN_OR_RAISE(auto node, - node_manager_->create_node(schema_name, data)); - ARROW_RETURN_NOT_OK(shard_manager_->insert_node(node)); // TODO optimize - return node; - } + const std::unordered_map &data); - /// Update a single field on a node identified by schema name and ID. - /// @param field Resolved field descriptor. - arrow::Result update_node(const std::string &schema_name, - const int64_t id, + /// Update a single field on a node (by Field descriptor). + arrow::Result update_node(const std::string &schema_name, int64_t id, const std::shared_ptr &field, - const Value &value, - const UpdateType update_type) { - return shard_manager_->update_node(schema_name, id, field, value, - update_type); - } + const Value &value, UpdateType update_type); - /// Update a single field on a node, looked up by field name. - arrow::Result update_node(const std::string &schema_name, - const int64_t id, + /// Update a single field on a node (by field name). + arrow::Result update_node(const std::string &schema_name, int64_t id, const std::string &field_name, - const Value &value, - const UpdateType update_type) { - return shard_manager_->update_node(schema_name, id, field_name, value, - update_type); - } + const Value &value, UpdateType update_type); - /** - * @brief Batch-update multiple fields on one node (creates 1 version). - */ + /// Batch-update multiple fields on one node (creates 1 version). arrow::Result update_node_fields( - const std::string &schema_name, const int64_t id, - const std::vector &field_updates, - const UpdateType update_type) { - return shard_manager_->update_node_fields(schema_name, id, field_updates, - update_type); - } + const std::string &schema_name, int64_t id, + const std::vector &field_updates, UpdateType update_type); /// Remove a node from both the node manager and its shard. - arrow::Result remove_node(const std::string &schema_name, - int64_t node_id) { - if (auto res = node_manager_->remove_node(schema_name, node_id); !res) { - return arrow::Status::Invalid("Failed to remove node: ", schema_name, ":", - node_id); - } - return shard_manager_->remove_node(schema_name, node_id); - } + arrow::Result remove_node(const std::string &schema_name, int64_t node_id); - /// Register a typed edge schema so that edges of @p edge_type can carry - /// the given property fields. + /// Register a typed edge schema for edges of @p edge_type. arrow::Result register_edge_schema( const std::string &edge_type, - const std::vector> &fields) { - return edge_store_->register_edge_schema(edge_type, fields); - } + const std::vector> &fields); /// Create an edge from @p source_id to @p target_id with the given type. - arrow::Result connect(const int64_t source_id, const std::string &type, - const int64_t target_id) { - const auto edge = - edge_store_->create_edge(source_id, type, target_id).ValueOrDie(); - ARROW_RETURN_NOT_OK(edge_store_->add(edge)); - return true; - } + arrow::Result connect(int64_t source_id, const std::string &type, + int64_t target_id); /// Create an edge with property values attached. - arrow::Result connect( - const int64_t source_id, const std::string &type, const int64_t target_id, - std::unordered_map properties) { - ARROW_ASSIGN_OR_RAISE(const auto edge, - edge_store_->create_edge(source_id, type, target_id, - std::move(properties))); - ARROW_RETURN_NOT_OK(edge_store_->add(edge)); - return true; - } + arrow::Result connect(int64_t source_id, const std::string &type, + int64_t target_id, + std::unordered_map properties); /// Remove an edge by its unique ID. - arrow::Result remove_edge(const int64_t edge_id) { - return edge_store_->remove(edge_id); - } + arrow::Result remove_edge(int64_t edge_id); /// Compact shards for a single schema, merging small shards together. - arrow::Result compact(const std::string &schema_name) { - return shard_manager_->compact(schema_name); - } - - /// Return the edge store (shared ownership). - [[nodiscard]] std::shared_ptr get_edge_store() const { - return edge_store_; - } - - /// Return the shard manager (shared ownership). - [[nodiscard]] std::shared_ptr get_shard_manager() const { - return shard_manager_; - } - + arrow::Result compact(const std::string &schema_name); /// Compact shards for every registered schema. - arrow::Result compact_all() { return shard_manager_->compact_all(); } + arrow::Result compact_all(); /// Materialise all nodes of a schema into an Arrow Table. - /// @param temporal_context Optional temporal filter; when non-null, only - /// field versions visible at the requested point-in-time are included. - /// @param chunk_size Maximum number of rows per Arrow record batch. arrow::Result> get_table( const std::string &schema_name, TemporalContext *temporal_context = nullptr, - size_t chunk_size = 10000) const { - ARROW_ASSIGN_OR_RAISE(const auto schema, - schema_registry_->get(schema_name)); - auto arrow_schema = schema->arrow(); - ARROW_ASSIGN_OR_RAISE(auto all_nodes, - shard_manager_->get_nodes(schema_name)); - - if (all_nodes.empty()) { - std::vector> empty_columns; - empty_columns.reserve(arrow_schema->num_fields()); - for (int i = 0; i < arrow_schema->num_fields(); i++) { - empty_columns.push_back(std::make_shared( - std::vector>{})); - } - return arrow::Table::Make(arrow_schema, empty_columns); - } - - std::ranges::sort(all_nodes, [](const std::shared_ptr &a, - const std::shared_ptr &b) { - return a->id < b->id; - }); - - return create_table(schema, all_nodes, chunk_size, temporal_context); - } + size_t chunk_size = 10000) const; /// Return the number of shards backing the given schema. - arrow::Result get_shard_count(const std::string &schema_name) const { - if (!schema_registry_->exists(schema_name)) { - return arrow::Status::Invalid("Schema '", schema_name, "' not found"); - } - return shard_manager_->get_shard_count(schema_name); - } - + arrow::Result get_shard_count(const std::string &schema_name) const; /// Return the node count in each shard for the given schema. - arrow::Result> get_shard_sizes( - const std::string &schema_name) const { - if (!schema_registry_->exists(schema_name)) { - return arrow::Status::Invalid("Schema '", schema_name, "' not found"); - } - return shard_manager_->get_shard_sizes(schema_name); - } - + arrow::Result> get_shard_sizes(const std::string &schema_name) const; /// Return the [min_id, max_id] range for each shard of the given schema. arrow::Result>> get_shard_ranges( - const std::string &schema_name) const { - if (!schema_registry_->exists(schema_name)) { - return arrow::Status::Invalid("Schema '", schema_name, "' not found"); - } - return shard_manager_->get_shard_ranges(schema_name); - } + const std::string &schema_name) const; /// Persist the current database state to disk as a new snapshot. - /// Requires persistence to be enabled. - arrow::Result create_snapshot() { - return snapshot_manager_->commit(); - } + arrow::Result create_snapshot(); /// Execute a read-only query and return the result set as an Arrow Table. [[nodiscard]] arrow::Result> query( diff --git a/src/common/types.cpp b/src/common/types.cpp index 63510e4..cf3f047 100644 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -26,4 +26,282 @@ std::string ValueRef::as_string() const { return ""; } +// --------------------------------------------------------------------------- +// Value — large methods moved from types.hpp +// --------------------------------------------------------------------------- + +arrow::Status Value::append_element(Value element) { + if (element.holds_raw_array()) { + return append_all(std::move(element)); + } + if (type_ == ValueType::NA) { + type_ = ValueType::ARRAY; + data_ = std::vector{std::move(element)}; + return arrow::Status::OK(); + } + if (!holds_raw_array()) { + return arrow::Status::TypeError( + "APPEND: target value is not a raw array"); + } + as_raw_array_mut().push_back(std::move(element)); + return arrow::Status::OK(); +} + +arrow::Status Value::append_all(Value array_value) { + if (!array_value.holds_raw_array()) { + return arrow::Status::TypeError( + "APPEND_ALL: source value is not a raw array"); + } + auto& src = array_value.as_raw_array_mut(); + if (type_ == ValueType::NA) { + type_ = ValueType::ARRAY; + data_ = std::move(src); + return arrow::Status::OK(); + } + if (!holds_raw_array()) { + return arrow::Status::TypeError( + "APPEND_ALL: target value is not a raw array"); + } + auto& dest = as_raw_array_mut(); + dest.reserve(dest.size() + src.size()); + for (auto& v : src) { + dest.push_back(std::move(v)); + } + return arrow::Status::OK(); +} + +std::string Value::to_string() const { + switch (type_) { + case ValueType::NA: + return ""; + case ValueType::INT32: + return std::to_string(as_int32()); + case ValueType::INT64: + return std::to_string(as_int64()); + case ValueType::DOUBLE: + return std::to_string(as_double()); + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: + case ValueType::STRING: + return as_string(); + case ValueType::BOOL: + return as_bool() ? "true" : "false"; + case ValueType::ARRAY: { + if (holds_array_ref()) { + const auto& arr = as_array_ref(); + std::string result = "["; + for (uint32_t i = 0; i < arr.length(); ++i) { + if (i > 0) result += ", "; + auto elem = + Value::read_value_from_memory(arr.element_ptr(i), arr.elem_type()); + result += elem.to_string(); + } + result += "]"; + return result; + } + return "[]"; + } + case ValueType::MAP: { + if (holds_map_ref()) { + const auto& m = as_map_ref(); + std::string result = "{"; + for (uint32_t i = 0; i < m.count(); ++i) { + if (i > 0) result += ", "; + const auto* e = m.entry_ptr(i); + result += e->key.to_string(); + result += ": "; + auto val = Value::read_value_from_memory( + e->value, static_cast(e->value_type)); + result += val.to_string(); + } + result += "}"; + return result; + } + if (holds_raw_map()) { + std::string result = "{"; + bool first = true; + for (const auto& [k, v] : as_raw_map()) { + if (!first) result += ", "; + first = false; + result += k; + result += ": "; + result += v.to_string(); + } + result += "}"; + return result; + } + return "{}"; + } + default: + return ""; + } +} + +Value Value::read_value_from_memory(const char* ptr, const ValueType type) { + if (ptr == nullptr) { + return Value{}; + } + switch (type) { + case ValueType::INT64: + return Value{*reinterpret_cast(ptr)}; + case ValueType::INT32: + return Value{*reinterpret_cast(ptr)}; + case ValueType::DOUBLE: + return Value{*reinterpret_cast(ptr)}; + case ValueType::FLOAT: + return Value{*reinterpret_cast(ptr)}; + case ValueType::BOOL: + return Value{*reinterpret_cast(ptr)}; + case ValueType::STRING: + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: + return Value{*reinterpret_cast(ptr), type}; + case ValueType::ARRAY: + return Value{*reinterpret_cast(ptr)}; + case ValueType::MAP: + return Value{*reinterpret_cast(ptr)}; + case ValueType::NA: + default: + return Value{}; + } +} + +// --------------------------------------------------------------------------- +// ValueRef — large methods moved from types.hpp +// --------------------------------------------------------------------------- + +arrow::Result> ValueRef::as_scalar() const { + switch (type) { + case ValueType::INT32: + return arrow::MakeScalar(as_int32()); + case ValueType::INT64: + return arrow::MakeScalar(as_int64()); + case ValueType::DOUBLE: + return arrow::MakeScalar(as_double()); + case ValueType::STRING: + return arrow::MakeScalar(as_string_ref().to_string()); + case ValueType::BOOL: + return arrow::MakeScalar(as_bool()); + case ValueType::NA: + return arrow::MakeNullScalar(arrow::null()); + case ValueType::ARRAY: + return arrow::Status::NotImplemented( + "Array scalar conversion not yet implemented"); + case ValueType::MAP: + return arrow::Status::NotImplemented( + "Map scalar conversion not yet implemented"); + default: + return arrow::Status::NotImplemented( + "Unsupported Value type for Arrow scalar conversion: ", + to_string(type)); + } +} + +bool ValueRef::operator==(const ValueRef& other) const { + if (type != other.type) { + std::cout << "different types. this: " << to_string(type) + << ", other: " << to_string(other.type) << std::endl; + return false; + } + if (data == nullptr && other.data == nullptr) return true; + if (data == nullptr || other.data == nullptr) return false; + + switch (type) { + case ValueType::NA: + return true; + case ValueType::INT32: + return *reinterpret_cast(data) == + *reinterpret_cast(other.data); + case ValueType::INT64: + return *reinterpret_cast(data) == + *reinterpret_cast(other.data); + case ValueType::FLOAT: + return *reinterpret_cast(data) == + *reinterpret_cast(other.data); + case ValueType::DOUBLE: + return *reinterpret_cast(data) == + *reinterpret_cast(other.data); + case ValueType::BOOL: + return *reinterpret_cast(data) == + *reinterpret_cast(other.data); + case ValueType::STRING: { + const StringRef& str1 = *reinterpret_cast(data); + const StringRef& str2 = *reinterpret_cast(other.data); + return str1 == str2; + } + case ValueType::ARRAY: { + const ArrayRef& arr1 = *reinterpret_cast(data); + const ArrayRef& arr2 = *reinterpret_cast(other.data); + return arr1 == arr2; + } + case ValueType::MAP: { + const MapRef& m1 = *reinterpret_cast(data); + const MapRef& m2 = *reinterpret_cast(other.data); + return m1 == m2; + } + default: + return false; + } +} + +std::string ValueRef::ToString() const { + if (data == nullptr) return "NULL"; + + switch (type) { + case ValueType::NA: + return "NULL"; + case ValueType::INT32: + return std::to_string(as_int32()); + case ValueType::INT64: + return std::to_string(as_int64()); + case ValueType::FLOAT: + return std::to_string(as_float()); + case ValueType::DOUBLE: + return std::to_string(as_double()); + case ValueType::BOOL: + return as_bool() ? "true" : "false"; + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: + case ValueType::STRING: { + const StringRef& str_ref = as_string_ref(); + if (str_ref.is_null()) return "NULL"; + return "\"" + str_ref.to_string() + "\""; + } + case ValueType::ARRAY: { + const ArrayRef& arr = as_array_ref(); + if (arr.is_null()) return "NULL"; + std::string result = "["; + for (uint32_t i = 0; i < arr.length(); ++i) { + if (i > 0) result += ", "; + auto elem = + Value::read_value_from_memory(arr.element_ptr(i), arr.elem_type()); + result += elem.to_string(); + } + result += "]"; + return result; + } + case ValueType::MAP: { + const MapRef& m = as_map_ref(); + if (m.is_null()) return "NULL"; + std::string result = "{"; + for (uint32_t i = 0; i < m.count(); ++i) { + if (i > 0) result += ", "; + const auto* e = m.entry_ptr(i); + result += e->key.to_string(); + result += ": "; + auto val = Value::read_value_from_memory( + e->value, static_cast(e->value_type)); + result += val.to_string(); + } + result += "}"; + return result; + } + default: + return "UNKNOWN_TYPE"; + } +} + } // namespace tundradb diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index a0ebfba..ba40c8a 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,3 +1,5 @@ target_sources(core PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/edge.cpp ${CMAKE_CURRENT_SOURCE_DIR}/edge_store.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/node.cpp ) diff --git a/src/core/edge.cpp b/src/core/edge.cpp new file mode 100644 index 0000000..9e1c19f --- /dev/null +++ b/src/core/edge.cpp @@ -0,0 +1,121 @@ +#include "core/edge.hpp" + +namespace tundradb { + +// --------------------------------------------------------------------------- +// Edge +// --------------------------------------------------------------------------- + +arrow::Result Edge::get_value( + const std::shared_ptr& field) const { + if (field && (field->name() == field_names::kId || + field->name() == field_names::kEdgeId)) { + return Value{id_}; + } + if (field && field->name() == field_names::kSourceId) + return Value{source_id_}; + if (field && field->name() == field_names::kTargetId) + return Value{target_id_}; + if (field && field->name() == field_names::kCreatedTs) + return Value{created_ts_}; + if (!arena_ || !handle_) { + return arrow::Status::Invalid( + "get_value requires arena-backed edge with valid handle"); + } + return NodeArena::get_value(*handle_, layout_, field); +} + +arrow::Result Edge::get_value_ptr( + const std::shared_ptr& field) const { + if (!field) { + return arrow::Status::Invalid("Field is null"); + } + if (field->name() == field_names::kId || + field->name() == field_names::kEdgeId) { + return reinterpret_cast(&id_); + } + if (field->name() == field_names::kSourceId) + return reinterpret_cast(&source_id_); + if (field->name() == field_names::kTargetId) + return reinterpret_cast(&target_id_); + if (field->name() == field_names::kCreatedTs) + return reinterpret_cast(&created_ts_); + if (arena_ && handle_) { + return NodeArena::get_value_ptr(*handle_, layout_, field); + } + return arrow::Status::KeyError("Field not found: ", field->name()); +} + +arrow::Result Edge::update_fields( + const std::vector& updates) { + if (!arena_ || !handle_) { + return arrow::Status::Invalid( + "update_fields requires arena-backed edge with valid handle"); + } + return arena_->apply_updates(*handle_, layout_, updates); +} + +arrow::Result Edge::update(const std::shared_ptr& field, + Value value, UpdateType update_type) { + return update_fields({{field, std::move(value), update_type}}); +} + +EdgeView Edge::view(TemporalContext* ctx) { + if (!ctx) { + VersionInfo* vi = handle_ ? handle_->version_info_ : nullptr; + return {this, vi, layout_}; + } + VersionInfo* resolved = ctx->resolve_edge_version(id_, *handle_); + return {this, resolved, layout_}; +} + +// --------------------------------------------------------------------------- +// EdgeView +// --------------------------------------------------------------------------- + +arrow::Result EdgeView::get_value( + const std::shared_ptr& field) const { + if (resolved_version_ == nullptr) { + return edge_->get_value(field); + } + const NodeHandle* handle = edge_->get_handle(); + assert(handle != nullptr && "Versioned edge must have a handle"); + return NodeArena::get_value_at_version(*handle, resolved_version_, layout_, + field); +} + +arrow::Result EdgeView::get_value_ptr( + const std::shared_ptr& field) const { + if (resolved_version_ == nullptr || !layout_) { + return edge_->get_value_ptr(field); + } + if (field && (field->name() == field_names::kId || + field->name() == field_names::kEdgeId || + field->name() == field_names::kSourceId || + field->name() == field_names::kTargetId || + field->name() == field_names::kCreatedTs)) { + return edge_->get_value_ptr(field); + } + const NodeHandle* handle = edge_->get_handle(); + if (!handle) { + return edge_->get_value_ptr(field); + } + return edge_->get_arena()->get_value_ptr_at_version( + *handle, resolved_version_, layout_, field); +} + +arrow::Result EdgeView::get_value_ref( + const std::shared_ptr& field) const { + ARROW_ASSIGN_OR_RAISE(const auto ptr, get_value_ptr(field)); + return ValueRef{ptr, field->type()}; +} + +bool EdgeView::is_visible() const { + const NodeHandle* handle = edge_->get_handle(); + if (!handle || !handle->is_versioned()) { + return true; + } + return resolved_version_ != nullptr; +} + +} // namespace tundradb diff --git a/src/core/node.cpp b/src/core/node.cpp new file mode 100644 index 0000000..1b01b5f --- /dev/null +++ b/src/core/node.cpp @@ -0,0 +1,263 @@ +#include "core/node.hpp" + +namespace tundradb { + +// --------------------------------------------------------------------------- +// Node +// --------------------------------------------------------------------------- + +arrow::Result Node::get_value_ptr( + const std::shared_ptr &field) const { + if (arena_ != nullptr) { + return arena_->get_value_ptr(*handle_, layout_, field); + } + return arrow::Status::NotImplemented(""); +} + +ValueRef Node::get_value_ref(const std::shared_ptr &field) const { + const char *ptr = arena_->get_value_ptr(*handle_, layout_, field); + return {ptr, field->type()}; +} + +arrow::Result Node::get_value( + const std::shared_ptr &field) const { + if (!arena_ || !handle_) { + return arrow::Status::Invalid( + "get_value requires arena-backed node with valid handle"); + } + return NodeArena::get_value(*handle_, layout_, field); +} + +arrow::Result Node::update_fields( + const std::vector &updates) { + if (!arena_ || !handle_) { + return arrow::Status::Invalid( + "update_fields requires arena-backed node with valid handle"); + } + return arena_->apply_updates(*handle_, layout_, updates); +} + +arrow::Result Node::update(const std::shared_ptr &field, + Value value, UpdateType update_type) { + return update_fields({{field, std::move(value), update_type}}); +} + +arrow::Result Node::set_value(const std::shared_ptr &field, + const Value &value) { + return update(field, value, UpdateType::SET); +} + +NodeView Node::view(TemporalContext *ctx) { + if (!ctx) { + return {this, handle_->version_info_, arena_.get(), layout_}; + } + VersionInfo *resolved = ctx->resolve_node_version(id, *handle_); + return {this, resolved, arena_.get(), layout_}; +} + +// --------------------------------------------------------------------------- +// NodeManager +// --------------------------------------------------------------------------- + +NodeManager::NodeManager(std::shared_ptr schema_registry, + const bool validation_enabled, + const bool use_node_arena, + const bool enable_versioning) + : validation_enabled_(validation_enabled), + use_node_arena_(use_node_arena), + schema_registry_(std::move(schema_registry)), + layout_registry_(std::make_shared()), + node_arena_(node_arena_factory::create_free_list_arena( + layout_registry_, NodeArena::kInitialSize, NodeArena::kMinFragmentSize, + enable_versioning)) {} + +arrow::Result> NodeManager::get_node( + const std::string &schema_name, const int64_t id) { + auto schema_it = nodes_.find(schema_name); + if (schema_it == nodes_.end()) { + return arrow::Status::KeyError("Schema not found: ", schema_name); + } + auto node_it = schema_it->second.find(id); + if (node_it == schema_it->second.end()) { + return arrow::Status::KeyError("Node not found: ", schema_name, ":", id); + } + return node_it->second; +} + +bool NodeManager::remove_node(const std::string &schema_name, + const int64_t id) { + auto schema_it = nodes_.find(schema_name); + if (schema_it == nodes_.end()) { + return false; + } + return schema_it->second.erase(id) > 0; +} + +arrow::Result> NodeManager::create_node( + const std::string &schema_name, + const std::unordered_map &data, const bool add) { + if (schema_name.empty()) { + return arrow::Status::Invalid("Schema name cannot be empty"); + } + + init_schema(schema_name); + + if (validation_enabled_) { + if (!add && data.contains("id")) { + return arrow::Status::Invalid("'id' column is auto generated"); + } + if (add && !data.contains("id")) { + return arrow::Status::Invalid("'id' is missing"); + } + for (const auto &field : schema_->fields()) { + if (field->name() != "id" && !field->nullable() && + (!data.contains(field->name()) || + data.find(field->name())->second.is_null())) { + return arrow::Status::Invalid("Field '", field->name(), + "' is required"); + } + if (data.contains(field->name())) { + const auto value = data.find(field->name())->second; + if (field->type() != value.type()) { + return arrow::Status::Invalid( + "Type mismatch for field '", field->name(), "'. Expected ", + to_string(field->type()), " but got ", to_string(value.type())); + } + } + } + } + + int64_t id = 0; + if (!add) { + if (id_counters_.find(schema_name) == id_counters_.end()) { + id_counters_[schema_name].store(0); + } + id = id_counters_[schema_name].fetch_add(1); + } else { + id = data.at("id").as_int64(); + } + + if (!use_node_arena_) { + return arrow::Status::NotImplemented( + "NodeManager without arena is no longer supported"); + } + + NodeHandle node_handle = node_arena_->allocate_node(layout_); + + ARROW_RETURN_NOT_OK(node_arena_->set_field_value_v0( + node_handle, layout_, schema_->get_field(std::string(field_names::kId)), + Value{id})); + + for (const auto &field : schema_->fields()) { + if (field->name() == field_names::kId) continue; + Value value; + if (data.contains(field->name())) { + value = data.find(field->name())->second; + } + ARROW_RETURN_NOT_OK( + node_arena_->set_field_value_v0(node_handle, layout_, field, value)); + } + + auto node = std::make_shared( + id, schema_name, std::make_unique(std::move(node_handle)), + node_arena_, schema_, layout_); + nodes_[schema_name][id] = node; + return node; +} + +void NodeManager::set_id_counter(const std::string &schema_name, + const int64_t value) { + id_counters_[schema_name].store(value); +} + +int64_t NodeManager::get_id_counter(const std::string &schema_name) const { + auto it = id_counters_.find(schema_name); + if (it == id_counters_.end()) { + return 0; + } + return it->second.load(); +} + +std::unordered_map NodeManager::get_all_id_counters() + const { + std::unordered_map result; + for (const auto &[schema_name, counter] : id_counters_) { + result[schema_name] = counter.load(); + } + return result; +} + +void NodeManager::set_all_id_counters( + const std::unordered_map &counters) { + for (const auto &[schema_name, value] : counters) { + id_counters_[schema_name].store(value); + } +} + +std::shared_ptr NodeManager::create_or_get_layout( + const std::string &schema_name) const { + if (layout_registry_->exists(schema_name)) { + return layout_registry_->get_layout(schema_name); + } + auto layout = layout_registry_->create_layout( + schema_registry_->get(schema_name).ValueOrDie()); + layout_registry_->register_layout(layout); + return layout; +} + +void NodeManager::init_schema(const std::string &schema_name) { + if (schema_name_ == schema_name) return; + schema_name_ = schema_name; + schema_ = schema_registry_->get(schema_name).ValueOrDie(); + layout_ = create_or_get_layout(schema_name); +} + +// --------------------------------------------------------------------------- +// NodeView +// --------------------------------------------------------------------------- + +arrow::Result NodeView::get_value_ptr( + const std::shared_ptr &field) const { + assert(arena_ != nullptr && "NodeView created with null arena"); + assert(node_ != nullptr && "NodeView created with null node"); + + if (resolved_version_ == nullptr) { + return node_->get_value_ptr(field); + } + + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Versioned node must have a handle"); + + return arena_->get_value_ptr_at_version(*handle, resolved_version_, layout_, + field); +} + +arrow::Result NodeView::get_value( + const std::shared_ptr &field) const { + assert(node_ != nullptr && "NodeView created with null node"); + + if (resolved_version_ == nullptr) { + return node_->get_value(field); + } + + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Versioned node must have a handle"); + + return NodeArena::get_value_at_version(*handle, resolved_version_, layout_, + field); +} + +bool NodeView::is_visible() const { + assert(arena_ != nullptr && "NodeView created with null arena"); + assert(node_ != nullptr && "NodeView created with null node"); + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Node must have a handle"); + + if (!handle->is_versioned()) { + return true; + } + + return resolved_version_ != nullptr; +} + +} // namespace tundradb diff --git a/src/main/database.cpp b/src/main/database.cpp index 6b90614..d96b49f 100644 --- a/src/main/database.cpp +++ b/src/main/database.cpp @@ -31,6 +31,184 @@ namespace fs = std::filesystem; namespace tundradb { +// --------------------------------------------------------------------------- +// Database — methods moved from database.hpp +// --------------------------------------------------------------------------- + +Database::Database(const DatabaseConfig &config) + : schema_registry_(std::make_shared()), + shard_manager_(std::make_shared(schema_registry_, config)), + node_manager_(std::make_shared( + schema_registry_, config.is_validation_enabled(), true, + config.is_versioning_enabled())), + config_(config), + persistence_enabled_(config.is_persistence_enabled()), + edge_store_(std::make_shared(0, config.get_chunk_size())) { + if (!initialize_arrow_compute()) { + log_error("Failed to initialize Arrow Compute module"); + } + if (persistence_enabled_) { + const std::string &db_path = config.get_db_path(); + if (db_path.empty()) { + log_error("Database path is empty but persistence is enabled"); + persistence_enabled_ = false; + return; + } + std::string data_path = db_path + "/data"; + storage_ = std::make_shared(std::move(data_path), + schema_registry_, node_manager_, config); + metadata_manager_ = std::make_shared(db_path); + snapshot_manager_ = std::make_shared( + metadata_manager_, storage_, shard_manager_, edge_store_, node_manager_, + schema_registry_); + } +} + +arrow::Result Database::initialize() { + if (persistence_enabled_) { + ARROW_RETURN_NOT_OK(storage_->initialize().status()); + ARROW_RETURN_NOT_OK(metadata_manager_->initialize().status()); + ARROW_RETURN_NOT_OK(snapshot_manager_->initialize().status()); + } + return true; +} + +arrow::Result> Database::create_node( + const std::string &schema_name, + const std::unordered_map &data) { + if (schema_name.empty()) { + return arrow::Status::Invalid("Schema name cannot be empty"); + } + ARROW_ASSIGN_OR_RAISE(auto node, + node_manager_->create_node(schema_name, data)); + ARROW_RETURN_NOT_OK(shard_manager_->insert_node(node)); + return node; +} + +arrow::Result Database::update_node(const std::string &schema_name, + int64_t id, + const std::shared_ptr &field, + const Value &value, + UpdateType update_type) { + return shard_manager_->update_node(schema_name, id, field, value, + update_type); +} + +arrow::Result Database::update_node(const std::string &schema_name, + int64_t id, + const std::string &field_name, + const Value &value, + UpdateType update_type) { + return shard_manager_->update_node(schema_name, id, field_name, value, + update_type); +} + +arrow::Result Database::update_node_fields( + const std::string &schema_name, int64_t id, + const std::vector &field_updates, UpdateType update_type) { + return shard_manager_->update_node_fields(schema_name, id, field_updates, + update_type); +} + +arrow::Result Database::remove_node(const std::string &schema_name, + int64_t node_id) { + if (!node_manager_->remove_node(schema_name, node_id)) { + return arrow::Status::Invalid("Failed to remove node: ", schema_name, ":", + node_id); + } + return shard_manager_->remove_node(schema_name, node_id); +} + +arrow::Result Database::register_edge_schema( + const std::string &edge_type, + const std::vector> &fields) { + return edge_store_->register_edge_schema(edge_type, fields); +} + +arrow::Result Database::connect(int64_t source_id, + const std::string &type, + int64_t target_id) { + ARROW_ASSIGN_OR_RAISE(const auto edge, + edge_store_->create_edge(source_id, type, target_id)); + ARROW_RETURN_NOT_OK(edge_store_->add(edge)); + return true; +} + +arrow::Result Database::connect( + int64_t source_id, const std::string &type, int64_t target_id, + std::unordered_map properties) { + ARROW_ASSIGN_OR_RAISE( + const auto edge, + edge_store_->create_edge(source_id, type, target_id, + std::move(properties))); + ARROW_RETURN_NOT_OK(edge_store_->add(edge)); + return true; +} + +arrow::Result Database::remove_edge(int64_t edge_id) { + return edge_store_->remove(edge_id); +} + +arrow::Result Database::compact(const std::string &schema_name) { + return shard_manager_->compact(schema_name); +} + +arrow::Result Database::compact_all() { + return shard_manager_->compact_all(); +} + +arrow::Result> Database::get_table( + const std::string &schema_name, TemporalContext *temporal_context, + size_t chunk_size) const { + ARROW_ASSIGN_OR_RAISE(const auto schema, + schema_registry_->get(schema_name)); + auto arrow_schema = schema->arrow(); + ARROW_ASSIGN_OR_RAISE(auto all_nodes, + shard_manager_->get_nodes(schema_name)); + if (all_nodes.empty()) { + std::vector> empty_columns; + empty_columns.reserve(arrow_schema->num_fields()); + for (int i = 0; i < arrow_schema->num_fields(); i++) { + empty_columns.push_back(std::make_shared( + std::vector>{})); + } + return arrow::Table::Make(arrow_schema, empty_columns); + } + std::ranges::sort(all_nodes, [](const std::shared_ptr &a, + const std::shared_ptr &b) { + return a->id < b->id; + }); + return create_table(schema, all_nodes, chunk_size, temporal_context); +} + +arrow::Result Database::get_shard_count( + const std::string &schema_name) const { + if (!schema_registry_->exists(schema_name)) { + return arrow::Status::Invalid("Schema '", schema_name, "' not found"); + } + return shard_manager_->get_shard_count(schema_name); +} + +arrow::Result> Database::get_shard_sizes( + const std::string &schema_name) const { + if (!schema_registry_->exists(schema_name)) { + return arrow::Status::Invalid("Schema '", schema_name, "' not found"); + } + return shard_manager_->get_shard_sizes(schema_name); +} + +arrow::Result>> +Database::get_shard_ranges(const std::string &schema_name) const { + if (!schema_registry_->exists(schema_name)) { + return arrow::Status::Invalid("Schema '", schema_name, "' not found"); + } + return shard_manager_->get_shard_ranges(schema_name); +} + +arrow::Result Database::create_snapshot() { + return snapshot_manager_->commit(); +} + arrow::Result>>> populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema, const std::shared_ptr& output_schema, From 4544e0672f2ae02ff7b6ea67329dcc2510583413 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Sat, 4 Apr 2026 16:58:15 -0400 Subject: [PATCH 2/4] move code out of hpp --- include/common/types.hpp | 4 +- include/core/node.hpp | 3 +- include/main/database.hpp | 28 +++-- include/storage/metadata.hpp | 191 +++-------------------------------- src/common/types.cpp | 7 +- src/core/node.cpp | 4 +- src/main/database.cpp | 68 ++++++------- src/storage/metadata.cpp | 151 +++++++++++++++++++++++++++ 8 files changed, 223 insertions(+), 233 deletions(-) diff --git a/include/common/types.hpp b/include/common/types.hpp index 73657e9..4694cee 100644 --- a/include/common/types.hpp +++ b/include/common/types.hpp @@ -204,7 +204,9 @@ struct ValueRef { bool operator==(const ValueRef& other) const; bool operator!=(const ValueRef& other) const { return !(*this == other); } - [[nodiscard]] bool equals(const ValueRef& other) const { return *this == other; } + [[nodiscard]] bool equals(const ValueRef& other) const { + return *this == other; + } /// Human-readable string representation (strings are quoted). std::string ToString() const; diff --git a/include/core/node.hpp b/include/core/node.hpp index 2e30a0f..f753f96 100644 --- a/include/core/node.hpp +++ b/include/core/node.hpp @@ -106,8 +106,7 @@ class NodeManager { /// snapshot restore); otherwise an auto-incremented ID is used. arrow::Result> create_node( const std::string &schema_name, - const std::unordered_map &data, - bool add = false); + const std::unordered_map &data, bool add = false); /// Override the next-ID counter for a schema (used during restore). void set_id_counter(const std::string &schema_name, int64_t value); diff --git a/include/main/database.hpp b/include/main/database.hpp index ca6e781..cfa024c 100644 --- a/include/main/database.hpp +++ b/include/main/database.hpp @@ -53,15 +53,23 @@ class Database { /// Return a copy of the configuration used to create this database. DatabaseConfig get_config() const { return config_; } /// Return the schema registry (shared ownership). - std::shared_ptr get_schema_registry() { return schema_registry_; } + std::shared_ptr get_schema_registry() { + return schema_registry_; + } /// Return the metadata manager (nullptr when persistence is disabled). - std::shared_ptr get_metadata_manager() { return metadata_manager_; } + std::shared_ptr get_metadata_manager() { + return metadata_manager_; + } /// Return the node manager (shared ownership). std::shared_ptr get_node_manager() { return node_manager_; } /// Return the edge store (shared ownership). - [[nodiscard]] std::shared_ptr get_edge_store() const { return edge_store_; } + [[nodiscard]] std::shared_ptr get_edge_store() const { + return edge_store_; + } /// Return the shard manager (shared ownership). - [[nodiscard]] std::shared_ptr get_shard_manager() const { return shard_manager_; } + [[nodiscard]] std::shared_ptr get_shard_manager() const { + return shard_manager_; + } /// Initialise persistence subsystems (storage, metadata, snapshots). arrow::Result initialize(); @@ -87,7 +95,8 @@ class Database { const std::vector &field_updates, UpdateType update_type); /// Remove a node from both the node manager and its shard. - arrow::Result remove_node(const std::string &schema_name, int64_t node_id); + arrow::Result remove_node(const std::string &schema_name, + int64_t node_id); /// Register a typed edge schema for edges of @p edge_type. arrow::Result register_edge_schema( @@ -99,9 +108,9 @@ class Database { int64_t target_id); /// Create an edge with property values attached. - arrow::Result connect(int64_t source_id, const std::string &type, - int64_t target_id, - std::unordered_map properties); + arrow::Result connect( + int64_t source_id, const std::string &type, int64_t target_id, + std::unordered_map properties); /// Remove an edge by its unique ID. arrow::Result remove_edge(int64_t edge_id); @@ -120,7 +129,8 @@ class Database { /// Return the number of shards backing the given schema. arrow::Result get_shard_count(const std::string &schema_name) const; /// Return the node count in each shard for the given schema. - arrow::Result> get_shard_sizes(const std::string &schema_name) const; + arrow::Result> get_shard_sizes( + const std::string &schema_name) const; /// Return the [min_id, max_id] range for each shard of the given schema. arrow::Result>> get_shard_ranges( const std::string &schema_name) const; diff --git a/include/storage/metadata.hpp b/include/storage/metadata.hpp index 1edcbb7..bbeaad9 100644 --- a/include/storage/metadata.hpp +++ b/include/storage/metadata.hpp @@ -13,7 +13,6 @@ #include "common/logger.hpp" #include "common/types.hpp" #include "json.hpp" -#include "llvm/ADT/SmallVector.h" #include "schema/schema.hpp" #include "schema/type_descriptor.hpp" #include "storage/file_utils.hpp" @@ -103,190 +102,24 @@ struct SchemaMetadata { NLOHMANN_DEFINE_TYPE_INTRUSIVE(SchemaMetadata, name, version, fields) }; -static std::shared_ptr from_metadata(const FieldMetadata &metadata) { - return std::make_shared(metadata.name, metadata.to_type_descriptor(), - metadata.nullable); -} +std::shared_ptr from_metadata(const FieldMetadata &metadata); +std::shared_ptr from_metadata(const SchemaMetadata &metadata); -static std::shared_ptr from_metadata(const SchemaMetadata &metadata) { - llvm::SmallVector, 4> fields; +arrow::Result ArrowFieldToMetadata( + const std::shared_ptr &field); - fields.reserve(metadata.fields.size()); - for (const auto &field_meta : metadata.fields) { - fields.push_back(from_metadata(field_meta)); - } - - return std::make_shared(metadata.name, metadata.version, fields); -} - -/** - * @brief Convert an Arrow field to FieldMetadata - * - * @param field The Arrow field to convert - * @return arrow::Result The resulting field metadata - */ -inline arrow::Result ArrowFieldToMetadata( - const std::shared_ptr &field) { - FieldMetadata result; - result.name = field->name(); - result.nullable = field->nullable(); - - const auto &dt = field->type(); - switch (dt->id()) { - case arrow::Type::BOOL: - result.type = ValueType::BOOL; - break; - case arrow::Type::INT8: - case arrow::Type::INT16: - case arrow::Type::INT32: - case arrow::Type::INT64: - case arrow::Type::UINT8: - case arrow::Type::UINT16: - case arrow::Type::UINT32: - case arrow::Type::UINT64: - result.type = ValueType::INT64; - break; - case arrow::Type::FLOAT: - case arrow::Type::DOUBLE: - result.type = ValueType::DOUBLE; - break; - case arrow::Type::STRING: - case arrow::Type::LARGE_STRING: - result.type = ValueType::STRING; - break; - case arrow::Type::LIST: { - auto list_type = std::static_pointer_cast(dt); - // Recursively determine element type - FieldMetadata elem_meta; - elem_meta.name = "item"; - elem_meta.nullable = list_type->value_field()->nullable(); - auto elem_result = ArrowFieldToMetadata(list_type->value_field()); - if (!elem_result.ok()) return elem_result.status(); - result.type = ValueType::ARRAY; - result.element_type = elem_result.ValueOrDie().type; - result.fixed_size = 0; - break; - } - case arrow::Type::FIXED_SIZE_LIST: { - auto fsl_type = std::static_pointer_cast(dt); - auto elem_result = ArrowFieldToMetadata(fsl_type->value_field()); - if (!elem_result.ok()) return elem_result.status(); - result.type = ValueType::ARRAY; - result.element_type = elem_result.ValueOrDie().type; - result.fixed_size = static_cast(fsl_type->list_size()); - break; - } - case arrow::Type::MAP: - result.type = ValueType::MAP; - break; - default: - return arrow::Status::NotImplemented("Unsupported Arrow type: ", - dt->ToString()); - } - - return result; -} - -/** - * @brief Convert FieldMetadata to an Arrow field - * - * @param metadata The field metadata to convert - * @return arrow::Result> The resulting Arrow - * field - */ -/// Helper: map a scalar ValueType to an Arrow DataType. -inline std::shared_ptr scalar_vt_to_arrow(ValueType vt) { - switch (vt) { - case ValueType::BOOL: - return arrow::boolean(); - case ValueType::INT32: - return arrow::int32(); - case ValueType::INT64: - return arrow::int64(); - case ValueType::FLOAT: - return arrow::float32(); - case ValueType::DOUBLE: - return arrow::float64(); - case ValueType::STRING: - case ValueType::FIXED_STRING16: - case ValueType::FIXED_STRING32: - case ValueType::FIXED_STRING64: - return arrow::utf8(); - default: - return nullptr; - } -} - -inline arrow::Result> metadata_to_arrow_field( - const FieldMetadata &metadata) { - std::shared_ptr type; - - if (metadata.type == ValueType::ARRAY) { - const auto elem_dt = scalar_vt_to_arrow(metadata.element_type); - if (!elem_dt) { - return arrow::Status::NotImplemented( - "Unsupported array element type: ", - static_cast(metadata.element_type)); - } - if (metadata.fixed_size > 0) { - type = arrow::fixed_size_list(arrow::field("item", elem_dt), - static_cast(metadata.fixed_size)); - } else { - type = arrow::list(arrow::field("item", elem_dt)); - } - } else if (metadata.type == ValueType::MAP) { - type = arrow::map(arrow::utf8(), map_union_value_type()); - } else { - type = scalar_vt_to_arrow(metadata.type); - if (!type) { - return arrow::Status::NotImplemented("Unsupported ValueType: ", - static_cast(metadata.type)); - } - } +/// Map a scalar ValueType to an Arrow DataType. +std::shared_ptr scalar_vt_to_arrow(ValueType vt); - return arrow::field(metadata.name, type, metadata.nullable); -} +arrow::Result> metadata_to_arrow_field( + const FieldMetadata &metadata); -/** - * @brief Convert an Arrow schema to SchemaMetadata - * - * @param schema_name The name of the schema - * @param schema The Arrow schema to convert - * @return arrow::Result The resulting schema metadata - */ -inline arrow::Result arrow_schema_to_metadata( +arrow::Result arrow_schema_to_metadata( const std::string &schema_name, - const std::shared_ptr &schema) { - SchemaMetadata result; - result.name = schema_name; - result.version = 0; - - for (const auto &field : schema->fields()) { - ARROW_ASSIGN_OR_RAISE(auto field_metadata, ArrowFieldToMetadata(field)); - result.fields.push_back(field_metadata); - } - - return result; -} - -/** - * @brief Convert SchemaMetadata to an Arrow schema - * - * @param metadata The schema metadata to convert - * @return arrow::Result> The resulting Arrow - * schema - */ -inline arrow::Result> metadata_to_arrow_schema( - const SchemaMetadata &metadata) { - std::vector> fields; - - for (const auto &field_metadata : metadata.fields) { - ARROW_ASSIGN_OR_RAISE(auto field, metadata_to_arrow_field(field_metadata)); - fields.push_back(field); - } + const std::shared_ptr &schema); - return arrow::schema(fields); -} +arrow::Result> metadata_to_arrow_schema( + const SchemaMetadata &metadata); struct Snapshot { int64_t id = 0; diff --git a/src/common/types.cpp b/src/common/types.cpp index cf3f047..9936d3b 100644 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -40,8 +40,7 @@ arrow::Status Value::append_element(Value element) { return arrow::Status::OK(); } if (!holds_raw_array()) { - return arrow::Status::TypeError( - "APPEND: target value is not a raw array"); + return arrow::Status::TypeError("APPEND: target value is not a raw array"); } as_raw_array_mut().push_back(std::move(element)); return arrow::Status::OK(); @@ -93,8 +92,8 @@ std::string Value::to_string() const { std::string result = "["; for (uint32_t i = 0; i < arr.length(); ++i) { if (i > 0) result += ", "; - auto elem = - Value::read_value_from_memory(arr.element_ptr(i), arr.elem_type()); + auto elem = Value::read_value_from_memory(arr.element_ptr(i), + arr.elem_type()); result += elem.to_string(); } result += "]"; diff --git a/src/core/node.cpp b/src/core/node.cpp index 1b01b5f..e7ce3c4 100644 --- a/src/core/node.cpp +++ b/src/core/node.cpp @@ -68,8 +68,8 @@ NodeManager::NodeManager(std::shared_ptr schema_registry, schema_registry_(std::move(schema_registry)), layout_registry_(std::make_shared()), node_arena_(node_arena_factory::create_free_list_arena( - layout_registry_, NodeArena::kInitialSize, NodeArena::kMinFragmentSize, - enable_versioning)) {} + layout_registry_, NodeArena::kInitialSize, + NodeArena::kMinFragmentSize, enable_versioning)) {} arrow::Result> NodeManager::get_node( const std::string &schema_name, const int64_t id) { diff --git a/src/main/database.cpp b/src/main/database.cpp index d96b49f..cecf68c 100644 --- a/src/main/database.cpp +++ b/src/main/database.cpp @@ -35,7 +35,7 @@ namespace tundradb { // Database — methods moved from database.hpp // --------------------------------------------------------------------------- -Database::Database(const DatabaseConfig &config) +Database::Database(const DatabaseConfig& config) : schema_registry_(std::make_shared()), shard_manager_(std::make_shared(schema_registry_, config)), node_manager_(std::make_shared( @@ -48,15 +48,15 @@ Database::Database(const DatabaseConfig &config) log_error("Failed to initialize Arrow Compute module"); } if (persistence_enabled_) { - const std::string &db_path = config.get_db_path(); + const std::string& db_path = config.get_db_path(); if (db_path.empty()) { log_error("Database path is empty but persistence is enabled"); persistence_enabled_ = false; return; } std::string data_path = db_path + "/data"; - storage_ = std::make_shared(std::move(data_path), - schema_registry_, node_manager_, config); + storage_ = std::make_shared(std::move(data_path), schema_registry_, + node_manager_, config); metadata_manager_ = std::make_shared(db_path); snapshot_manager_ = std::make_shared( metadata_manager_, storage_, shard_manager_, edge_store_, node_manager_, @@ -74,8 +74,8 @@ arrow::Result Database::initialize() { } arrow::Result> Database::create_node( - const std::string &schema_name, - const std::unordered_map &data) { + const std::string& schema_name, + const std::unordered_map& data) { if (schema_name.empty()) { return arrow::Status::Invalid("Schema name cannot be empty"); } @@ -85,32 +85,32 @@ arrow::Result> Database::create_node( return node; } -arrow::Result Database::update_node(const std::string &schema_name, +arrow::Result Database::update_node(const std::string& schema_name, int64_t id, - const std::shared_ptr &field, - const Value &value, + const std::shared_ptr& field, + const Value& value, UpdateType update_type) { return shard_manager_->update_node(schema_name, id, field, value, update_type); } -arrow::Result Database::update_node(const std::string &schema_name, +arrow::Result Database::update_node(const std::string& schema_name, int64_t id, - const std::string &field_name, - const Value &value, + const std::string& field_name, + const Value& value, UpdateType update_type) { return shard_manager_->update_node(schema_name, id, field_name, value, update_type); } arrow::Result Database::update_node_fields( - const std::string &schema_name, int64_t id, - const std::vector &field_updates, UpdateType update_type) { + const std::string& schema_name, int64_t id, + const std::vector& field_updates, UpdateType update_type) { return shard_manager_->update_node_fields(schema_name, id, field_updates, update_type); } -arrow::Result Database::remove_node(const std::string &schema_name, +arrow::Result Database::remove_node(const std::string& schema_name, int64_t node_id) { if (!node_manager_->remove_node(schema_name, node_id)) { return arrow::Status::Invalid("Failed to remove node: ", schema_name, ":", @@ -120,13 +120,13 @@ arrow::Result Database::remove_node(const std::string &schema_name, } arrow::Result Database::register_edge_schema( - const std::string &edge_type, - const std::vector> &fields) { + const std::string& edge_type, + const std::vector>& fields) { return edge_store_->register_edge_schema(edge_type, fields); } arrow::Result Database::connect(int64_t source_id, - const std::string &type, + const std::string& type, int64_t target_id) { ARROW_ASSIGN_OR_RAISE(const auto edge, edge_store_->create_edge(source_id, type, target_id)); @@ -135,12 +135,11 @@ arrow::Result Database::connect(int64_t source_id, } arrow::Result Database::connect( - int64_t source_id, const std::string &type, int64_t target_id, + int64_t source_id, const std::string& type, int64_t target_id, std::unordered_map properties) { - ARROW_ASSIGN_OR_RAISE( - const auto edge, - edge_store_->create_edge(source_id, type, target_id, - std::move(properties))); + ARROW_ASSIGN_OR_RAISE(const auto edge, + edge_store_->create_edge(source_id, type, target_id, + std::move(properties))); ARROW_RETURN_NOT_OK(edge_store_->add(edge)); return true; } @@ -149,7 +148,7 @@ arrow::Result Database::remove_edge(int64_t edge_id) { return edge_store_->remove(edge_id); } -arrow::Result Database::compact(const std::string &schema_name) { +arrow::Result Database::compact(const std::string& schema_name) { return shard_manager_->compact(schema_name); } @@ -158,13 +157,11 @@ arrow::Result Database::compact_all() { } arrow::Result> Database::get_table( - const std::string &schema_name, TemporalContext *temporal_context, + const std::string& schema_name, TemporalContext* temporal_context, size_t chunk_size) const { - ARROW_ASSIGN_OR_RAISE(const auto schema, - schema_registry_->get(schema_name)); + ARROW_ASSIGN_OR_RAISE(const auto schema, schema_registry_->get(schema_name)); auto arrow_schema = schema->arrow(); - ARROW_ASSIGN_OR_RAISE(auto all_nodes, - shard_manager_->get_nodes(schema_name)); + ARROW_ASSIGN_OR_RAISE(auto all_nodes, shard_manager_->get_nodes(schema_name)); if (all_nodes.empty()) { std::vector> empty_columns; empty_columns.reserve(arrow_schema->num_fields()); @@ -174,15 +171,14 @@ arrow::Result> Database::get_table( } return arrow::Table::Make(arrow_schema, empty_columns); } - std::ranges::sort(all_nodes, [](const std::shared_ptr &a, - const std::shared_ptr &b) { - return a->id < b->id; - }); + std::ranges::sort( + all_nodes, [](const std::shared_ptr& a, + const std::shared_ptr& b) { return a->id < b->id; }); return create_table(schema, all_nodes, chunk_size, temporal_context); } arrow::Result Database::get_shard_count( - const std::string &schema_name) const { + const std::string& schema_name) const { if (!schema_registry_->exists(schema_name)) { return arrow::Status::Invalid("Schema '", schema_name, "' not found"); } @@ -190,7 +186,7 @@ arrow::Result Database::get_shard_count( } arrow::Result> Database::get_shard_sizes( - const std::string &schema_name) const { + const std::string& schema_name) const { if (!schema_registry_->exists(schema_name)) { return arrow::Status::Invalid("Schema '", schema_name, "' not found"); } @@ -198,7 +194,7 @@ arrow::Result> Database::get_shard_sizes( } arrow::Result>> -Database::get_shard_ranges(const std::string &schema_name) const { +Database::get_shard_ranges(const std::string& schema_name) const { if (!schema_registry_->exists(schema_name)) { return arrow::Status::Invalid("Schema '", schema_name, "' not found"); } diff --git a/src/storage/metadata.cpp b/src/storage/metadata.cpp index dd7f6de..4d9e0c1 100644 --- a/src/storage/metadata.cpp +++ b/src/storage/metadata.cpp @@ -6,12 +6,163 @@ #include #include +#include "arrow/map_union_types.hpp" #include "common/logger.hpp" #include "common/utils.hpp" #include "json.hpp" +#include "llvm/ADT/SmallVector.h" namespace tundradb { +// --------------------------------------------------------------------------- +// Conversion functions moved from metadata.hpp +// --------------------------------------------------------------------------- + +std::shared_ptr from_metadata(const FieldMetadata &metadata) { + return std::make_shared(metadata.name, metadata.to_type_descriptor(), + metadata.nullable); +} + +std::shared_ptr from_metadata(const SchemaMetadata &metadata) { + llvm::SmallVector, 4> fields; + fields.reserve(metadata.fields.size()); + for (const auto &field_meta : metadata.fields) { + fields.push_back(from_metadata(field_meta)); + } + return std::make_shared(metadata.name, metadata.version, fields); +} + +arrow::Result ArrowFieldToMetadata( + const std::shared_ptr &field) { + FieldMetadata result; + result.name = field->name(); + result.nullable = field->nullable(); + + const auto &dt = field->type(); + switch (dt->id()) { + case arrow::Type::BOOL: + result.type = ValueType::BOOL; + break; + case arrow::Type::INT8: + case arrow::Type::INT16: + case arrow::Type::INT32: + case arrow::Type::INT64: + case arrow::Type::UINT8: + case arrow::Type::UINT16: + case arrow::Type::UINT32: + case arrow::Type::UINT64: + result.type = ValueType::INT64; + break; + case arrow::Type::FLOAT: + case arrow::Type::DOUBLE: + result.type = ValueType::DOUBLE; + break; + case arrow::Type::STRING: + case arrow::Type::LARGE_STRING: + result.type = ValueType::STRING; + break; + case arrow::Type::LIST: { + auto list_type = std::static_pointer_cast(dt); + auto elem_result = ArrowFieldToMetadata(list_type->value_field()); + if (!elem_result.ok()) return elem_result.status(); + result.type = ValueType::ARRAY; + result.element_type = elem_result.ValueOrDie().type; + result.fixed_size = 0; + break; + } + case arrow::Type::FIXED_SIZE_LIST: { + auto fsl_type = std::static_pointer_cast(dt); + auto elem_result = ArrowFieldToMetadata(fsl_type->value_field()); + if (!elem_result.ok()) return elem_result.status(); + result.type = ValueType::ARRAY; + result.element_type = elem_result.ValueOrDie().type; + result.fixed_size = static_cast(fsl_type->list_size()); + break; + } + case arrow::Type::MAP: + result.type = ValueType::MAP; + break; + default: + return arrow::Status::NotImplemented("Unsupported Arrow type: ", + dt->ToString()); + } + return result; +} + +std::shared_ptr scalar_vt_to_arrow(ValueType vt) { + switch (vt) { + case ValueType::BOOL: + return arrow::boolean(); + case ValueType::INT32: + return arrow::int32(); + case ValueType::INT64: + return arrow::int64(); + case ValueType::FLOAT: + return arrow::float32(); + case ValueType::DOUBLE: + return arrow::float64(); + case ValueType::STRING: + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: + return arrow::utf8(); + default: + return nullptr; + } +} + +arrow::Result> metadata_to_arrow_field( + const FieldMetadata &metadata) { + std::shared_ptr type; + + if (metadata.type == ValueType::ARRAY) { + const auto elem_dt = scalar_vt_to_arrow(metadata.element_type); + if (!elem_dt) { + return arrow::Status::NotImplemented( + "Unsupported array element type: ", + static_cast(metadata.element_type)); + } + if (metadata.fixed_size > 0) { + type = arrow::fixed_size_list(arrow::field("item", elem_dt), + static_cast(metadata.fixed_size)); + } else { + type = arrow::list(arrow::field("item", elem_dt)); + } + } else if (metadata.type == ValueType::MAP) { + type = arrow::map(arrow::utf8(), map_union_value_type()); + } else { + type = scalar_vt_to_arrow(metadata.type); + if (!type) { + return arrow::Status::NotImplemented("Unsupported ValueType: ", + static_cast(metadata.type)); + } + } + return arrow::field(metadata.name, type, metadata.nullable); +} + +arrow::Result arrow_schema_to_metadata( + const std::string &schema_name, + const std::shared_ptr &schema) { + SchemaMetadata result; + result.name = schema_name; + result.version = 0; + for (const auto &field : schema->fields()) { + ARROW_ASSIGN_OR_RAISE(auto field_metadata, ArrowFieldToMetadata(field)); + result.fields.push_back(field_metadata); + } + return result; +} + +arrow::Result> metadata_to_arrow_schema( + const SchemaMetadata &metadata) { + std::vector> fields; + for (const auto &field_metadata : metadata.fields) { + ARROW_ASSIGN_OR_RAISE(auto field, metadata_to_arrow_field(field_metadata)); + fields.push_back(field); + } + return arrow::schema(fields); +} + MetadataManager::MetadataManager(const std::string &metadata_dir_path) : metadata_dir(metadata_dir_path) {} From 436cedaca5d7af84cba46499a05d6df6d6b64682 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Sat, 4 Apr 2026 19:33:59 -0400 Subject: [PATCH 3/4] header-to-cpp memory folder --- CMakeLists.txt | 1 + include/memory/array_arena.hpp | 290 +-------- include/memory/free_list_arena.hpp | 382 +----------- include/memory/map_arena.hpp | 323 +--------- include/memory/node_arena.hpp | 912 ++------------------------- include/memory/schema_layout.hpp | 181 +----- include/memory/string_arena.hpp | 215 +------ include/query/row.hpp | 291 +-------- src/memory/CMakeLists.txt | 8 + src/memory/array_arena.cpp | 265 ++++++++ src/memory/free_list_arena.cpp | 273 +++++++++ src/memory/map_arena.cpp | 308 ++++++++++ src/memory/node_arena.cpp | 946 +++++++++++++++++++++++++++++ src/memory/schema_layout.cpp | 178 ++++++ src/memory/string_arena.cpp | 227 +++++++ src/query/row.cpp | 197 ++++++ tests/CMakeLists.txt | 2 + 17 files changed, 2543 insertions(+), 2456 deletions(-) create mode 100644 src/memory/CMakeLists.txt create mode 100644 src/memory/array_arena.cpp create mode 100644 src/memory/free_list_arena.cpp create mode 100644 src/memory/map_arena.cpp create mode 100644 src/memory/node_arena.cpp create mode 100644 src/memory/schema_layout.cpp create mode 100644 src/memory/string_arena.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index e8a00dc..7e9ae3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -240,6 +240,7 @@ add_subdirectory(src/arrow) add_subdirectory(src/schema) add_subdirectory(src/core) add_subdirectory(src/storage) +add_subdirectory(src/memory) add_subdirectory(src/query) add_subdirectory(src/main) diff --git a/include/memory/array_arena.hpp b/include/memory/array_arena.hpp index 97338bc..a831293 100644 --- a/include/memory/array_arena.hpp +++ b/include/memory/array_arena.hpp @@ -57,32 +57,7 @@ class ArrayArena { * @param capacity Number of element slots to allocate * @return Ok(ArrayRef) with ref_count = 1, or Error with reason on failure */ - arrow::Result allocate(ValueType elem_type, uint32_t capacity) { - // capacity 0 is valid: return empty (null) ArrayRef for empty arrays - if (capacity == 0) { - return ArrayRef{}; - } - - const size_t elem_sz = get_type_size(elem_type); - const size_t data_bytes = elem_sz * capacity; - const size_t alloc_size = ArrayRef::HEADER_SIZE + data_bytes; - - std::lock_guard lock(arena_mutex_); - void* raw = arena_->allocate(alloc_size); - if (!raw) { - return arrow::Status::OutOfMemory( - "ArrayArena::allocate: arena allocation failed (requested ", - alloc_size, " bytes)"); - } - - init_header(raw, capacity); - - char* data = static_cast(raw) + ArrayRef::HEADER_SIZE; - zero_init_elements(data, elem_type, capacity); - - active_allocs_.fetch_add(1, std::memory_order_relaxed); - return ArrayRef{data, elem_type}; - } + arrow::Result allocate(ValueType elem_type, uint32_t capacity); /** * Allocate and populate an array from existing data. @@ -96,38 +71,7 @@ class ArrayArena { arrow::Result allocate_with_data(ValueType elem_type, const void* elements, uint32_t count, - uint32_t capacity = 0) { - if (capacity < count) capacity = count; - // capacity 0 is valid (count 0): return empty ArrayRef - if (capacity == 0) { - return ArrayRef{}; - } - - const size_t elem_sz = get_type_size(elem_type); - const size_t data_bytes = elem_sz * capacity; - const size_t alloc_size = ArrayRef::HEADER_SIZE + data_bytes; - - std::lock_guard lock(arena_mutex_); - void* raw = arena_->allocate(alloc_size); - if (!raw) { - return arrow::Status::OutOfMemory( - "ArrayArena::allocate_with_data: arena allocation failed (requested ", - alloc_size, " bytes)"); - } - - auto* header = init_header(raw, capacity); - header->length = count; - - char* data = static_cast(raw) + ArrayRef::HEADER_SIZE; - copy_init_elements(data, static_cast(elements), elem_type, - count); - if (capacity > count) { - zero_init_elements(data + elem_sz * count, elem_type, capacity - count); - } - - active_allocs_.fetch_add(1, std::memory_order_relaxed); - return ArrayRef{data, elem_type}; - } + uint32_t capacity = 0); /** * Append one element to an array. Two strategies: @@ -147,44 +91,7 @@ class ArrayArena { * @param element Pointer to the element data to append * @return Ok on success; Error with reason if ref is null or allocation fails */ - arrow::Status append(ArrayRef& ref, const void* element) { - if (ref.is_null()) { - return arrow::Status::Invalid( - "ArrayArena::append: ArrayRef is null (cannot append to null ref)"); - } - - auto* header = get_header(ref); - if (!header) { - return arrow::Status::Invalid( - "ArrayArena::append: invalid ArrayRef (header is null)"); - } - - if (header->length < header->capacity) { - char* dest = ref.mutable_element_ptr(header->length); - assign_element(dest, element, ref.elem_type()); - header->length++; - return arrow::Status::OK(); - } - - // Full - reallocate with 2 x capacity using allocate_with_data - // which properly handles ref-counted element types. - const uint32_t new_cap = header->capacity * 2; - const uint32_t old_len = header->length; - - ARROW_ASSIGN_OR_RAISE( - ArrayRef new_ref, - allocate_with_data(ref.elem_type(), ref.data(), old_len, new_cap)); - - assign_element(new_ref.mutable_element_ptr(old_len), element, - ref.elem_type()); - auto* new_header = get_header(new_ref); - new_header->length = old_len + 1; - - header->mark_for_deletion(); - - ref = std::move(new_ref); - return arrow::Status::OK(); - } + arrow::Status append(ArrayRef& ref, const void* element); /** * Create a copy of an existing array (for copy-on-write / versioning). @@ -196,34 +103,14 @@ class ArrayArena { * @return Ok(new ArrayRef) with independent data, or Error with reason */ arrow::Result copy(const ArrayRef& src, - uint32_t extra_capacity = 0) { - if (src.is_null()) { - return arrow::Status::Invalid( - "ArrayArena::copy: source ArrayRef is null"); - } - - const auto* header = get_header_const(src); - if (!header) { - return arrow::Status::Invalid( - "ArrayArena::copy: invalid source ArrayRef (header is null)"); - } - - const uint32_t new_capacity = header->capacity + extra_capacity; - return allocate_with_data(src.elem_type(), src.data(), header->length, - new_capacity); - } + uint32_t extra_capacity = 0); /** * Mark an array for deferred deletion. * The actual deallocation happens when the last ArrayRef is destroyed * and release() is called. */ - void mark_for_deletion(const ArrayRef& ref) { - if (ref.is_null()) return; - if (auto* h = get_header_mut(ref)) { - h->mark_for_deletion(); - } - } + void mark_for_deletion(const ArrayRef& ref); /** * Deallocate an array's memory back to the FreeListArena. @@ -236,21 +123,7 @@ class ArrayArena { * @param data Pointer to element data (NOT to header) * @param elem_type Element type (NA = skip element cleanup) */ - void release_array(char* data, ValueType elem_type = ValueType::NA) { - if (!data) return; - - auto* header = - reinterpret_cast(data - ArrayRef::HEADER_SIZE); - if (!header->arena) return; // already released - header->arena = nullptr; // prevent double-free - - destruct_elements(data, elem_type, header->length); - - active_allocs_.fetch_sub(1, std::memory_order_relaxed); - - std::lock_guard lock(arena_mutex_); - arena_->deallocate(header); - } + void release_array(char* data, ValueType elem_type = ValueType::NA); // ======================================================================== // Statistics @@ -267,27 +140,11 @@ class ArrayArena { size_t get_freed_bytes() const { return arena_->get_freed_bytes(); } - void reset() { - std::lock_guard lock(arena_mutex_); - arena_->reset(); - } - - void clear() { - std::lock_guard lock(arena_mutex_); - arena_->clear(); - } + void reset(); + void clear(); private: - /** Initialize a freshly allocated header block. */ - ArrayRef::ArrayHeader* init_header(void* raw, uint32_t capacity) { - auto* header = static_cast(raw); - header->ref_count.store(0, std::memory_order_relaxed); - header->flags = 0; - header->length = 0; - header->capacity = capacity; - header->arena = this; - return header; - } + ArrayRef::ArrayHeader* init_header(void* raw, uint32_t capacity); static ArrayRef::ArrayHeader* get_header(const ArrayRef& ref) { if (ref.is_null()) return nullptr; @@ -303,140 +160,19 @@ class ArrayArena { return get_header(ref); } - /** - * Destruct non-trivial elements before freeing array memory. - * - * For STRING elements: marks each string for deletion, then calls - * the destructor. The destructor decrements ref_count; when it hits 0 - * with the deletion flag set, the string memory is freed. - * - * For ARRAY elements: calls the destructor, which triggers the same - * release_array chain recursively. - * - * Primitives (INT32, DOUBLE, etc.) have trivial destructors - skip them. - */ static void destruct_elements(char* data, ValueType elem_type, - uint32_t count) { - if (count == 0) return; - - if (is_string_type(elem_type)) { - for (uint32_t i = 0; i < count; ++i) { - auto* sr = reinterpret_cast(data + i * sizeof(StringRef)); - if (!sr->is_null()) { - // Must mark for deletion first - StringRef::release() only - // frees memory when BOTH ref_count==0 AND marked_for_deletion. - auto* hdr = reinterpret_cast( - const_cast(sr->data() - StringRef::HEADER_SIZE)); - hdr->mark_for_deletion(); - } - sr->~StringRef(); - } - } else if (is_array_type(elem_type)) { - for (uint32_t i = 0; i < count; ++i) { - auto* ar = reinterpret_cast(data + i * sizeof(ArrayRef)); - if (!ar->is_null()) { - // ArrayRef destructor calls release() which calls release_array - // recursively if this was the last reference. - auto* hdr = reinterpret_cast( - ar->data() - ArrayRef::HEADER_SIZE); - hdr->mark_for_deletion(); - } - ar->~ArrayRef(); - } - } - // Primitives: trivial destructors, nothing to do. - } - - /** - * Copy-construct elements from src to raw (uninitialized) dst memory. - * Uses copy constructors for ref-counted types (StringRef, ArrayRef) - * to properly increment reference counts; memcpy for primitives. - * - * IMPORTANT: dst must be RAW uninitialized memory (no live objects). - */ + uint32_t count); static void copy_init_elements(char* dst, const char* src, - ValueType elem_type, uint32_t count) { - if (count == 0) return; - if (is_string_type(elem_type)) { - for (uint32_t i = 0; i < count; ++i) { - const auto* s = - reinterpret_cast(src + i * sizeof(StringRef)); - new (dst + i * sizeof(StringRef)) StringRef(*s); - } - } else if (is_array_type(elem_type)) { - for (uint32_t i = 0; i < count; ++i) { - const auto* a = - reinterpret_cast(src + i * sizeof(ArrayRef)); - new (dst + i * sizeof(ArrayRef)) ArrayRef(*a); - } - } else { - std::memcpy(dst, src, get_type_size(elem_type) * count); - } - } - - /** - * Copy-assign a single element from src to an INITIALIZED dst slot. - * Uses copy assignment for ref-counted types (properly releases old, - * increments new); memcpy for primitives. - */ - static void assign_element(char* dst, const void* src, ValueType elem_type) { - if (is_string_type(elem_type)) { - *reinterpret_cast(dst) = - *reinterpret_cast(src); - } else if (is_array_type(elem_type)) { - *reinterpret_cast(dst) = - *reinterpret_cast(src); - } else { - std::memcpy(dst, src, get_type_size(elem_type)); - } - } - - /** - * Initialize element memory. - * Uses placement-new for types with non-trivial constructors - * (StringRef, ArrayRef); memset(0) for primitives. - */ + ValueType elem_type, uint32_t count); + static void assign_element(char* dst, const void* src, ValueType elem_type); static void zero_init_elements(char* data, ValueType elem_type, - uint32_t count) { - if (is_string_type(elem_type)) { - for (uint32_t i = 0; i < count; ++i) { - new (data + i * sizeof(StringRef)) StringRef(); - } - } else if (is_array_type(elem_type)) { - for (uint32_t i = 0; i < count; ++i) { - new (data + i * sizeof(ArrayRef)) ArrayRef(); - } - } else { - std::memset(data, 0, get_type_size(elem_type) * count); - } - } + uint32_t count); std::unique_ptr arena_; mutable std::mutex arena_mutex_; std::atomic active_allocs_{0}; }; -// ============================================================================ -// ArrayRef::release() implementation (after ArrayArena is fully defined) -// ============================================================================ - -inline void ArrayRef::release() { - if (!data_) return; - if (auto* h = get_header()) { - assert(h->ref_count.load(std::memory_order_relaxed) > 0 && - "ArrayRef::release() called with ref_count already 0 — " - "double-release or missing ref-count increment"); - - const int32_t old_count = - h->ref_count.fetch_sub(1, std::memory_order_acq_rel); - if (old_count == 1 && h->is_marked_for_deletion() && h->arena) { - h->arena->release_array(data_, elem_type_); - } - } - data_ = nullptr; - elem_type_ = ValueType::NA; -} - } // namespace tundradb #endif // ARRAY_ARENA_HPP diff --git a/include/memory/free_list_arena.hpp b/include/memory/free_list_arena.hpp index 876ba7d..0529cc2 100644 --- a/include/memory/free_list_arena.hpp +++ b/include/memory/free_list_arena.hpp @@ -40,12 +40,7 @@ class FreeListArena : public MemArena { public: explicit FreeListArena( size_t initial_size = 1024 * 1024, // 1MB default - size_t min_fragment_size = 64) // 64 bytes minimum fragment - : chunk_size_(initial_size), - current_chunk_size_(0), - min_fragment_size_(min_fragment_size) { - allocate_new_chunk(chunk_size_); - } + size_t min_fragment_size = 64); // 64 bytes minimum fragment ~FreeListArena() override { FreeListArena::clear(); } @@ -61,88 +56,23 @@ class FreeListArena : public MemArena { * @param alignment Memory alignment requirement (default: 8 bytes) * @return Pointer to allocated memory, or nullptr if allocation fails */ - void* allocate(size_t size, size_t alignment = 8) override { - size = align_up(size, alignment); - - // Try to find a suitable free block first - void* reused_block = find_free_block(size); - if (reused_block) { - return reused_block; - } - - // No suitable free block, allocate new memory - return allocate_new_block(size, alignment); - } + void* allocate(size_t size, size_t alignment = 8) override; /** * Deallocate a block and add it to the free list * @param ptr Pointer returned by allocate() */ - void deallocate(void* ptr) override { - if (!ptr) return; - - // Get the block header - BlockHeader* header = get_block_header(ptr); - - assert(!header->is_free && "Double free detected"); - - // log_debug("DEALLOCATE: ptr={}, header={}, size={}", ptr, - // static_cast(header), header->size); - - // Mark as free (coalesce_blocks will handle adding to free list) - header->is_free = true; - - // Try to coalesce with adjacent blocks - coalesce_blocks(ptr); - - freed_bytes_ += header->size; // For fragmentation ratio calculation - total_used_ -= header->size; // Decrement live memory usage - - // log_debug("DEALLOCATE DONE: free_block_count={}", - // get_free_block_count()); - } + void deallocate(void* ptr) override; /** * Reset the arena - clears all allocations and free lists */ - void reset() override { - // Clear free lists - free_blocks_by_size_.clear(); - - // Reset all chunk allocated sizes - for (size_t i = 0; i < chunk_allocated_sizes_.size(); ++i) { - chunk_allocated_sizes_[i] = 0; - } - - // Reset chunk management to use first chunk - current_offset_ = 0; - if (!chunks_.empty()) { - current_chunk_ = chunks_[0].get(); - current_chunk_size_ = chunk_sizes_[0]; - } - - // Reset statistics - total_used_ = 0; // Reset individual block usage (can be reused) - freed_bytes_ = 0; - // NOTE: total_allocated_ (chunk memory) is NOT reset - chunks are still - // allocated - } + void reset() override; /** * Clear all allocated memory */ - void clear() override { - chunks_.clear(); - chunk_sizes_.clear(); - chunk_allocated_sizes_.clear(); - free_blocks_by_size_.clear(); - current_chunk_ = nullptr; - current_chunk_size_ = 0; - current_offset_ = 0; - total_allocated_ = 0; // Reset chunk memory (chunks are freed) - total_used_ = 0; // Reset individual block usage - freed_bytes_ = 0; - } + void clear() override; /** * STATISTICS DOCUMENTATION: @@ -179,19 +109,10 @@ class FreeListArena : public MemArena { /// Number of allocated chunks. size_t get_chunk_count() const override { return chunks_.size(); } /// Number of distinct free blocks indexed in the size map. - size_t get_free_block_count() const { - size_t count = 0; - for (const auto& blocks : free_blocks_by_size_ | std::views::values) { - count += blocks.size(); - } - return count; - } + size_t get_free_block_count() const; /// Ratio of cumulative freed bytes to total chunk memory (0 if no chunks). - double get_fragmentation_ratio() const { - if (total_allocated_ == 0) return 0.0; - return static_cast(freed_bytes_) / total_allocated_; - } + double get_fragmentation_ratio() const; // Testing interface - expose internals for verification #ifdef TESTING_ENABLED @@ -209,7 +130,6 @@ class FreeListArena : public MemArena { } bool verify_block_integrity_for_testing() { - // Verify that all blocks in chunks are properly laid out in memory for (size_t i = 0; i < chunks_.size(); ++i) { char* chunk_start = chunks_[i].get(); char* chunk_allocated_end = chunk_start + chunk_allocated_sizes_[i]; @@ -218,14 +138,12 @@ class FreeListArena : public MemArena { while (current_ptr < chunk_allocated_end) { BlockHeader* current = reinterpret_cast(current_ptr); - // Verify block is within allocated bounds char* block_end = current_ptr + BlockHeader::HEADER_SIZE + current->size; if (block_end > chunk_allocated_end) { - return false; // Block extends beyond allocated space + return false; } - // Move to next block using physical traversal current_ptr = block_end; } } @@ -260,286 +178,24 @@ class FreeListArena : public MemArena { size_t freed_bytes_ = 0; // Cumulative bytes freed (for fragmentation ratio calculation) - void allocate_new_chunk(size_t size) { - auto new_chunk = std::make_unique(size); - current_chunk_ = new_chunk.get(); - current_chunk_size_ = size; - chunks_.push_back(std::move(new_chunk)); - chunk_sizes_.push_back(size); - chunk_allocated_sizes_.push_back(0); // Start with 0 allocated - current_offset_ = 0; - - // Track total chunk memory allocated (persists across reset) - total_allocated_ += size; - } - - // Find which chunk contains this pointer - char* find_chunk_start(void* ptr) { - char* char_ptr = static_cast(ptr); - for (size_t i = 0; i < chunks_.size(); ++i) { - char* chunk_start = chunks_[i].get(); - char* chunk_end = chunk_start + chunk_sizes_[i]; - if (char_ptr >= chunk_start && char_ptr < chunk_end) { - return chunk_start; - } - } - return nullptr; - } - - // Find previous block by traversing physically through memory - BlockHeader* find_prev_block(BlockHeader* target) { - char* chunk_start = find_chunk_start(target); - if (!chunk_start) return nullptr; - - char* target_ptr = reinterpret_cast(target); - if (target_ptr == chunk_start) { - return nullptr; // First block in chunk has no previous - } - - // Walk through memory from chunk start to find previous block - char* current_ptr = chunk_start; - BlockHeader* prev = nullptr; - - while (current_ptr < target_ptr) { - BlockHeader* current = reinterpret_cast(current_ptr); - - // Calculate next block position - char* next_ptr = current_ptr + BlockHeader::HEADER_SIZE + current->size; - - if (next_ptr == target_ptr) { - return current; // Found the block immediately before target - } - - prev = current; - current_ptr = next_ptr; - } - - return nullptr; // Shouldn't happen if memory is properly managed - } + void allocate_new_chunk(size_t size); + char* find_chunk_start(void* ptr); + BlockHeader* find_prev_block(BlockHeader* target); BlockHeader* get_block_header(void* ptr) { return reinterpret_cast(static_cast(ptr) - BlockHeader::HEADER_SIZE); } - void* allocate_new_block(size_t size, size_t alignment) { - // Align the requested size - size_t aligned_size = align_up(size, alignment); - - // Calculate the aligned offset for the data portion - size_t data_aligned_offset = calculate_aligned_offset( - current_chunk_, current_offset_ + BlockHeader::HEADER_SIZE, alignment); - - size_t header_offset = data_aligned_offset - BlockHeader::HEADER_SIZE; - size_t total_size = data_aligned_offset + aligned_size - current_offset_; - - // Check if we need a new chunk - if (current_chunk_ == nullptr || - current_offset_ + total_size > current_chunk_size_) { - size_t needed_chunk_size = - std::max(chunk_size_, total_size) + get_alignment_overhead(alignment); - allocate_new_chunk(needed_chunk_size); - - // Recalculate offsets for new chunk - data_aligned_offset = calculate_aligned_offset( - current_chunk_, BlockHeader::HEADER_SIZE, alignment); - header_offset = data_aligned_offset - BlockHeader::HEADER_SIZE; - total_size = data_aligned_offset + aligned_size; - } - - // Place header and data at properly aligned positions - char* header_start = current_chunk_ + header_offset; - BlockHeader* header = reinterpret_cast(header_start); - - header->size = aligned_size; // Store the aligned size - header->is_free = false; - - char* data_ptr = current_chunk_ + data_aligned_offset; - // log_debug( - // "ALLOCATE_NEW: data_ptr={}, header={}, size={}, total_size={}, " - // "offset={}", - // data_ptr, static_cast(header), aligned_size, total_size, - // current_offset_); - - current_offset_ = data_aligned_offset + aligned_size; - // Track individual block allocation (live memory usage) - total_used_ += aligned_size; - - // Update allocated size for current chunk - chunk_allocated_sizes_.back() = current_offset_; - - return data_ptr; - } - - void* find_free_block(size_t size) { - // Find best fit from free list - auto it = free_blocks_by_size_.lower_bound(size); - if (it != free_blocks_by_size_.end()) { - auto& blocks = it->second; - if (!blocks.empty()) { - BlockHeader* header = *blocks.begin(); - blocks.erase(blocks.begin()); - - if (blocks.empty()) { - free_blocks_by_size_.erase(it); - } - - // Split block if it's much larger - if (header->size > - size + BlockHeader::HEADER_SIZE + min_fragment_size_) { - split_block(header, size); - } - - header->is_free = false; - return reinterpret_cast(header) + BlockHeader::HEADER_SIZE; - } - } - - return nullptr; - } - - void split_block(BlockHeader* header, size_t needed_size) { - size_t remaining_size = - header->size - needed_size - BlockHeader::HEADER_SIZE; - - // Create new block from remainder - char* new_block_start = reinterpret_cast(header) + - BlockHeader::HEADER_SIZE + needed_size; - BlockHeader* new_header = reinterpret_cast(new_block_start); - - new_header->size = remaining_size; - new_header->is_free = true; - - header->size = needed_size; - - // Add remainder to free list (pass data pointer to add_to_free_list) - add_to_free_list(new_block_start + BlockHeader::HEADER_SIZE, - remaining_size); - } - - void add_to_free_list(void* ptr, size_t size) { - BlockHeader* header = get_block_header(ptr); - // log_debug("ADD_TO_FREE_LIST: ptr={}, header={}, size={}", ptr, - // static_cast(header), size); - free_blocks_by_size_[size].insert(header); - } - - void remove_block_from_free_list(BlockHeader* block) { - auto it = free_blocks_by_size_.find(block->size); - if (it != free_blocks_by_size_.end()) { - it->second.erase(block); - if (it->second.empty()) { - free_blocks_by_size_.erase(it); - } - // log_debug("COALESCE: removed block from free list"); - } - } - - // Find the next block using physical adjacency - BlockHeader* find_next_block(BlockHeader* header) { - // Need byte-level arithmetic (header + header->size would be wrong pointer - // math) - char* current_ptr = reinterpret_cast(header); - char* next_ptr = current_ptr + BlockHeader::HEADER_SIZE + header->size; - - // Check if the next block is within the same chunk - char* chunk_start = find_chunk_start(header); - if (!chunk_start) { - // Defensive check - should not happen with valid blocks - // log_debug("FIND_NEXT: header={}, no chunk found", - // static_cast(header)); - return nullptr; - } - - size_t chunk_index = 0; - for (size_t i = 0; i < chunks_.size(); ++i) { - if (chunks_[i].get() == chunk_start) { - chunk_index = i; - break; - } - } - - char* chunk_allocated_end = - chunk_start + chunk_allocated_sizes_[chunk_index]; - - // log_debug( - // "FIND_NEXT: header={}, next_ptr={}, chunk_start={}, " - // "chunk_allocated_end={}", - // static_cast(header), next_ptr, chunk_start, - // chunk_allocated_end); - - // Check if next block would be within allocated portion of chunk - // Must ensure entire header fits (not just start position) - if (next_ptr + BlockHeader::HEADER_SIZE <= chunk_allocated_end) { - BlockHeader* next_header = reinterpret_cast(next_ptr); - // Using physical traversal - don't rely on header.next pointer - // log_debug("FIND_NEXT: found next block={}, size={}, is_free={}", - // static_cast(next_header), next_header->size, - // next_header->is_free); - return next_header; - } - - // log_debug("FIND_NEXT: next block would be outside allocated portion"); - return nullptr; // Next block would be outside allocated portion - } - - void coalesce_blocks(void* ptr) { - BlockHeader* header = get_block_header(ptr); - - // log_debug("COALESCE START: ptr={}, header={}, size={}", ptr, - // static_cast(header), header->size); - - // Coalesce with next block (forward coalescing) - BlockHeader* next = find_next_block(header); - if (next && next->is_free) { - // log_debug("COALESCE: merging with NEXT block={}, size={}", - // static_cast(next), next->size); - - // Remove next block from free list - remove_block_from_free_list(next); - - // Merge blocks - size_t old_size = header->size; - header->size += BlockHeader::HEADER_SIZE + next->size; - // log_debug("COALESCE: merged forward - old_size={}, new_size={}", - // old_size, - // header->size); - } else { - // log_debug("COALESCE: no next block to merge (next={}, is_free={})", - // static_cast(next), next ? next->is_free : false); - } - - // Coalesce with previous block (backward coalescing) - BlockHeader* prev = find_prev_block(header); - if (prev && prev->is_free) { - // log_debug("COALESCE: merging with PREV block={}, size={}", - // static_cast(prev), prev->size); - - // Remove prev block from free list - remove_block_from_free_list(prev); - - // Merge blocks - size_t old_size = prev->size; - prev->size += BlockHeader::HEADER_SIZE + header->size; - // log_debug("COALESCE: merged backward - old_size={}, new_size={}", - // old_size, prev->size); - - // Update header to point to merged block - header = prev; - } else { - // log_debug("COALESCE: no prev block to merge (prev={}, is_free={})", - // static_cast(prev), prev ? prev->is_free : false); - } - - // Add the coalesced block back to free list - // log_debug("COALESCE: adding final block to free list: header={}, - // size={}", - // static_cast(header), header->size); - add_to_free_list(reinterpret_cast(header) + BlockHeader::HEADER_SIZE, - header->size); - } + void* allocate_new_block(size_t size, size_t alignment); + void* find_free_block(size_t size); + void split_block(BlockHeader* header, size_t needed_size); + void add_to_free_list(void* ptr, size_t size); + void remove_block_from_free_list(BlockHeader* block); + BlockHeader* find_next_block(BlockHeader* header); + void coalesce_blocks(void* ptr); }; } // namespace tundradb -#endif // FREE_LIST_ARENA_HPP \ No newline at end of file +#endif // FREE_LIST_ARENA_HPP diff --git a/include/memory/map_arena.hpp b/include/memory/map_arena.hpp index 94fe6aa..5e9a618 100644 --- a/include/memory/map_arena.hpp +++ b/include/memory/map_arena.hpp @@ -47,28 +47,7 @@ class MapArena { * Allocate a new empty map with the given capacity. * Returns a MapRef with ref_count = 1 and count = 0. */ - arrow::Result allocate(uint32_t capacity = DEFAULT_CAPACITY) { - if (capacity == 0) return MapRef{}; - - const size_t data_bytes = sizeof(MapEntry) * capacity; - const size_t alloc_size = MapRef::HEADER_SIZE + data_bytes; - - std::lock_guard lock(arena_mutex_); - void* raw = arena_->allocate(alloc_size); - if (!raw) { - return arrow::Status::OutOfMemory( - "MapArena::allocate: arena allocation failed (requested ", alloc_size, - " bytes)"); - } - - init_header(raw, capacity); - - char* data = static_cast(raw) + MapRef::HEADER_SIZE; - zero_init_entries(data, capacity); - - active_allocs_.fetch_add(1, std::memory_order_relaxed); - return MapRef{data}; - } + arrow::Result allocate(uint32_t capacity = DEFAULT_CAPACITY); /** * Create a COW copy of an existing map. @@ -77,62 +56,16 @@ class MapArena { * @param src Source MapRef to copy * @param extra_capacity Additional entry slots beyond the original capacity */ - arrow::Result copy(const MapRef& src, uint32_t extra_capacity = 0) { - if (src.is_null()) { - return arrow::Status::Invalid("MapArena::copy: source MapRef is null"); - } - - const auto* header = get_header_const(src); - if (!header) { - return arrow::Status::Invalid( - "MapArena::copy: invalid source MapRef (header is null)"); - } + arrow::Result copy(const MapRef& src, uint32_t extra_capacity = 0); - const uint32_t src_count = header->count; - const uint32_t new_capacity = header->capacity + extra_capacity; - - ARROW_ASSIGN_OR_RAISE(MapRef new_ref, allocate(new_capacity)); - - auto* new_header = get_header(new_ref); - new_header->count = src_count; - - // Copy-construct entries (handles ref-counted StringRef keys/values) - for (uint32_t i = 0; i < src_count; ++i) { - const auto* src_entry = src.entry_ptr(i); - auto* dst_entry = new_ref.mutable_entry_ptr(i); - copy_init_entry(dst_entry, src_entry); - } - - return new_ref; - } - - void mark_for_deletion(const MapRef& ref) { - if (ref.is_null()) return; - if (auto* h = get_header_mut(ref)) { - h->mark_for_deletion(); - } - } + void mark_for_deletion(const MapRef& ref); /** * Deallocate a map's memory back to the FreeListArena. * Called by MapRef::release() when ref_count reaches 0 and * the map is marked for deletion. */ - void release_map(char* data) { - if (!data) return; - - auto* header = - reinterpret_cast(data - MapRef::HEADER_SIZE); - if (!header->arena) return; - header->arena = nullptr; - - destruct_entries(data, header->count); - - active_allocs_.fetch_sub(1, std::memory_order_relaxed); - - std::lock_guard lock(arena_mutex_); - arena_->deallocate(header); - } + void release_map(char* data); // ======================================================================== // Entry-level operations @@ -142,17 +75,7 @@ class MapArena { * Find an entry by key name (linear scan). * Returns the index, or -1 if not found. */ - static int32_t find_entry(const MapRef& ref, const std::string& key) { - if (ref.is_null()) return -1; - const uint32_t n = ref.count(); - for (uint32_t i = 0; i < n; ++i) { - const auto* entry = ref.entry_ptr(i); - if (entry->key.view() == key) { - return static_cast(i); - } - } - return -1; - } + static int32_t find_entry(const MapRef& ref, const std::string& key); /** * Set an entry value. If the key exists, overwrites it in place. @@ -163,71 +86,13 @@ class MapArena { * (store it in string_arena before calling this). */ static arrow::Status set_entry(MapRef& ref, const StringRef& key, - ValueType vtype, const void* value_ptr) { - if (ref.is_null()) { - return arrow::Status::Invalid("MapArena::set_entry: MapRef is null"); - } - - auto* header = get_header(ref); - if (!header) { - return arrow::Status::Invalid("MapArena::set_entry: invalid header"); - } - - // Try to find existing entry - const std::string_view key_view = key.view(); - for (uint32_t i = 0; i < header->count; ++i) { - auto* entry = ref.mutable_entry_ptr(i); - if (entry->key.view() == key_view) { - destruct_entry_value(entry); - entry->value_type = static_cast(vtype); - copy_value_into_entry(entry, vtype, value_ptr); - return arrow::Status::OK(); - } - } - - // Not found — append - if (header->count >= header->capacity) { - return arrow::Status::CapacityError( - "MapArena::set_entry: map is full (count=", header->count, - ", capacity=", header->capacity, ")"); - } - - auto* entry = ref.mutable_entry_ptr(header->count); - // Destruct the default-constructed entry before overwriting - entry->key.~StringRef(); - new (&entry->key) StringRef(key); - entry->value_type = static_cast(vtype); - copy_value_into_entry(entry, vtype, value_ptr); - header->count++; - return arrow::Status::OK(); - } + ValueType vtype, const void* value_ptr); /** * Remove an entry by key (swap-with-last, O(1) removal). * Returns true if found and removed, false if not found. */ - static bool remove_entry(MapRef& ref, const std::string& key) { - if (ref.is_null()) return false; - auto* header = get_header(ref); - if (!header || header->count == 0) return false; - - for (uint32_t i = 0; i < header->count; ++i) { - auto* entry = ref.mutable_entry_ptr(i); - if (entry->key.view() == key) { - destruct_entry(entry); - if (i < header->count - 1) { - auto* last = ref.mutable_entry_ptr(header->count - 1); - move_entry(entry, last); - } - // Zero-init the now-vacant last slot - auto* vacant = ref.mutable_entry_ptr(header->count - 1); - new (vacant) MapEntry(); - header->count--; - return true; - } - } - return false; - } + static bool remove_entry(MapRef& ref, const std::string& key); // ======================================================================== // Statistics @@ -241,26 +106,11 @@ class MapArena { size_t get_used_bytes() const { return arena_->get_used_bytes(); } size_t get_freed_bytes() const { return arena_->get_freed_bytes(); } - void reset() { - std::lock_guard lock(arena_mutex_); - arena_->reset(); - } - - void clear() { - std::lock_guard lock(arena_mutex_); - arena_->clear(); - } + void reset(); + void clear(); private: - MapRef::MapHeader* init_header(void* raw, uint32_t capacity) { - auto* header = static_cast(raw); - header->ref_count.store(0, std::memory_order_relaxed); - header->flags = 0; - header->count = 0; - header->capacity = capacity; - header->arena = this; - return header; - } + MapRef::MapHeader* init_header(void* raw, uint32_t capacity); static MapRef::MapHeader* get_header(const MapRef& ref) { if (ref.is_null()) return nullptr; @@ -276,159 +126,20 @@ class MapArena { return get_header(ref); } - static void zero_init_entries(char* data, uint32_t count) { - for (uint32_t i = 0; i < count; ++i) { - new (data + i * sizeof(MapEntry)) MapEntry(); - } - } - - /** Copy-construct a single entry (properly handles ref-counted key). */ - static void copy_init_entry(MapEntry* dst, const MapEntry* src) { - dst->key.~StringRef(); - new (&dst->key) StringRef(src->key); - dst->value_type = src->value_type; - std::memset(dst->pad, 0, sizeof(dst->pad)); - copy_value_into_entry(dst, static_cast(src->value_type), - src->value); - } - - /** Copy value bytes into entry, using copy-constructor for ref-counted types. - * For primitive types only the actual type size is read from src, so callers - * may safely pass a pointer to a stack variable smaller than VALUE_SIZE. - */ + static void zero_init_entries(char* data, uint32_t count); + static void copy_init_entry(MapEntry* dst, const MapEntry* src); static void copy_value_into_entry(MapEntry* entry, ValueType vtype, - const void* src) { - if (is_string_type(vtype)) { - auto* dst = reinterpret_cast(entry->value); - dst->~StringRef(); - new (dst) StringRef(*reinterpret_cast(src)); - } else if (is_array_type(vtype)) { - auto* dst = reinterpret_cast(entry->value); - dst->~ArrayRef(); - new (dst) ArrayRef(*reinterpret_cast(src)); - } else if (is_map_type(vtype)) { - auto* dst = reinterpret_cast(entry->value); - dst->~MapRef(); - new (dst) MapRef(*reinterpret_cast(src)); - } else { - size_t n = get_type_size(vtype); - std::memset(entry->value, 0, MapEntry::VALUE_SIZE); - std::memcpy(entry->value, src, n); - } - } - - /** - * Destruct the value in an entry. - * Marks ref-counted values for deletion before calling destructors so that - * release() can actually free the underlying arena memory when ref_count - * reaches 0. - */ - static void destruct_entry_value(MapEntry* entry) { - auto vtype = static_cast(entry->value_type); - if (is_string_type(vtype)) { - auto* sr = reinterpret_cast(entry->value); - if (!sr->is_null()) { - auto* hdr = reinterpret_cast( - const_cast(sr->data() - StringRef::HEADER_SIZE)); - hdr->mark_for_deletion(); - } - sr->~StringRef(); - new (sr) StringRef(); - } else if (is_array_type(vtype)) { - auto* ar = reinterpret_cast(entry->value); - if (!ar->is_null()) { - auto* hdr = reinterpret_cast( - ar->data() - ArrayRef::HEADER_SIZE); - hdr->mark_for_deletion(); - } - ar->~ArrayRef(); - new (ar) ArrayRef(); - } else if (is_map_type(vtype)) { - auto* mr = reinterpret_cast(entry->value); - if (!mr->is_null()) { - auto* hdr = reinterpret_cast(mr->data() - - MapRef::HEADER_SIZE); - hdr->mark_for_deletion(); - } - mr->~MapRef(); - new (mr) MapRef(); - } - } - - /** Destruct an entire entry (key + value). */ - static void destruct_entry(MapEntry* entry) { - destruct_entry_value(entry); - if (!entry->key.is_null()) { - auto* hdr = reinterpret_cast( - const_cast(entry->key.data() - StringRef::HEADER_SIZE)); - hdr->mark_for_deletion(); - } - entry->key.~StringRef(); - new (&entry->key) StringRef(); - entry->value_type = static_cast(ValueType::NA); - std::memset(entry->value, 0, MapEntry::VALUE_SIZE); - } - - /** Move entry src into dst (dst assumed already destructed). */ - static void move_entry(MapEntry* dst, MapEntry* src) { - new (&dst->key) StringRef(std::move(src->key)); - dst->value_type = src->value_type; - std::memcpy(dst->value, src->value, MapEntry::VALUE_SIZE); - // Clear src without destructing moved-from refs - src->value_type = static_cast(ValueType::NA); - std::memset(src->value, 0, MapEntry::VALUE_SIZE); - } - - /** Destruct all entries in a map block (marks keys/values for deletion). */ - static void destruct_entries(char* data, uint32_t count) { - for (uint32_t i = 0; i < count; ++i) { - auto* entry = reinterpret_cast(data + i * sizeof(MapEntry)); - destruct_entry_value(entry); - if (!entry->key.is_null()) { - auto* hdr = reinterpret_cast( - const_cast(entry->key.data() - StringRef::HEADER_SIZE)); - hdr->mark_for_deletion(); - } - entry->key.~StringRef(); - } - } + const void* src); + static void destruct_entry_value(MapEntry* entry); + static void destruct_entry(MapEntry* entry); + static void move_entry(MapEntry* dst, MapEntry* src); + static void destruct_entries(char* data, uint32_t count); std::unique_ptr arena_; mutable std::mutex arena_mutex_; std::atomic active_allocs_{0}; }; -// ============================================================================ -// MapRef::release() implementation (after MapArena is fully defined) -// ============================================================================ - -inline void MapRef::release() { - if (!data_) return; - if (auto* h = get_header()) { - assert(h->ref_count.load(std::memory_order_relaxed) > 0 && - "MapRef::release() called with ref_count already 0"); - - const int32_t old_count = - h->ref_count.fetch_sub(1, std::memory_order_acq_rel); - if (old_count == 1 && h->is_marked_for_deletion() && h->arena) { - h->arena->release_map(data_); - } - } - data_ = nullptr; -} - -inline Value MapRef::get_value(const std::string& key) const { - int32_t idx = MapArena::find_entry(*this, key); - if (idx < 0) return Value{}; - const auto* entry = entry_ptr(static_cast(idx)); - return Value::read_value_from_memory( - entry->value, static_cast(entry->value_type)); -} - -inline bool MapRef::contains(const std::string& key) const { - return MapArena::find_entry(*this, key) >= 0; -} - } // namespace tundradb #endif // MAP_ARENA_HPP diff --git a/include/memory/node_arena.hpp b/include/memory/node_arena.hpp index ce2f17d..85757b0 100644 --- a/include/memory/node_arena.hpp +++ b/include/memory/node_arena.hpp @@ -83,38 +83,14 @@ struct VersionInfo { /// Walks the version chain and returns the version visible at the snapshot, /// or nullptr. const VersionInfo* find_version_at_snapshot(uint64_t valid_time, - uint64_t tx_time) const { - const VersionInfo* current = this; - while (current != nullptr) { - if (current->is_visible_at(valid_time, tx_time)) { - return current; - } - current = current->prev; - } - return nullptr; - } + uint64_t tx_time) const; /// Walks the chain using VALIDTIME only (ignores TXNTIME); nullptr if none /// matches. - const VersionInfo* find_version_at_time(uint64_t ts) const { - const VersionInfo* current = this; - while (current != nullptr) { - if (current->is_valid_at(ts)) return current; - current = current->prev; - } - return nullptr; - } + const VersionInfo* find_version_at_time(uint64_t ts) const; /// Number of versions in this chain (including this node). - size_t count_versions() const { - size_t count = 1; - const VersionInfo* current = prev; - while (current != nullptr) { - count++; - current = current->prev; - } - return count; - } + size_t count_versions() const; /// True if effective value for \p field_idx is present in the lazy field /// cache. @@ -241,17 +217,11 @@ struct NodeHandle { /// Counts versions along the chain from the current head (1 if not /// versioned). - size_t count_versions() const { - if (!is_versioned()) return 1; - return version_info_->count_versions(); - } + size_t count_versions() const; /// Starting from the current head, finds the version valid at VALIDTIME \p /// ts. - const VersionInfo* find_version_at_time(uint64_t ts) const { - if (!is_versioned()) return nullptr; - return version_info_->find_version_at_time(ts); - } + const VersionInfo* find_version_at_time(uint64_t ts) const; /// Previous version link from the current head, or nullptr. const VersionInfo* get_prev_version() const { @@ -362,130 +332,25 @@ class NodeArena { NodeArena(std::unique_ptr mem_arena, std::shared_ptr layout_registry, std::unique_ptr string_arena = nullptr, - bool enable_versioning = false) - : mem_arena_(std::move(mem_arena)), - layout_registry_(std::move(layout_registry)), - string_arena_(string_arena ? std::move(string_arena) - : std::make_unique()), - array_arena_(std::make_unique()), - map_arena_(std::make_unique()), - versioning_enabled_(enable_versioning), - version_counter_(0) { - if (versioning_enabled_) { - version_arena_ = std::make_unique(4 * 1024 * 1024); - } - } + bool enable_versioning = false); - ~NodeArena() { - // VersionInfo objects are placement-new'd into version_arena_ memory. - // Their SmallDenseMap members may heap-allocate, so we must call - // destructors before the arena frees the underlying memory. - for (auto* vi : version_infos_) { - vi->~VersionInfo(); - } - } + ~NodeArena(); /** Allocate new node (versioned if enabled). */ - NodeHandle allocate_node(const std::string& schema_name) { - const std::shared_ptr layout = - layout_registry_->get_layout(schema_name); - if (!layout) { - return NodeHandle{}; // null handle for unknown schema - } - - return allocate_node(layout); - } + NodeHandle allocate_node(const std::string& schema_name); /** Allocate new node with given layout. */ - NodeHandle allocate_node(const std::shared_ptr& layout) { - size_t node_size = layout->get_total_size_with_bitset(); - size_t alignment = layout->get_alignment(); - - void* node_data = mem_arena_->allocate(node_size, alignment); - if (!node_data) { - return NodeHandle{}; // allocation failed - } - - // Initialize the node data with default values - layout->initialize_node_data(static_cast(node_data)); - - // Create versioned or non-versioned handle based on configuration - if (versioning_enabled_) { - // Allocate VersionInfo (v0) in version_arena_ - void* version_info_memory = - version_arena_->allocate(sizeof(VersionInfo), alignof(VersionInfo)); - if (!version_info_memory) { - return NodeHandle{}; // Allocation failed - } - - // Construct base version (v0) - uint64_t now = get_current_timestamp_ns(); - auto* version_info = new (version_info_memory) VersionInfo(); - version_infos_.push_back(version_info); - version_info->version_id = 0; - version_info->valid_from = now; - version_info->valid_to = std::numeric_limits::max(); - version_info->prev = nullptr; - - return {node_data, node_size, layout->get_schema_name(), 1, version_info}; - } - return {node_data, node_size, layout->get_schema_name()}; - } + NodeHandle allocate_node(const std::shared_ptr& layout); /** Get field value pointer. */ static const char* get_value_ptr(const NodeHandle& handle, const std::shared_ptr& layout, - const std::shared_ptr& field) { - // Logger::get_instance().debug("get_field_value: {}.{}", schema_name, - // field_name); - if (handle.is_null()) { - // Logger::get_instance().error("null value for invalid handle"); - return nullptr; // null value for invalid handle - } - - return layout->get_value_ptr(static_cast(handle.ptr), field); - } + const std::shared_ptr& field); /// Reads a field as Value, resolving version-chain overrides when versioned. static Value get_value(const NodeHandle& handle, const std::shared_ptr& layout, - const std::shared_ptr& field) { - if (handle.is_null()) { - return Value{}; // null value for invalid handle - } - - // For versioned nodes, check version chain - if (handle.is_versioned()) { - const FieldLayout* field_layout = layout->get_field_layout(field); - if (!field_layout) { - return Value{}; // Invalid field - } - - uint16_t field_idx = field_layout->index; - - // Traverse version chain to find the field - const VersionInfo* current = handle.version_info_; - while (current != nullptr) { - auto it = current->updated_fields.find(field_idx); - if (it != current->updated_fields.end()) { - // Found in version chain - // Check if it's nullptr (explicit NULL sentinel) - if (it->second == nullptr) { - return Value{}; // Explicitly set to NULL - } - // Read actual value from version_arena_ - return Value::read_value_from_memory(it->second, field_layout->type); - } - current = current->prev; - } - - // Not found in version chain, read from base node - return layout->get_value(static_cast(handle.ptr), field); - } - - // Non-versioned: direct read from base node - return layout->get_value(static_cast(handle.ptr), field); - } + const std::shared_ptr& field); /** * Prepare a Value for the APPEND operation in versioned path. @@ -494,66 +359,7 @@ class NodeArena { */ arrow::Result prepare_append_value( const NodeHandle& handle, const std::shared_ptr& layout, - const FieldLayout& field_layout, const Value& new_value) { - if (!is_array_type(field_layout.type)) { - return arrow::Status::TypeError( - "APPEND is only valid for array fields, got: ", - tundradb::to_string(field_layout.type)); - } - - // Read current ArrayRef from the version chain or base node - ArrayRef current_ref; - if (handle.is_versioned()) { - auto [found, ptr] = get_field_ptr_from_version_chain(handle.version_info_, - field_layout.index); - if (found && ptr) { - current_ref = *reinterpret_cast(ptr); - } else if (!found) { - const char* base_ptr = layout->get_value_ptr( - static_cast(handle.ptr), field_layout.index); - if (base_ptr) { - current_ref = *reinterpret_cast(base_ptr); - } - } - } - - if (new_value.holds_raw_array()) { - const auto& elems = new_value.as_raw_array(); - if (elems.empty()) { - if (current_ref.is_null()) return Value{ArrayRef{}}; - ARROW_ASSIGN_OR_RAISE(ArrayRef copy, array_arena_->copy(current_ref)); - return Value{std::move(copy)}; - } - if (current_ref.is_null()) { - ARROW_ASSIGN_OR_RAISE(ArrayRef arr_ref, - store_raw_array(field_layout.type_desc, elems)); - return Value{std::move(arr_ref)}; - } - const auto n = static_cast(elems.size()); - ARROW_ASSIGN_OR_RAISE( - ArrayRef new_ref, - array_arena_->copy(current_ref, grow_for_append(current_ref, n))); - for (const auto& elem : elems) { - ARROW_RETURN_NOT_OK( - append_single_element(new_ref, field_layout.type_desc, elem)); - } - return Value{std::move(new_ref)}; - } - - // Single element - if (current_ref.is_null()) { - const std::vector elems = {new_value}; - ARROW_ASSIGN_OR_RAISE(ArrayRef arr_ref, - store_raw_array(field_layout.type_desc, elems)); - return Value{std::move(arr_ref)}; - } - ARROW_ASSIGN_OR_RAISE( - ArrayRef new_ref, - array_arena_->copy(current_ref, grow_for_append(current_ref, 1))); - ARROW_RETURN_NOT_OK( - append_single_element(new_ref, field_layout.type_desc, new_value)); - return Value{std::move(new_ref)}; - } + const FieldLayout& field_layout, const Value& new_value); /** * Set field in v0 (initial population). @@ -562,18 +368,7 @@ class NodeArena { arrow::Status set_field_value_v0(NodeHandle& handle, const std::shared_ptr& layout, const std::shared_ptr& field, - const Value& value) { - assert(!handle.is_null()); - - const FieldLayout* field_layout = layout->get_field_layout(field); - if (!field_layout) { - return arrow::Status::Invalid( - "set_field_value_v0: field not found in layout"); - } - - // Write directly to base node - return set_field_value_internal(handle.ptr, layout, field_layout, value); - } + const Value& value); // ========================================================================= // apply_updates — single public write entry point @@ -582,30 +377,7 @@ class NodeArena { /// Applies field updates; creates a new version when versioning is enabled. arrow::Result apply_updates(NodeHandle& handle, const std::shared_ptr& layout, - const std::vector& updates) { - ARROW_ASSIGN_OR_RAISE(auto schema_updates, - resolve_field_indices(layout, updates)); - - if (!versioning_enabled_ || !handle.is_versioned()) { - ARROW_RETURN_NOT_OK( - apply_non_versioned_schema_updates(handle, layout, schema_updates)); - return true; - } - - if (schema_updates.empty()) { - return true; - } - - const uint64_t now = get_current_timestamp_ns(); - ARROW_ASSIGN_OR_RAISE(auto* new_vi, allocate_version(handle, now)); - - ARROW_RETURN_NOT_OK(materialize_versioned_schema_fields( - handle, layout, schema_updates, new_vi)); - - handle.version_info_->valid_to = now; - handle.version_info_ = new_vi; - return true; - } + const std::vector& updates); // ========================================================================= // Map (properties) helpers @@ -671,23 +443,7 @@ class NodeArena { static const char* get_value_ptr_at_version( const NodeHandle& handle, const VersionInfo* version, const std::shared_ptr& layout, - const std::shared_ptr& field) { - const FieldLayout* field_layout = layout->get_field_layout(field); - if (!field_layout) { - return nullptr; - } - - auto [found, field_ptr] = - get_field_ptr_from_version_chain(version, field_layout->index); - - if (found) { - return field_ptr; - } - - // Not in version chain, read from base node - return layout->get_value_ptr(static_cast(handle.ptr), - field_layout->index); - } + const std::shared_ptr& field); /** * Get field value starting from a specific version. @@ -696,91 +452,26 @@ class NodeArena { static arrow::Result get_value_at_version( const NodeHandle& handle, const VersionInfo* version, const std::shared_ptr& layout, - const std::shared_ptr& field) { - const FieldLayout* field_layout = layout->get_field_layout(field); - if (!field_layout) { - return arrow::Status::KeyError("Field not found in layout"); - } - - // Try to find in version chain first - auto [found, field_ptr] = - get_field_ptr_from_version_chain(version, field_layout->index); - - if (found) { - if (field_ptr == nullptr) { - // Explicit NULL value - return Value{}; - } - // Read value from version chain - return layout->get_value_from_ptr(field_ptr, *field_layout); - } - - // Not in version chain, read from base node - return layout->get_value(static_cast(handle.ptr), - *field_layout); - } + const std::shared_ptr& field); private: - static uint64_t get_current_timestamp_ns() { - return Clock::instance().now_nanos(); - } + static uint64_t get_current_timestamp_ns(); // ---- apply_updates helpers ------------------------------------------------ /** Resolve FieldUpdates to IndexedFieldUpdates using the schema layout. */ static arrow::Result> resolve_field_indices( const std::shared_ptr& layout, - const std::vector& updates) { - std::vector result; - result.reserve(updates.size()); - for (const auto& upd : updates) { - const FieldLayout* fl = layout->get_field_layout(upd.field); - if (!fl) { - return arrow::Status::Invalid("Invalid field in apply_updates: ", - upd.field->name()); - } - result.push_back({static_cast(fl->index), upd.value, upd.op, - upd.nested_path}); - } - return result; - } + const std::vector& updates); /** Non-versioned path: write schema fields directly to base node memory. */ arrow::Status apply_non_versioned_schema_updates( NodeHandle& handle, const std::shared_ptr& layout, - const std::vector& schema_updates) { - for (const auto& upd : schema_updates) { - if (upd.field_idx >= layout->get_fields().size()) { - return arrow::Status::IndexError("Field index out of bounds"); - } - const FieldLayout& fl = layout->get_fields()[upd.field_idx]; - - if (!upd.nested_path.empty()) { - ARROW_RETURN_NOT_OK(apply_nested_path_update_non_versioned( - handle.ptr, layout, &fl, upd.nested_path, upd.value)); - continue; - } - - ARROW_RETURN_NOT_OK( - set_field_value_internal(handle.ptr, layout, &fl, upd.value, upd.op)); - } - return arrow::Status::OK(); - } + const std::vector& schema_updates); /** Allocate and construct a new VersionInfo, chained after the current. */ arrow::Result allocate_version(const NodeHandle& handle, - const uint64_t now) { - void* vi_mem = - version_arena_->allocate(sizeof(VersionInfo), alignof(VersionInfo)); - if (!vi_mem) { - return arrow::Status::OutOfMemory("Failed to allocate VersionInfo"); - } - const uint64_t vid = - version_counter_.fetch_add(1, std::memory_order_relaxed) + 1; - auto* new_vi = new (vi_mem) VersionInfo(vid, now, handle.version_info_); - version_infos_.push_back(new_vi); - return new_vi; - } + uint64_t now); /** * Batch-allocate storage for schema fields and write each value into the @@ -789,88 +480,7 @@ class NodeArena { arrow::Status materialize_versioned_schema_fields( NodeHandle& handle, const std::shared_ptr& layout, const std::vector& schema_updates, - VersionInfo* target_vi) { - size_t total_size = 0; - size_t max_alignment = 1; - for (const auto& upd : schema_updates) { - const FieldLayout& fl = layout->get_fields()[upd.field_idx]; - if (upd.op == UpdateType::APPEND || !upd.value.is_null()) { - total_size += fl.size; - max_alignment = std::max(max_alignment, fl.alignment); - } - } - - char* batch_memory = nullptr; - if (total_size > 0) { - batch_memory = static_cast( - version_arena_->allocate(total_size, max_alignment)); - if (!batch_memory) { - return arrow::Status::OutOfMemory( - "Failed to batch allocate field storage"); - } - std::memset(batch_memory, 0, total_size); - } - - size_t offset = 0; - for (const auto& upd : schema_updates) { - const FieldLayout& fl = layout->get_fields()[upd.field_idx]; - - if (!upd.nested_path.empty()) { - ARROW_ASSIGN_OR_RAISE( - Value map_val, apply_nested_path_update_versioned( - handle, layout, fl, upd.nested_path, upd.value)); - assert(batch_memory != nullptr); - char* field_storage = batch_memory + offset; - offset += fl.size; - if (!write_value_to_memory(field_storage, fl.type, map_val)) { - return arrow::Status::TypeError("Type mismatch writing MAP field"); - } - target_vi->updated_fields[upd.field_idx] = field_storage; - continue; - } - - if (upd.op == UpdateType::SET && upd.value.is_null()) { - target_vi->updated_fields[upd.field_idx] = nullptr; - continue; - } - - assert(batch_memory != nullptr); - Value storage_value = upd.value; - - if (upd.op == UpdateType::APPEND) { - ARROW_ASSIGN_OR_RAISE( - storage_value, prepare_append_value(handle, layout, fl, upd.value)); - } else { - if (upd.value.type() == ValueType::STRING && - upd.value.holds_std_string()) { - ARROW_ASSIGN_OR_RAISE( - StringRef str_ref, - string_arena_->store_string_auto(upd.value.as_string())); - storage_value = Value{str_ref, fl.type}; - } else if (upd.value.type() == ValueType::ARRAY && - upd.value.holds_raw_array()) { - ARROW_ASSIGN_OR_RAISE( - ArrayRef arr_ref, - store_raw_array(fl.type_desc, upd.value.as_raw_array())); - storage_value = Value{std::move(arr_ref)}; - } else if (upd.value.type() == ValueType::MAP && - upd.value.holds_raw_map()) { - ARROW_ASSIGN_OR_RAISE(MapRef map_ref, - store_raw_map(upd.value.as_raw_map())); - storage_value = Value{std::move(map_ref)}; - } - } - - char* field_storage = batch_memory + offset; - offset += fl.size; - - if (!write_value_to_memory(field_storage, fl.type, storage_value)) { - return arrow::Status::TypeError("Type mismatch writing field value"); - } - target_vi->updated_fields[upd.field_idx] = field_storage; - } - return arrow::Status::OK(); - } + VersionInfo* target_vi); // ---- end apply_updates helpers -------------------------------------------- @@ -878,95 +488,7 @@ class NodeArena { arrow::Status set_field_value_internal( void* node_ptr, const std::shared_ptr& layout, const FieldLayout* field_layout, const Value& value, - UpdateType update_type = UpdateType::SET) { - if (update_type == UpdateType::APPEND) { - return append_to_array_field(node_ptr, layout, field_layout, value); - } - - // If the field currently contains a string, deallocate it first - if (is_string_type(field_layout->type) && - is_field_set(static_cast(node_ptr), field_layout->index)) { - Value old_value = - layout->get_value(static_cast(node_ptr), *field_layout); - if (!old_value.is_null() && old_value.type() != ValueType::NA) { - try { - const StringRef& old_str_ref = old_value.as_string_ref(); - if (!old_str_ref.is_null()) { - string_arena_->mark_for_deletion(old_str_ref); - } - } catch (...) { - // Old value wasn't a StringRef, ignore - } - } - } - - // If the field currently contains an array, mark for deletion - if (is_array_type(field_layout->type) && - is_field_set(static_cast(node_ptr), field_layout->index)) { - Value old_value = - layout->get_value(static_cast(node_ptr), *field_layout); - if (!old_value.is_null() && old_value.holds_array_ref()) { - const ArrayRef& old_arr_ref = old_value.as_array_ref(); - if (!old_arr_ref.is_null()) { - array_arena_->mark_for_deletion(old_arr_ref); - } - } - } - - // If the field currently contains a map, mark for deletion - if (is_map_type(field_layout->type) && - is_field_set(static_cast(node_ptr), field_layout->index)) { - Value old_value = - layout->get_value(static_cast(node_ptr), *field_layout); - if (!old_value.is_null() && old_value.holds_map_ref()) { - const MapRef& old_map_ref = old_value.as_map_ref(); - if (!old_map_ref.is_null()) { - map_arena_->mark_for_deletion(old_map_ref); - } - } - } - - // Handle string storage: std::string -> StringRef via arena - if (value.type() == ValueType::STRING && value.holds_std_string()) { - const std::string& str_content = value.as_string(); - ARROW_ASSIGN_OR_RAISE(StringRef str_ref, - string_arena_->store_string_auto(str_content)); - if (!layout->set_field_value(static_cast(node_ptr), *field_layout, - Value{str_ref, field_layout->type})) { - return arrow::Status::Invalid("Failed to write string field value"); - } - return arrow::Status::OK(); - } - - // Handle array storage: std::vector -> ArrayRef via arena - if (value.type() == ValueType::ARRAY && value.holds_raw_array()) { - ARROW_ASSIGN_OR_RAISE( - ArrayRef arr_ref, - store_raw_array(field_layout->type_desc, value.as_raw_array())); - if (!layout->set_field_value(static_cast(node_ptr), *field_layout, - Value{std::move(arr_ref)})) { - return arrow::Status::Invalid("Failed to write array field value"); - } - return arrow::Status::OK(); - } - - // Handle map storage: std::map -> MapRef via arena - if (value.type() == ValueType::MAP && value.holds_raw_map()) { - ARROW_ASSIGN_OR_RAISE(MapRef map_ref, store_raw_map(value.as_raw_map())); - if (!layout->set_field_value(static_cast(node_ptr), *field_layout, - Value{std::move(map_ref)})) { - return arrow::Status::Invalid("Failed to write map field value"); - } - return arrow::Status::OK(); - } - - // Value already holds arena-backed ref (StringRef / ArrayRef) or primitive - if (!layout->set_field_value(static_cast(node_ptr), *field_layout, - value)) { - return arrow::Status::Invalid("Failed to write field value"); - } - return arrow::Status::OK(); - } + UpdateType update_type = UpdateType::SET); // ---- nested-path update helpers ------------------------------------------- @@ -975,97 +497,14 @@ class NodeArena { * Converts std::string -> StringRef via the string arena; primitives * are returned unchanged. */ - arrow::Result materialise_map_value(const Value& value) { - if (value.type() == ValueType::STRING && value.holds_std_string()) { - ARROW_ASSIGN_OR_RAISE( - StringRef sr, string_arena_->store_string_auto(value.as_string())); - return Value{sr, ValueType::STRING}; - } - return value; - } + arrow::Result materialise_map_value(const Value& value); /** * Set a single key inside an existing (or new) MapRef. * Handles COW growth when the map is full and string materialisation. */ arrow::Status set_nested_map_key(MapRef& ref, const std::string& key, - const Value& value) { - if (ref.is_null()) { - ARROW_ASSIGN_OR_RAISE(ref, map_arena_->allocate()); - } - - ARROW_ASSIGN_OR_RAISE(Value mat, materialise_map_value(value)); - ARROW_ASSIGN_OR_RAISE(StringRef key_ref, - string_arena_->store_string_auto(key)); - - ValueType vtype = mat.type(); - // For string-like types stored inside maps the entry type is STRING. - if (is_string_type(vtype)) vtype = ValueType::STRING; - - const void* vptr = nullptr; - int32_t i32; - int64_t i64; - double d; - float f; - bool b; - StringRef sr; - ArrayRef ar; - MapRef mr; - - switch (vtype) { - case ValueType::INT32: - i32 = mat.as_int32(); - vptr = &i32; - break; - case ValueType::INT64: - i64 = mat.as_int64(); - vptr = &i64; - break; - case ValueType::DOUBLE: - d = mat.as_double(); - vptr = &d; - break; - case ValueType::FLOAT: - f = mat.as_float(); - vptr = &f; - break; - case ValueType::BOOL: - b = mat.as_bool(); - vptr = &b; - break; - case ValueType::STRING: - sr = mat.as_string_ref(); - vptr = &sr; - break; - case ValueType::ARRAY: - if (!mat.holds_array_ref()) - return arrow::Status::Invalid( - "nested_path update: raw arrays not supported"); - ar = mat.as_array_ref(); - vptr = &ar; - break; - case ValueType::MAP: - if (!mat.holds_map_ref()) - return arrow::Status::Invalid( - "nested_path update: raw maps not supported"); - mr = mat.as_map_ref(); - vptr = &mr; - break; - default: - return arrow::Status::Invalid( - "nested_path update: unsupported value type"); - } - - auto status = MapArena::set_entry(ref, key_ref, vtype, vptr); - if (status.IsCapacityError()) { - ARROW_ASSIGN_OR_RAISE(MapRef grown, - map_arena_->copy(ref, ref.capacity())); - map_arena_->mark_for_deletion(ref); - ref = std::move(grown); - return MapArena::set_entry(ref, key_ref, vtype, vptr); - } - return status; - } + const Value& value); /** * Non-versioned nested-path update: read current composite value from the @@ -1076,44 +515,7 @@ class NodeArena { arrow::Status apply_nested_path_update_non_versioned( void* node_ptr, const std::shared_ptr& layout, const FieldLayout* fl, const std::vector& nested_path, - const Value& value) { - if (nested_path.empty()) { - return arrow::Status::Invalid( - "nested_path update requires at least one path segment"); - } - if (nested_path.size() > 1) { - return arrow::Status::NotImplemented( - "nested_path update depth > 1 is not implemented yet"); - } - const std::string& key = nested_path.front(); - if (!is_map_type(fl->type)) { - return arrow::Status::TypeError("nested_path update on non-map field: ", - tundradb::to_string(fl->type)); - } - - auto* base = static_cast(node_ptr); - MapRef current; - if (is_field_set(base, fl->index)) { - Value old = layout->get_value(base, *fl); - if (!old.is_null() && old.holds_map_ref()) current = old.as_map_ref(); - } - - MapRef copy; - if (current.is_null()) { - ARROW_ASSIGN_OR_RAISE(copy, map_arena_->allocate()); - } else { - ARROW_ASSIGN_OR_RAISE(copy, map_arena_->copy(current)); - map_arena_->mark_for_deletion(current); - } - - ARROW_RETURN_NOT_OK(set_nested_map_key(copy, key, value)); - - if (!layout->set_field_value(base, *fl, Value{std::move(copy)})) { - return arrow::Status::Invalid( - "Failed to write map field after nested_path update"); - } - return arrow::Status::OK(); - } + const Value& value); /** * Versioned nested-path update: read current composite value from version @@ -1124,46 +526,7 @@ class NodeArena { arrow::Result apply_nested_path_update_versioned( const NodeHandle& handle, const std::shared_ptr& layout, const FieldLayout& fl, const std::vector& nested_path, - const Value& value) { - if (nested_path.empty()) { - return arrow::Status::Invalid( - "nested_path update requires at least one path segment"); - } - if (nested_path.size() > 1) { - return arrow::Status::NotImplemented( - "nested_path update depth > 1 is not implemented yet"); - } - const std::string& key = nested_path.front(); - if (!is_map_type(fl.type)) { - return arrow::Status::TypeError("nested_path update on non-map field: ", - tundradb::to_string(fl.type)); - } - - MapRef current; - if (handle.is_versioned()) { - auto [found, ptr] = - get_field_ptr_from_version_chain(handle.version_info_, fl.index); - if (found && ptr) { - current = *reinterpret_cast(ptr); - } else if (!found) { - const char* base_ptr = layout->get_value_ptr( - static_cast(handle.ptr), fl.index); - if (base_ptr) { - current = *reinterpret_cast(base_ptr); - } - } - } - - MapRef copy; - if (current.is_null()) { - ARROW_ASSIGN_OR_RAISE(copy, map_arena_->allocate()); - } else { - ARROW_ASSIGN_OR_RAISE(copy, map_arena_->copy(current)); - } - - ARROW_RETURN_NOT_OK(set_nested_map_key(copy, key, value)); - return Value{std::move(copy)}; - } + const Value& value); // ---- end nested-path update helpers --------------------------------------- @@ -1175,124 +538,20 @@ class NodeArena { */ arrow::Status append_to_array_field( void* node_ptr, const std::shared_ptr& layout, - const FieldLayout* field_layout, const Value& value) { - if (!is_array_type(field_layout->type)) { - return arrow::Status::TypeError( - "APPEND is only valid for array fields, got: ", - tundradb::to_string(field_layout->type)); - } - - auto* base = static_cast(node_ptr); - const bool field_is_set = is_field_set(base, field_layout->index); - - ArrayRef current_ref; - if (field_is_set) { - Value old_value = layout->get_value(base, *field_layout); - if (!old_value.is_null() && old_value.holds_array_ref()) { - current_ref = old_value.as_array_ref(); - } - } - - if (value.holds_raw_array()) { - const auto& elems = value.as_raw_array(); - if (elems.empty()) return arrow::Status::OK(); - - ArrayRef new_ref; - if (current_ref.is_null()) { - ARROW_ASSIGN_OR_RAISE(new_ref, - store_raw_array(field_layout->type_desc, elems)); - } else { - const auto n = static_cast(elems.size()); - ARROW_ASSIGN_OR_RAISE( - new_ref, - array_arena_->copy(current_ref, grow_for_append(current_ref, n))); - for (const auto& elem : elems) { - ARROW_RETURN_NOT_OK( - append_single_element(new_ref, field_layout->type_desc, elem)); - } - array_arena_->mark_for_deletion(current_ref); - } - - if (!layout->set_field_value(base, *field_layout, - Value{std::move(new_ref)})) { - return arrow::Status::Invalid( - "Failed to write array field after APPEND"); - } - return arrow::Status::OK(); - } - - // Single element append - if (current_ref.is_null()) { - const std::vector elems = {value}; - ARROW_ASSIGN_OR_RAISE(ArrayRef new_ref, - store_raw_array(field_layout->type_desc, elems)); - if (!layout->set_field_value(base, *field_layout, - Value{std::move(new_ref)})) { - return arrow::Status::Invalid( - "Failed to write array field after APPEND"); - } - return arrow::Status::OK(); - } - - ARROW_ASSIGN_OR_RAISE( - ArrayRef new_ref, - array_arena_->copy(current_ref, grow_for_append(current_ref, 1))); - ARROW_RETURN_NOT_OK( - append_single_element(new_ref, field_layout->type_desc, value)); - array_arena_->mark_for_deletion(current_ref); - - if (!layout->set_field_value(base, *field_layout, - Value{std::move(new_ref)})) { - return arrow::Status::Invalid("Failed to write array field after APPEND"); - } - return arrow::Status::OK(); - } + const FieldLayout* field_layout, const Value& value); /** * How many extra slots copy() should pre-allocate so that the * subsequent append() calls won't trigger a second reallocation. * Returns 0 when the array already has enough spare capacity. */ - static uint32_t grow_for_append(const ArrayRef& ref, uint32_t n) { - const uint32_t spare = ref.capacity() - ref.length(); - if (spare >= n) return 0; - return n - spare; - } + static uint32_t grow_for_append(const ArrayRef& ref, uint32_t n); /** Append a single Value element to an ArrayRef via the arena. */ arrow::Status append_single_element(ArrayRef& ref, const TypeDescriptor& type_desc, - const Value& elem) { - switch (type_desc.element_type) { - case ValueType::INT32: { - int32_t v = elem.as_int32(); - return array_arena_->append(ref, &v); - } - case ValueType::INT64: { - int64_t v = elem.as_int64(); - return array_arena_->append(ref, &v); - } - case ValueType::DOUBLE: { - double v = elem.as_double(); - return array_arena_->append(ref, &v); - } - case ValueType::BOOL: { - bool v = elem.as_bool(); - return array_arena_->append(ref, &v); - } - case ValueType::STRING: { - ARROW_ASSIGN_OR_RAISE( - StringRef sr, string_arena_->store_string_auto(elem.as_string())); - return array_arena_->append(ref, &sr); - } - default: - return arrow::Status::NotImplemented( - "APPEND: unsupported element type: ", - tundradb::to_string(type_desc.element_type)); - } - } + const Value& elem); - /** Traverse the version chain to find field pointer. */ /** * Get field pointer from version chain. * Returns pair: @@ -1301,67 +560,11 @@ class NodeArena { * - {false, nullptr} = field not found in version chain (read from base) */ static std::pair get_field_ptr_from_version_chain( - const VersionInfo* version_info, uint16_t field_idx) { - const VersionInfo* current = version_info; - while (current != nullptr) { - // Check if this version has an override for this field - if (auto it = current->updated_fields.find(field_idx); - it != current->updated_fields.end()) { - return {true, it->second}; // Found (value or nullptr for NULL) - } - current = current->prev; - } - - // Not found in any version - read from base node - return {false, nullptr}; - } + const VersionInfo* version_info, uint16_t field_idx); /** Write value to memory (type-safe). */ static bool write_value_to_memory(char* ptr, ValueType type, - const Value& value) { - switch (type) { - case ValueType::INT64: - if (value.type() != ValueType::INT64) return false; - *reinterpret_cast(ptr) = value.as_int64(); - return true; - - case ValueType::INT32: - if (value.type() != ValueType::INT32) return false; - *reinterpret_cast(ptr) = value.as_int32(); - return true; - - case ValueType::DOUBLE: - if (value.type() != ValueType::DOUBLE) return false; - *reinterpret_cast(ptr) = value.as_double(); - return true; - - case ValueType::BOOL: - if (value.type() != ValueType::BOOL) return false; - *reinterpret_cast(ptr) = value.as_bool(); - return true; - - case ValueType::STRING: - case ValueType::FIXED_STRING16: - case ValueType::FIXED_STRING32: - case ValueType::FIXED_STRING64: - if (!is_string_type(value.type())) return false; - *reinterpret_cast(ptr) = value.as_string_ref(); - return true; - - case ValueType::ARRAY: - if (value.type() != ValueType::ARRAY) return false; - *reinterpret_cast(ptr) = value.as_array_ref(); - return true; - - case ValueType::MAP: - if (value.type() != ValueType::MAP) return false; - *reinterpret_cast(ptr) = value.as_map_ref(); - return true; - - default: - return false; - } - } + const Value& value); /** * Convert a raw array (std::vector) to an arena-backed ArrayRef. @@ -1372,46 +575,7 @@ class NodeArena { * @return Ok(ArrayRef) or Error with reason (e.g. allocation failure) */ arrow::Result store_raw_array(const TypeDescriptor& type_desc, - const std::vector& elements) { - const ValueType elem_type = type_desc.element_type; - const auto count = static_cast(elements.size()); - - uint32_t capacity = count; - if (type_desc.is_fixed_size_array() && type_desc.fixed_size > count) { - capacity = type_desc.fixed_size; - } - - ARROW_ASSIGN_OR_RAISE(ArrayRef ref, - array_arena_->allocate(elem_type, capacity)); - - // Empty array: allocate(0) returns null ArrayRef; nothing to fill - if (ref.is_null()) { - return ref; - } - - const size_t elem_sz = get_type_size(elem_type); - auto* header = reinterpret_cast( - ref.data() - ArrayRef::HEADER_SIZE); - - for (uint32_t i = 0; i < count; ++i) { - char* dest = ref.mutable_element_ptr(i); - const Value& elem = elements[i]; - - // For string elements, store via string arena first - if (is_string_type(elem_type) && elem.holds_std_string()) { - ARROW_ASSIGN_OR_RAISE( - StringRef str_ref, - string_arena_->store_string_auto(elem.as_string())); - *reinterpret_cast(dest) = std::move(str_ref); - } else { - // Write primitive or pre-allocated ref directly - write_value_to_memory(dest, elem_type, elem); - } - } - - header->length = count; - return ref; - } + const std::vector& elements); /** * Convert a raw map (std::map) to an arena-backed MapRef. @@ -1420,15 +584,7 @@ class NodeArena { * @return Ok(MapRef) or Error with reason (e.g. allocation failure) */ arrow::Result store_raw_map( - const std::map& entries) { - ARROW_ASSIGN_OR_RAISE( - MapRef ref, - map_arena_->allocate(static_cast(entries.size()))); - for (const auto& [key, val] : entries) { - ARROW_RETURN_NOT_OK(set_nested_map_key(ref, key, val)); - } - return ref; - } + const std::map& entries); std::unique_ptr mem_arena_; std::shared_ptr layout_registry_; @@ -1479,4 +635,4 @@ inline std::unique_ptr create_free_list_arena( } // namespace tundradb -#endif // NODE_ARENA_HPP \ No newline at end of file +#endif // NODE_ARENA_HPP diff --git a/include/memory/schema_layout.hpp b/include/memory/schema_layout.hpp index bddbfdc..0475ddd 100644 --- a/include/memory/schema_layout.hpp +++ b/include/memory/schema_layout.hpp @@ -77,14 +77,7 @@ struct FieldLayout { */ class SchemaLayout { public: - explicit SchemaLayout(const std::shared_ptr& schema) - : schema_name_(std::move(schema->name())), total_size_(0), alignment_(8) { - fields_.reserve(schema->num_fields()); - for (auto field : schema->fields()) { - add_field(field); - } - finalize(); - } + explicit SchemaLayout(const std::shared_ptr& schema); /** * Get the size of the bit set in bytes @@ -103,7 +96,6 @@ class SchemaLayout { * Must be called after all fields are added */ void finalize() { - // Add padding at the end to ensure array alignment total_size_ = align_up(total_size_, alignment_); data_offset_ = align_up(get_bitset_size(), alignment_); finalized_ = true; @@ -118,18 +110,7 @@ class SchemaLayout { /// Pointer to field storage, or nullptr if the field bit is unset. const char* get_value_ptr(const char* node_data, - const size_t field_index) const { - const FieldLayout& field_layout = fields_[field_index]; - // Check if this field has been set using the bit set - if (!is_field_set(node_data, field_layout.index)) { - return nullptr; // null value for unset field - } - - // Field has been set, read it from memory - const char* data_start = node_data + data_offset_; - const char* field_ptr = data_start + field_layout.offset; - return field_ptr; - } + const size_t field_index) const; /// Pointer to field storage for \p field, or nullptr if unset. const char* get_value_ptr(const char* node_data, @@ -167,12 +148,7 @@ class SchemaLayout { * @return Value read from field_ptr */ Value get_value_from_ptr(const char* field_ptr, - const FieldLayout& field_layout) const { - if (field_ptr == nullptr) { - return Value{}; // Explicit NULL - } - return Value::read_value_from_memory(field_ptr, field_layout.type); - } + const FieldLayout& field_layout) const; /** * Set field value in node data @@ -183,39 +159,12 @@ class SchemaLayout { } bool set_field_value(char* node_data, const FieldLayout& field_layout, - const Value& value) { - // Update the bit set to indicate this field has been set - set_field_bit(node_data, field_layout.index, !value.is_null()); - - // If the value is null, we don't need to write it to memory just set bit - if (value.is_null()) { - return true; - } - - // Write the actual value to memory - char* data_start = node_data + data_offset_; - char* field_ptr = data_start + field_layout.offset; - return write_value_to_memory(field_ptr, field_layout.type, value); - } + const Value& value); /** * Initialize node data with default values */ - void initialize_node_data(char* node_data) const { - // Clear the bit set (all fields initially unset) - const size_t bitset_size = get_bitset_size(); - std::memset(node_data, 0, bitset_size); - - // Zero out all data memory - char* data_start = node_data + data_offset_; // get_data_offset(); - std::memset(data_start, 0, total_size_); - - // Set any non-zero default values if needed - for (const auto& field : fields_) { - char* field_ptr = data_start + field.offset; - initialize_field_memory(field_ptr, field.type); - } - } + void initialize_node_data(char* node_data) const; // Getters /// Schema name this layout was built for. @@ -233,107 +182,19 @@ class SchemaLayout { /// Layout entry for \p field, or nullptr if index is out of range or field is /// null. const FieldLayout* get_field_layout( - const std::shared_ptr& field) const { - if (!field) { - // log_error("get_field_layout: field is null"); - return nullptr; - } - if (field->index_ >= fields_.size()) { - // log_error("get_field_layout: field index {} >= fields size {}", - // field->index_, fields_.size()); - return nullptr; - } - return &fields_[field->index_]; - } + const std::shared_ptr& field) const; private: /** * Add a field to the schema layout * Fields are automatically aligned and packed efficiently */ - void add_field(const std::shared_ptr& field) { - assert(field != nullptr); - const auto& td = field->type_descriptor(); - size_t field_size = td.storage_size(); - size_t field_alignment = td.storage_alignment(); - - alignment_ = std::max(alignment_, field_alignment); - - // Calculate field offset (relative to start of data, after bit set) - size_t aligned_offset = align_up(total_size_, field_alignment); - field->index_ = fields_.size(); - fields_.emplace_back(field->index_, field->name(), field->type(), td, - aligned_offset, field_size, field_alignment, - field->nullable()); - - // Update total size (size of data portion only) - total_size_ = aligned_offset + field_size; - } + void add_field(const std::shared_ptr& field); static bool write_value_to_memory(char* ptr, const ValueType type, - const Value& value) { - switch (type) { - case ValueType::INT64: - if (value.type() != ValueType::INT64) return false; - *reinterpret_cast(ptr) = value.as_int64(); - return true; - case ValueType::INT32: - if (value.type() != ValueType::INT32) return false; - *reinterpret_cast(ptr) = value.as_int32(); - return true; - case ValueType::DOUBLE: - if (value.type() != ValueType::DOUBLE) return false; - *reinterpret_cast(ptr) = value.as_double(); - return true; - case ValueType::BOOL: - if (value.type() != ValueType::BOOL) return false; - *reinterpret_cast(ptr) = value.as_bool(); - return true; - case ValueType::STRING: - case ValueType::FIXED_STRING16: - case ValueType::FIXED_STRING32: - case ValueType::FIXED_STRING64: { - // All string types expect StringRef - if (!is_string_type(value.type())) return false; - // Value should contain StringRef (created by NodeArena) - *reinterpret_cast(ptr) = value.as_string_ref(); - return true; - } - case ValueType::ARRAY: - if (value.type() != ValueType::ARRAY) return false; - *reinterpret_cast(ptr) = value.as_array_ref(); - return true; - case ValueType::MAP: - if (value.type() != ValueType::MAP) return false; - *reinterpret_cast(ptr) = value.as_map_ref(); - return true; - default: - return false; - } - } + const Value& value); - static void initialize_field_memory(char* ptr, const ValueType type) { - switch (type) { - case ValueType::STRING: - case ValueType::FIXED_STRING16: - case ValueType::FIXED_STRING32: - case ValueType::FIXED_STRING64: - // Initialize StringRef to null/empty - new (ptr) StringRef(); - break; - case ValueType::ARRAY: - // Initialize ArrayRef to null/empty - new (ptr) ArrayRef(); - break; - case ValueType::MAP: - // Initialize MapRef to null/empty - new (ptr) MapRef(); - break; - default: - // Zero initialization is fine for numeric types and bools - break; - } - } + static void initialize_field_memory(char* ptr, const ValueType type); std::string schema_name_; std::vector fields_; @@ -351,20 +212,12 @@ class LayoutRegistry { /** * Register a manually created layout */ - void register_layout(std::shared_ptr layout) { - if (!layout->is_finalized()) { - layout->finalize(); - } - layouts_[layout->get_schema_name()] = std::move(layout); - } + void register_layout(std::shared_ptr layout); /** * Get layout for a schema, returns nullptr if not found */ - std::shared_ptr get_layout(const std::string& schema_name) { - const auto it = layouts_.find(schema_name); - return it != layouts_.end() ? it->second : nullptr; - } + std::shared_ptr get_layout(const std::string& schema_name); /// True if a layout is registered for \p schema_name. bool exists(const std::string& schema_name) const { @@ -376,7 +229,6 @@ class LayoutRegistry { const std::shared_ptr& schema) { auto layout = std::make_shared(schema); layouts_[schema->name()] = layout; - // Logger::get_instance().debug("created schema layout"); return layout; } @@ -386,14 +238,7 @@ class LayoutRegistry { } /// All registered schema names (order unspecified). - [[nodiscard]] std::vector get_schema_names() const { - std::vector names; - names.reserve(layouts_.size()); - for (auto const& entry : layouts_) { - names.push_back(entry.first().str()); - } - return names; - } + [[nodiscard]] std::vector get_schema_names() const; /// Number of registered layouts. size_t size() const { return layouts_.size(); } @@ -408,4 +253,4 @@ class LayoutRegistry { } // namespace tundradb -#endif // SCHEMA_LAYOUT_HPP \ No newline at end of file +#endif // SCHEMA_LAYOUT_HPP diff --git a/include/memory/string_arena.hpp b/include/memory/string_arena.hpp index 7b81f26..6a601d7 100644 --- a/include/memory/string_arena.hpp +++ b/include/memory/string_arena.hpp @@ -69,54 +69,7 @@ class StringPool { * @return Ok(StringRef) with ref_count = 1, or Error with reason */ arrow::Result store_string(const std::string& str, - uint32_t pool_id) { - if (str.length() > max_size_) { - return arrow::Status::Invalid("StringPool::store_string: string length ", - str.length(), " exceeds pool max_size ", - max_size_); - } - - // Check deduplication cache (thread-safe via tbb::concurrent_hash_map) - if (enable_deduplication_) { - typename decltype(dedup_cache_)::const_accessor acc; - if (dedup_cache_.find(acc, str)) { - return acc->second; - } - } - - // CRITICAL: Lock arena allocation (FreeListArena is NOT thread-safe) - std::lock_guard lock(arena_mutex_); - - // Allocate: [header 16 bytes][string data][null terminator] - const size_t alloc_size = StringRef::HEADER_SIZE + str.length() + 1; - void* raw_storage = arena_->allocate(alloc_size); - if (!raw_storage) { - return arrow::Status::OutOfMemory( - "StringPool::store_string: arena allocation failed (requested ", - alloc_size, " bytes)"); - } - - auto* header = static_cast(raw_storage); - header->ref_count.store(0, std::memory_order_relaxed); - header->length = static_cast(str.length()); - header->flags = 0; - header->padding = 0; - - char* data = reinterpret_cast(header) + StringRef::HEADER_SIZE; - std::memcpy(data, str.c_str(), str.length()); - data[str.length()] = '\0'; - - StringRef ref(data, static_cast(str.length()), pool_id); - active_allocs_.fetch_add(1, std::memory_order_relaxed); - - if (enable_deduplication_) { - typename decltype(dedup_cache_)::accessor acc; - dedup_cache_.insert(acc, str); - acc->second = ref; - } - - return ref; - } + uint32_t pool_id); /** * Store a string view in this pool. @@ -132,20 +85,7 @@ class StringPool { * * @param data Pointer to string data (NOT to header) */ - void mark_for_deletion(const char* data) { - if (!data) return; - - auto* header = reinterpret_cast( - const_cast(data - StringRef::HEADER_SIZE)); - - header->mark_for_deletion(); - - // Remove from dedup cache immediately to prevent new references - if (enable_deduplication_) { - std::string str(data, header->length); - dedup_cache_.erase(str); - } - } + void mark_for_deletion(const char* data); /** * Deallocate a string's memory back to the FreeListArena. @@ -158,28 +98,13 @@ class StringPool { * * @param data Pointer to string data (NOT to header) */ - void release_string(const char* data) { - if (!data) return; - - auto* header = reinterpret_cast( - const_cast(data - StringRef::HEADER_SIZE)); - - active_allocs_.fetch_sub(1, std::memory_order_relaxed); - - std::lock_guard lock(arena_mutex_); - arena_->deallocate(header); - } + void release_string(const char* data); /** * Enable or disable string deduplication. * When enabled, identical strings share the same memory. */ - void enable_deduplication(bool enable = true) { - enable_deduplication_ = enable; - if (!enable) { - dedup_cache_.clear(); - } - } + void enable_deduplication(bool enable = true); // ======================================================================== // STATISTICS @@ -194,28 +119,14 @@ class StringPool { size_t get_total_allocated() const { return arena_->get_total_allocated(); } - size_t get_used_bytes() const { - if (auto* free_list = dynamic_cast(arena_.get())) { - return free_list->get_used_bytes(); - } - return 0; - } + size_t get_used_bytes() const; size_t get_string_count() const { return dedup_cache_.size(); } /** * Get total reference count across all strings (for debugging). */ - size_t get_total_references() const { - size_t total = 0; - typename decltype(dedup_cache_)::const_accessor acc; - for (auto it = dedup_cache_.begin(); it != dedup_cache_.end(); ++it) { - if (dedup_cache_.find(acc, it->first)) { - total += acc->second.get_ref_count(); - } - } - return total; - } + size_t get_total_references() const; /** * Reset the pool - clears all allocations. @@ -282,14 +193,7 @@ class StringArena { * - Pool 2: strings up to 64 bytes * - Pool 3: unlimited size strings */ - StringArena() { - pools_.reserve(4); - pools_.emplace_back(std::make_unique(16)); - pools_.emplace_back(std::make_unique(32)); - pools_.emplace_back(std::make_unique(64)); - pools_.emplace_back(std::make_unique(SIZE_MAX)); - register_pools(); - } + StringArena(); /** * Store a string in a specific pool by ID. @@ -299,26 +203,13 @@ class StringArena { * @return Ok(StringRef) with ref_count = 1, or Error with reason */ arrow::Result store_string(const std::string& str, - uint32_t pool_id = 3) { - if (pool_id >= pools_.size()) { - return arrow::Status::Invalid( - "StringArena::store_string: invalid pool_id ", pool_id, - " (max: ", pools_.size() - 1, ")"); - } - return pools_[pool_id]->store_string(str, pool_id); - } + uint32_t pool_id = 3); /** * Store a string, automatically choosing the best pool. * Picks the smallest pool that can fit the string. */ - arrow::Result store_string_auto(const std::string& str) { - size_t len = str.length(); - if (len <= 16) return pools_[0]->store_string(str, 0); - if (len <= 32) return pools_[1]->store_string(str, 1); - if (len <= 64) return pools_[2]->store_string(str, 2); - return pools_[3]->store_string(str, 3); - } + arrow::Result store_string_auto(const std::string& str); /** * Mark a string for deletion. @@ -326,11 +217,7 @@ class StringArena { * * @param ref StringRef to mark for deletion */ - void mark_for_deletion(const StringRef& ref) { - if (!ref.is_null()) { - pools_[ref.pool_id()]->mark_for_deletion(ref.data()); - } - } + void mark_for_deletion(const StringRef& ref); /** * Get string content from reference (zero-copy view). @@ -342,48 +229,25 @@ class StringArena { /** * Enable or disable deduplication for all pools. */ - void enable_deduplication(bool enable = true) { - for (auto& pool : pools_) { - pool->enable_deduplication(enable); - } - } + void enable_deduplication(bool enable = true); /** Total live (allocated but not freed) strings across all pools. */ - int64_t get_active_allocs() const { - int64_t total = 0; - for (const auto& pool : pools_) { - total += pool->get_active_allocs(); - } - return total; - } + int64_t get_active_allocs() const; /** * Get pool by ID. */ - StringPool* get_pool(uint32_t pool_id) const { - if (pool_id < pools_.size()) { - return pools_[pool_id].get(); - } - return nullptr; - } + StringPool* get_pool(uint32_t pool_id) const; /** * Reset all pools. */ - void reset() { - for (auto& pool : pools_) { - pool->reset(); - } - } + void reset(); /** * Clear all memory. */ - void clear() { - for (auto& pool : pools_) { - pool->clear(); - } - } + void clear(); /** * Register pools with the global registry. @@ -424,60 +288,15 @@ class StringArenaRegistry { acc->second = pool; } - static StringPool* get_pool(uint32_t pool_id) { - typename decltype(pool_map_)::const_accessor acc; - if (instance().pool_map_.find(acc, pool_id)) { - return acc->second; - } - return nullptr; - } + static StringPool* get_pool(uint32_t pool_id); /** * Release a string reference. * Called by StringRef::release() to deallocate the string. */ - static void release_string(uint32_t pool_id, const char* data) { - if (auto* pool = get_pool(pool_id)) { - pool->release_string(data); - } - } + static void release_string(uint32_t pool_id, const char* data); }; -// ============================================================================ -// StringArena::register_pools() implementation (after StringArenaRegistry) -// ============================================================================ - -inline void StringArena::register_pools() { - for (uint32_t i = 0; i < pools_.size(); ++i) { - StringArenaRegistry::register_pool(i, pools_[i].get()); - } -} - -// ============================================================================ -// StringRef::release() implementation -// ============================================================================ - -inline void StringRef::release() { - if (data_) { - if (auto* header = get_header()) { - assert(header->ref_count.load(std::memory_order_relaxed) > 0 && - "StringRef::release() called with ref_count already 0 — " - "double-release or missing ref-count increment"); - - int32_t old_count = - header->ref_count.fetch_sub(1, std::memory_order_acq_rel); - - if (old_count == 1 && header->is_marked_for_deletion()) { - StringArenaRegistry::release_string(pool_id_, data_); - } - } - - data_ = nullptr; - length_ = 0; - pool_id_ = 0; - } -} - } // namespace tundradb #endif // STRING_ARENA_HPP diff --git a/include/query/row.hpp b/include/query/row.hpp index d475804..4c7e49f 100644 --- a/include/query/row.hpp +++ b/include/query/row.hpp @@ -53,41 +53,12 @@ struct PathSegment { } }; -/** - * @brief Checks whether @p prefix is a prefix of @p path. - * - * @param prefix The candidate prefix path. - * @param path The full path to test against. - * @return True if every element of @p prefix matches the corresponding element - * of @p path. - */ -inline bool is_prefix(const std::vector& prefix, - const std::vector& path) { - if (prefix.size() > path.size()) { - return false; - } - for (size_t i = 0; i < prefix.size(); ++i) { - if (!(prefix[i] == path[i])) return false; - } - return true; -} +/// Checks whether @p prefix is a prefix of @p path. +bool is_prefix(const std::vector& prefix, + const std::vector& path); -/** - * @brief Joins path segments into an arrow-delimited string (e.g. - * "users:0->companies:1"). - * - * @param schema_path The segments to join. - * @return The formatted path string. - */ -inline std::string join_schema_path( - const std::vector& schema_path) { - std::ostringstream oss; - for (size_t i = 0; i < schema_path.size(); ++i) { - if (i != 0) oss << "->"; - oss << schema_path[i].toString(); - } - return oss.str(); -} +/// Joins path segments into an arrow-delimited string. +std::string join_schema_path(const std::vector& schema_path); /** * @brief A single denormalised result row produced during BFS traversal. @@ -126,228 +97,41 @@ struct Row { cells[field_id].data != nullptr; } - /** - * @brief Populates cells from a node's fields using the given index mapping. - * - * @param field_indices Maps each node field position to a row cell index. - * @param node The source node. - * @param temporal_context Temporal snapshot for versioned reads (may be - * nullptr). - */ + /// Populates cells from a node's fields using the given index mapping. void set_cell_from_node(const std::vector& field_indices, const std::shared_ptr& node, - TemporalContext* temporal_context) { - auto view = node->view(temporal_context); - - const auto& fields = node->get_schema()->fields(); - const size_t n = std::min(fields.size(), field_indices.size()); - for (size_t i = 0; i < n; ++i) { - const auto& field = fields[i]; - const int field_id = field_indices[i]; - auto value_ref_result = view.get_value_ref(field); - if (value_ref_result.ok()) { - this->set_cell(field_id, value_ref_result.ValueOrDie()); - } - } - } + TemporalContext* temporal_context); - /** - * @brief Populates row cells from an edge projection. - * - * This is the edge equivalent of `set_cell_from_node()`, used when query - * output includes edge aliases (for example `SELECT e` or `SELECT e.role`). - * - * Mapping behavior: - * - `fields[i]` is the projected field metadata for the edge alias. - * - `field_indices[i]` is the destination row-cell index. - * - The method reads values through `EdgeView` so temporal snapshots are - * honored. - * - * Field resolution behavior: - * - Structural edge fields (`id`, `_edge_id`, `source_id`, `target_id`, - * `created_ts`) are read directly as-is. - * - Non-structural fields are resolved by name against the edge's runtime - * schema to ensure type/layout consistency before reading. - * - Missing or unresolved fields are skipped (no write to the target cell). - * - * @param field_indices Maps each projected edge field to a row cell index. - * @param edge The source edge to read from. - * @param fields Projected edge fields for the alias in output-schema order. - * @param temporal_context Temporal snapshot for versioned reads (may be - * nullptr). - */ + /// Populates row cells from an edge projection. void set_cell_from_edge( const std::vector& field_indices, const std::shared_ptr& edge, const llvm::SmallVector, 4>& fields, - TemporalContext* temporal_context) { - auto view = edge->view(temporal_context); - const auto edge_schema = edge->get_schema(); - const size_t n = std::min(fields.size(), field_indices.size()); - for (size_t i = 0; i < n; ++i) { - auto field = fields[i]; - if (!field) continue; - const auto& name = field->name(); - const bool structural = - (name == field_names::kId || name == field_names::kEdgeId || - name == field_names::kSourceId || name == field_names::kTargetId || - name == field_names::kCreatedTs); - if (!structural && edge_schema) { - auto real_field = edge_schema->get_field(name); - if (!real_field) continue; - field = real_field; - } - const int field_id = field_indices[i]; - auto value_ref_result = view.get_value_ref(field); - if (value_ref_result.ok()) { - this->set_cell(field_id, value_ref_result.ValueOrDie()); - } - } - } + TemporalContext* temporal_context); /** @brief Returns true if this row's path starts with @p prefix. */ [[nodiscard]] bool start_with(const std::vector& prefix) const { return is_prefix(prefix, this->path); } - /** - * @brief Lazily extracts a schema-name->node-ID map from the "*.id" cells. - * - * @param field_id_to_name Mapping from field index to fully-qualified name. - * @return A reference to the cached schema-ID map. - */ + /// Lazily extracts a schema-name->node-ID map from the "*.id" cells. const std::unordered_map& extract_schema_ids( - const llvm::SmallDenseMap& field_id_to_name) { - if (ids_populated) { - return ids; - } - for (size_t i = 0; i < cells.size(); ++i) { - const auto& value = cells[i]; - if (!value.data) continue; - const auto& field_name = field_id_to_name.at(static_cast(i)); - size_t dot_pos = field_name.find('.'); - if (dot_pos != std::string::npos) { - std::string schema = field_name.substr(0, dot_pos); - if (field_name.substr(dot_pos + 1) == field_names::kId) { - ids[schema] = value.as_int64(); - } - } - } - return ids; - } + const llvm::SmallDenseMap& field_id_to_name); - /** - * @brief Merges another row into this one (non-destructive). - * - * Fields present in @p other but absent in this row are copied. - * Existing values are kept. - * - * @param other The row to merge from. - * @return A new Row combining both. - */ + /// Merges another row into this one (non-destructive). [[nodiscard]] std::shared_ptr merge( - const std::shared_ptr& other) const { - std::shared_ptr merged = std::make_shared(*this); - IF_DEBUG_ENABLED { - log_debug("Row::merge() - this: {}", this->ToString()); - log_debug("Row::merge() - other: {}", other->ToString()); - } - - for (size_t i = 0; i < other->cells.size(); ++i) { - if (!merged->has_value(static_cast(i))) { - IF_DEBUG_ENABLED { - log_debug("Row::merge() - adding field '{}' with value: {}", i, - cells[i].ToString()); - } - merged->cells[i] = other->cells[i]; - } else { - IF_DEBUG_ENABLED { - log_debug("Row::merge() - skipping field '{}' (already has value)", - i); - } - } - } - IF_DEBUG_ENABLED { - log_debug("Row::merge() - result: {}", merged->ToString()); - } - return merged; - } + const std::shared_ptr& other) const; - /** @brief Returns a debug string listing the path and all cell values. */ - [[nodiscard]] std::string ToString() const { - std::stringstream ss; - ss << "Row{"; - ss << "path='" << join_schema_path(path) << "', "; - - bool first = true; - for (size_t i = 0; i < cells.size(); i++) { - if (!first) { - ss << ", "; - } - first = false; - - ss << i << ": "; - const auto value_ref = cells[i]; - if (!value_ref.data) { - ss << "NULL"; - } else { - switch (value_ref.type) { - case ValueType::INT64: - ss << value_ref.as_int64(); - break; - case ValueType::INT32: - ss << value_ref.as_int32(); - break; - case ValueType::DOUBLE: - ss << value_ref.as_double(); - break; - case ValueType::STRING: - ss << "\"" << value_ref.as_string_ref().to_string() << "\""; - break; - case ValueType::BOOL: - ss << (value_ref.as_bool() ? "true" : "false"); - break; - default: - ss << "unknown"; - break; - } - } - } - ss << "}"; - return ss.str(); - } + /// Returns a debug string listing the path and all cell values. + [[nodiscard]] std::string ToString() const; }; -/** - * @brief Creates a blank Row sized to fit the given output schema. - * - * @param final_output_schema The Arrow schema of the final query output. - * @return A Row with id = −1 and all cells null. - */ -inline Row create_empty_row_from_schema( - const std::shared_ptr& final_output_schema) { - Row new_row(final_output_schema->num_fields() + 32); - new_row.id = -1; - return new_row; -} +/// Creates a blank Row sized to fit the given output schema. +Row create_empty_row_from_schema( + const std::shared_ptr& final_output_schema); -/** - * @brief Collects rows whose path starts with @p parent's path (excluding @p - * parent itself). - * - * @param parent The parent row. - * @param rows All candidate rows. - * @return The subset of @p rows that are children of @p parent. - */ -inline std::vector get_child_rows(const Row& parent, - const std::vector& rows) { - std::vector child; - for (const auto& row : rows) { - if (parent.id != row.id && row.start_with(parent.path)) { - child.push_back(row); - } - } - return child; -} +/// Collects rows whose path starts with @p parent's path (excluding parent). +std::vector get_child_rows(const Row& parent, + const std::vector& rows); /** * @brief Tree node used to group and merge rows during BFS result assembly. @@ -375,36 +159,11 @@ struct RowNode { /** @brief Returns true if this node carries a row (i.e. is a leaf). */ bool leaf() const { return row.has_value(); } - /** - * @brief Recursively inserts a row into the tree following its path segments. - * - * @param path_idx Current index into new_row->path. - * @param new_row The row to insert. - */ - void insert_row_dfs(size_t path_idx, const std::shared_ptr& new_row) { - if (path_idx == new_row->path.size()) { - this->row = new_row; - return; - } - - for (const auto& n : children) { - if (n->path_segment == new_row->path[path_idx]) { - n->insert_row_dfs(path_idx + 1, new_row); - return; - } - } + /// Recursively inserts a row into the tree following its path segments. + void insert_row_dfs(size_t path_idx, const std::shared_ptr& new_row); - auto new_node = std::make_unique(); - new_node->depth = depth + 1; - new_node->path_segment = new_row->path[path_idx]; - new_node->insert_row_dfs(path_idx + 1, new_row); - children.emplace_back(std::move(new_node)); - } - - /** @brief Inserts a row starting from the root of its path. */ - void insert_row(const std::shared_ptr& new_row) { - insert_row_dfs(0, new_row); - } + /// Inserts a row starting from the root of its path. + void insert_row(const std::shared_ptr& new_row); /** * @brief Recursively merges child rows via Cartesian product to produce diff --git a/src/memory/CMakeLists.txt b/src/memory/CMakeLists.txt new file mode 100644 index 0000000..e8108f4 --- /dev/null +++ b/src/memory/CMakeLists.txt @@ -0,0 +1,8 @@ +target_sources(core PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/node_arena.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/free_list_arena.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/string_arena.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/array_arena.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/map_arena.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/schema_layout.cpp +) diff --git a/src/memory/array_arena.cpp b/src/memory/array_arena.cpp new file mode 100644 index 0000000..aec4d20 --- /dev/null +++ b/src/memory/array_arena.cpp @@ -0,0 +1,265 @@ +#include "memory/array_arena.hpp" + +namespace tundradb { + +// ============================================================================ +// ArrayArena +// ============================================================================ + +arrow::Result ArrayArena::allocate(ValueType elem_type, + uint32_t capacity) { + if (capacity == 0) { + return ArrayRef{}; + } + + const size_t elem_sz = get_type_size(elem_type); + const size_t data_bytes = elem_sz * capacity; + const size_t alloc_size = ArrayRef::HEADER_SIZE + data_bytes; + + std::lock_guard lock(arena_mutex_); + void* raw = arena_->allocate(alloc_size); + if (!raw) { + return arrow::Status::OutOfMemory( + "ArrayArena::allocate: arena allocation failed (requested ", + alloc_size, " bytes)"); + } + + init_header(raw, capacity); + + char* data = static_cast(raw) + ArrayRef::HEADER_SIZE; + zero_init_elements(data, elem_type, capacity); + + active_allocs_.fetch_add(1, std::memory_order_relaxed); + return ArrayRef{data, elem_type}; +} + +arrow::Result ArrayArena::allocate_with_data(ValueType elem_type, + const void* elements, + uint32_t count, + uint32_t capacity) { + if (capacity < count) capacity = count; + if (capacity == 0) { + return ArrayRef{}; + } + + const size_t elem_sz = get_type_size(elem_type); + const size_t data_bytes = elem_sz * capacity; + const size_t alloc_size = ArrayRef::HEADER_SIZE + data_bytes; + + std::lock_guard lock(arena_mutex_); + void* raw = arena_->allocate(alloc_size); + if (!raw) { + return arrow::Status::OutOfMemory( + "ArrayArena::allocate_with_data: arena allocation failed (requested ", + alloc_size, " bytes)"); + } + + auto* header = init_header(raw, capacity); + header->length = count; + + char* data = static_cast(raw) + ArrayRef::HEADER_SIZE; + copy_init_elements(data, static_cast(elements), elem_type, + count); + if (capacity > count) { + zero_init_elements(data + elem_sz * count, elem_type, capacity - count); + } + + active_allocs_.fetch_add(1, std::memory_order_relaxed); + return ArrayRef{data, elem_type}; +} + +arrow::Status ArrayArena::append(ArrayRef& ref, const void* element) { + if (ref.is_null()) { + return arrow::Status::Invalid( + "ArrayArena::append: ArrayRef is null (cannot append to null ref)"); + } + + auto* header = get_header(ref); + if (!header) { + return arrow::Status::Invalid( + "ArrayArena::append: invalid ArrayRef (header is null)"); + } + + if (header->length < header->capacity) { + char* dest = ref.mutable_element_ptr(header->length); + assign_element(dest, element, ref.elem_type()); + header->length++; + return arrow::Status::OK(); + } + + const uint32_t new_cap = header->capacity * 2; + const uint32_t old_len = header->length; + + ARROW_ASSIGN_OR_RAISE( + ArrayRef new_ref, + allocate_with_data(ref.elem_type(), ref.data(), old_len, new_cap)); + + assign_element(new_ref.mutable_element_ptr(old_len), element, + ref.elem_type()); + auto* new_header = get_header(new_ref); + new_header->length = old_len + 1; + + header->mark_for_deletion(); + + ref = std::move(new_ref); + return arrow::Status::OK(); +} + +arrow::Result ArrayArena::copy(const ArrayRef& src, + uint32_t extra_capacity) { + if (src.is_null()) { + return arrow::Status::Invalid( + "ArrayArena::copy: source ArrayRef is null"); + } + + const auto* header = get_header_const(src); + if (!header) { + return arrow::Status::Invalid( + "ArrayArena::copy: invalid source ArrayRef (header is null)"); + } + + const uint32_t new_capacity = header->capacity + extra_capacity; + return allocate_with_data(src.elem_type(), src.data(), header->length, + new_capacity); +} + +void ArrayArena::mark_for_deletion(const ArrayRef& ref) { + if (ref.is_null()) return; + if (auto* h = get_header_mut(ref)) { + h->mark_for_deletion(); + } +} + +void ArrayArena::release_array(char* data, ValueType elem_type) { + if (!data) return; + + auto* header = + reinterpret_cast(data - ArrayRef::HEADER_SIZE); + if (!header->arena) return; + header->arena = nullptr; + + destruct_elements(data, elem_type, header->length); + + active_allocs_.fetch_sub(1, std::memory_order_relaxed); + + std::lock_guard lock(arena_mutex_); + arena_->deallocate(header); +} + +void ArrayArena::reset() { + std::lock_guard lock(arena_mutex_); + arena_->reset(); +} + +void ArrayArena::clear() { + std::lock_guard lock(arena_mutex_); + arena_->clear(); +} + +ArrayRef::ArrayHeader* ArrayArena::init_header(void* raw, uint32_t capacity) { + auto* header = static_cast(raw); + header->ref_count.store(0, std::memory_order_relaxed); + header->flags = 0; + header->length = 0; + header->capacity = capacity; + header->arena = this; + return header; +} + +void ArrayArena::destruct_elements(char* data, ValueType elem_type, + uint32_t count) { + if (count == 0) return; + + if (is_string_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + auto* sr = reinterpret_cast(data + i * sizeof(StringRef)); + if (!sr->is_null()) { + auto* hdr = reinterpret_cast( + const_cast(sr->data() - StringRef::HEADER_SIZE)); + hdr->mark_for_deletion(); + } + sr->~StringRef(); + } + } else if (is_array_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + auto* ar = reinterpret_cast(data + i * sizeof(ArrayRef)); + if (!ar->is_null()) { + auto* hdr = reinterpret_cast( + ar->data() - ArrayRef::HEADER_SIZE); + hdr->mark_for_deletion(); + } + ar->~ArrayRef(); + } + } +} + +void ArrayArena::copy_init_elements(char* dst, const char* src, + ValueType elem_type, uint32_t count) { + if (count == 0) return; + if (is_string_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + const auto* s = + reinterpret_cast(src + i * sizeof(StringRef)); + new (dst + i * sizeof(StringRef)) StringRef(*s); + } + } else if (is_array_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + const auto* a = + reinterpret_cast(src + i * sizeof(ArrayRef)); + new (dst + i * sizeof(ArrayRef)) ArrayRef(*a); + } + } else { + std::memcpy(dst, src, get_type_size(elem_type) * count); + } +} + +void ArrayArena::assign_element(char* dst, const void* src, + ValueType elem_type) { + if (is_string_type(elem_type)) { + *reinterpret_cast(dst) = + *reinterpret_cast(src); + } else if (is_array_type(elem_type)) { + *reinterpret_cast(dst) = + *reinterpret_cast(src); + } else { + std::memcpy(dst, src, get_type_size(elem_type)); + } +} + +void ArrayArena::zero_init_elements(char* data, ValueType elem_type, + uint32_t count) { + if (is_string_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + new (data + i * sizeof(StringRef)) StringRef(); + } + } else if (is_array_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + new (data + i * sizeof(ArrayRef)) ArrayRef(); + } + } else { + std::memset(data, 0, get_type_size(elem_type) * count); + } +} + +// ============================================================================ +// ArrayRef::release() implementation (after ArrayArena is fully defined) +// ============================================================================ + +void ArrayRef::release() { + if (!data_) return; + if (auto* h = get_header()) { + assert(h->ref_count.load(std::memory_order_relaxed) > 0 && + "ArrayRef::release() called with ref_count already 0 — " + "double-release or missing ref-count increment"); + + const int32_t old_count = + h->ref_count.fetch_sub(1, std::memory_order_acq_rel); + if (old_count == 1 && h->is_marked_for_deletion() && h->arena) { + h->arena->release_array(data_, elem_type_); + } + } + data_ = nullptr; + elem_type_ = ValueType::NA; +} + +} // namespace tundradb diff --git a/src/memory/free_list_arena.cpp b/src/memory/free_list_arena.cpp new file mode 100644 index 0000000..9e3f268 --- /dev/null +++ b/src/memory/free_list_arena.cpp @@ -0,0 +1,273 @@ +#include "memory/free_list_arena.hpp" + +namespace tundradb { + +FreeListArena::FreeListArena(size_t initial_size, size_t min_fragment_size) + : chunk_size_(initial_size), + current_chunk_size_(0), + min_fragment_size_(min_fragment_size) { + allocate_new_chunk(chunk_size_); +} + +void* FreeListArena::allocate(size_t size, size_t alignment) { + size = align_up(size, alignment); + + void* reused_block = find_free_block(size); + if (reused_block) { + return reused_block; + } + + return allocate_new_block(size, alignment); +} + +void FreeListArena::deallocate(void* ptr) { + if (!ptr) return; + + BlockHeader* header = get_block_header(ptr); + + assert(!header->is_free && "Double free detected"); + + header->is_free = true; + + coalesce_blocks(ptr); + + freed_bytes_ += header->size; + total_used_ -= header->size; +} + +void FreeListArena::reset() { + free_blocks_by_size_.clear(); + + for (size_t i = 0; i < chunk_allocated_sizes_.size(); ++i) { + chunk_allocated_sizes_[i] = 0; + } + + current_offset_ = 0; + if (!chunks_.empty()) { + current_chunk_ = chunks_[0].get(); + current_chunk_size_ = chunk_sizes_[0]; + } + + total_used_ = 0; + freed_bytes_ = 0; +} + +void FreeListArena::clear() { + chunks_.clear(); + chunk_sizes_.clear(); + chunk_allocated_sizes_.clear(); + free_blocks_by_size_.clear(); + current_chunk_ = nullptr; + current_chunk_size_ = 0; + current_offset_ = 0; + total_allocated_ = 0; + total_used_ = 0; + freed_bytes_ = 0; +} + +size_t FreeListArena::get_free_block_count() const { + size_t count = 0; + for (const auto& blocks : free_blocks_by_size_ | std::views::values) { + count += blocks.size(); + } + return count; +} + +double FreeListArena::get_fragmentation_ratio() const { + if (total_allocated_ == 0) return 0.0; + return static_cast(freed_bytes_) / total_allocated_; +} + + +void FreeListArena::allocate_new_chunk(size_t size) { + auto new_chunk = std::make_unique(size); + current_chunk_ = new_chunk.get(); + current_chunk_size_ = size; + chunks_.push_back(std::move(new_chunk)); + chunk_sizes_.push_back(size); + chunk_allocated_sizes_.push_back(0); + current_offset_ = 0; + + total_allocated_ += size; +} + +char* FreeListArena::find_chunk_start(void* ptr) { + char* char_ptr = static_cast(ptr); + for (size_t i = 0; i < chunks_.size(); ++i) { + char* chunk_start = chunks_[i].get(); + char* chunk_end = chunk_start + chunk_sizes_[i]; + if (char_ptr >= chunk_start && char_ptr < chunk_end) { + return chunk_start; + } + } + return nullptr; +} + +BlockHeader* FreeListArena::find_prev_block(BlockHeader* target) { + char* chunk_start = find_chunk_start(target); + if (!chunk_start) return nullptr; + + char* target_ptr = reinterpret_cast(target); + if (target_ptr == chunk_start) { + return nullptr; + } + + char* current_ptr = chunk_start; + + while (current_ptr < target_ptr) { + BlockHeader* current = reinterpret_cast(current_ptr); + + char* next_ptr = current_ptr + BlockHeader::HEADER_SIZE + current->size; + + if (next_ptr == target_ptr) { + return current; + } + + current_ptr = next_ptr; + } + + return nullptr; +} + +void* FreeListArena::allocate_new_block(size_t size, size_t alignment) { + size_t aligned_size = align_up(size, alignment); + + size_t data_aligned_offset = calculate_aligned_offset( + current_chunk_, current_offset_ + BlockHeader::HEADER_SIZE, alignment); + + size_t header_offset = data_aligned_offset - BlockHeader::HEADER_SIZE; + size_t total_size = data_aligned_offset + aligned_size - current_offset_; + + if (current_chunk_ == nullptr || + current_offset_ + total_size > current_chunk_size_) { + size_t needed_chunk_size = + std::max(chunk_size_, total_size) + get_alignment_overhead(alignment); + allocate_new_chunk(needed_chunk_size); + + data_aligned_offset = calculate_aligned_offset( + current_chunk_, BlockHeader::HEADER_SIZE, alignment); + header_offset = data_aligned_offset - BlockHeader::HEADER_SIZE; + total_size = data_aligned_offset + aligned_size; + } + + char* header_start = current_chunk_ + header_offset; + BlockHeader* header = reinterpret_cast(header_start); + + header->size = aligned_size; + header->is_free = false; + + char* data_ptr = current_chunk_ + data_aligned_offset; + + current_offset_ = data_aligned_offset + aligned_size; + total_used_ += aligned_size; + + chunk_allocated_sizes_.back() = current_offset_; + + return data_ptr; +} + +void* FreeListArena::find_free_block(size_t size) { + auto it = free_blocks_by_size_.lower_bound(size); + if (it != free_blocks_by_size_.end()) { + auto& blocks = it->second; + if (!blocks.empty()) { + BlockHeader* header = *blocks.begin(); + blocks.erase(blocks.begin()); + + if (blocks.empty()) { + free_blocks_by_size_.erase(it); + } + + if (header->size > + size + BlockHeader::HEADER_SIZE + min_fragment_size_) { + split_block(header, size); + } + + header->is_free = false; + return reinterpret_cast(header) + BlockHeader::HEADER_SIZE; + } + } + + return nullptr; +} + +void FreeListArena::split_block(BlockHeader* header, size_t needed_size) { + size_t remaining_size = + header->size - needed_size - BlockHeader::HEADER_SIZE; + + char* new_block_start = reinterpret_cast(header) + + BlockHeader::HEADER_SIZE + needed_size; + BlockHeader* new_header = reinterpret_cast(new_block_start); + + new_header->size = remaining_size; + new_header->is_free = true; + + header->size = needed_size; + + add_to_free_list(new_block_start + BlockHeader::HEADER_SIZE, + remaining_size); +} + +void FreeListArena::add_to_free_list(void* ptr, size_t size) { + BlockHeader* header = get_block_header(ptr); + free_blocks_by_size_[size].insert(header); +} + +void FreeListArena::remove_block_from_free_list(BlockHeader* block) { + auto it = free_blocks_by_size_.find(block->size); + if (it != free_blocks_by_size_.end()) { + it->second.erase(block); + if (it->second.empty()) { + free_blocks_by_size_.erase(it); + } + } +} + +BlockHeader* FreeListArena::find_next_block(BlockHeader* header) { + char* current_ptr = reinterpret_cast(header); + char* next_ptr = current_ptr + BlockHeader::HEADER_SIZE + header->size; + + char* chunk_start = find_chunk_start(header); + if (!chunk_start) { + return nullptr; + } + + size_t chunk_index = 0; + for (size_t i = 0; i < chunks_.size(); ++i) { + if (chunks_[i].get() == chunk_start) { + chunk_index = i; + break; + } + } + + char* chunk_allocated_end = + chunk_start + chunk_allocated_sizes_[chunk_index]; + + if (next_ptr + BlockHeader::HEADER_SIZE <= chunk_allocated_end) { + return reinterpret_cast(next_ptr); + } + + return nullptr; +} + +void FreeListArena::coalesce_blocks(void* ptr) { + BlockHeader* header = get_block_header(ptr); + + BlockHeader* next = find_next_block(header); + if (next && next->is_free) { + remove_block_from_free_list(next); + header->size += BlockHeader::HEADER_SIZE + next->size; + } + + BlockHeader* prev = find_prev_block(header); + if (prev && prev->is_free) { + remove_block_from_free_list(prev); + prev->size += BlockHeader::HEADER_SIZE + header->size; + header = prev; + } + + add_to_free_list(reinterpret_cast(header) + BlockHeader::HEADER_SIZE, + header->size); +} + +} // namespace tundradb diff --git a/src/memory/map_arena.cpp b/src/memory/map_arena.cpp new file mode 100644 index 0000000..2c51e8f --- /dev/null +++ b/src/memory/map_arena.cpp @@ -0,0 +1,308 @@ +#include "memory/map_arena.hpp" + +namespace tundradb { + +// ============================================================================ +// MapArena +// ============================================================================ + +arrow::Result MapArena::allocate(uint32_t capacity) { + if (capacity == 0) return MapRef{}; + + const size_t data_bytes = sizeof(MapEntry) * capacity; + const size_t alloc_size = MapRef::HEADER_SIZE + data_bytes; + + std::lock_guard lock(arena_mutex_); + void* raw = arena_->allocate(alloc_size); + if (!raw) { + return arrow::Status::OutOfMemory( + "MapArena::allocate: arena allocation failed (requested ", alloc_size, + " bytes)"); + } + + init_header(raw, capacity); + + char* data = static_cast(raw) + MapRef::HEADER_SIZE; + zero_init_entries(data, capacity); + + active_allocs_.fetch_add(1, std::memory_order_relaxed); + return MapRef{data}; +} + +arrow::Result MapArena::copy(const MapRef& src, + uint32_t extra_capacity) { + if (src.is_null()) { + return arrow::Status::Invalid("MapArena::copy: source MapRef is null"); + } + + const auto* header = get_header_const(src); + if (!header) { + return arrow::Status::Invalid( + "MapArena::copy: invalid source MapRef (header is null)"); + } + + const uint32_t src_count = header->count; + const uint32_t new_capacity = header->capacity + extra_capacity; + + ARROW_ASSIGN_OR_RAISE(MapRef new_ref, allocate(new_capacity)); + + auto* new_header = get_header(new_ref); + new_header->count = src_count; + + for (uint32_t i = 0; i < src_count; ++i) { + const auto* src_entry = src.entry_ptr(i); + auto* dst_entry = new_ref.mutable_entry_ptr(i); + copy_init_entry(dst_entry, src_entry); + } + + return new_ref; +} + +void MapArena::mark_for_deletion(const MapRef& ref) { + if (ref.is_null()) return; + if (auto* h = get_header_mut(ref)) { + h->mark_for_deletion(); + } +} + +void MapArena::release_map(char* data) { + if (!data) return; + + auto* header = + reinterpret_cast(data - MapRef::HEADER_SIZE); + if (!header->arena) return; + header->arena = nullptr; + + destruct_entries(data, header->count); + + active_allocs_.fetch_sub(1, std::memory_order_relaxed); + + std::lock_guard lock(arena_mutex_); + arena_->deallocate(header); +} + +int32_t MapArena::find_entry(const MapRef& ref, const std::string& key) { + if (ref.is_null()) return -1; + const uint32_t n = ref.count(); + for (uint32_t i = 0; i < n; ++i) { + const auto* entry = ref.entry_ptr(i); + if (entry->key.view() == key) { + return static_cast(i); + } + } + return -1; +} + +arrow::Status MapArena::set_entry(MapRef& ref, const StringRef& key, + ValueType vtype, const void* value_ptr) { + if (ref.is_null()) { + return arrow::Status::Invalid("MapArena::set_entry: MapRef is null"); + } + + auto* header = get_header(ref); + if (!header) { + return arrow::Status::Invalid("MapArena::set_entry: invalid header"); + } + + const std::string_view key_view = key.view(); + for (uint32_t i = 0; i < header->count; ++i) { + auto* entry = ref.mutable_entry_ptr(i); + if (entry->key.view() == key_view) { + destruct_entry_value(entry); + entry->value_type = static_cast(vtype); + copy_value_into_entry(entry, vtype, value_ptr); + return arrow::Status::OK(); + } + } + + if (header->count >= header->capacity) { + return arrow::Status::CapacityError( + "MapArena::set_entry: map is full (count=", header->count, + ", capacity=", header->capacity, ")"); + } + + auto* entry = ref.mutable_entry_ptr(header->count); + entry->key.~StringRef(); + new (&entry->key) StringRef(key); + entry->value_type = static_cast(vtype); + copy_value_into_entry(entry, vtype, value_ptr); + header->count++; + return arrow::Status::OK(); +} + +bool MapArena::remove_entry(MapRef& ref, const std::string& key) { + if (ref.is_null()) return false; + auto* header = get_header(ref); + if (!header || header->count == 0) return false; + + for (uint32_t i = 0; i < header->count; ++i) { + auto* entry = ref.mutable_entry_ptr(i); + if (entry->key.view() == key) { + destruct_entry(entry); + if (i < header->count - 1) { + auto* last = ref.mutable_entry_ptr(header->count - 1); + move_entry(entry, last); + } + auto* vacant = ref.mutable_entry_ptr(header->count - 1); + new (vacant) MapEntry(); + header->count--; + return true; + } + } + return false; +} + +void MapArena::reset() { + std::lock_guard lock(arena_mutex_); + arena_->reset(); +} + +void MapArena::clear() { + std::lock_guard lock(arena_mutex_); + arena_->clear(); +} + +MapRef::MapHeader* MapArena::init_header(void* raw, uint32_t capacity) { + auto* header = static_cast(raw); + header->ref_count.store(0, std::memory_order_relaxed); + header->flags = 0; + header->count = 0; + header->capacity = capacity; + header->arena = this; + return header; +} + +void MapArena::zero_init_entries(char* data, uint32_t count) { + for (uint32_t i = 0; i < count; ++i) { + new (data + i * sizeof(MapEntry)) MapEntry(); + } +} + +void MapArena::copy_init_entry(MapEntry* dst, const MapEntry* src) { + dst->key.~StringRef(); + new (&dst->key) StringRef(src->key); + dst->value_type = src->value_type; + std::memset(dst->pad, 0, sizeof(dst->pad)); + copy_value_into_entry(dst, static_cast(src->value_type), + src->value); +} + +void MapArena::copy_value_into_entry(MapEntry* entry, ValueType vtype, + const void* src) { + if (is_string_type(vtype)) { + auto* dst = reinterpret_cast(entry->value); + dst->~StringRef(); + new (dst) StringRef(*reinterpret_cast(src)); + } else if (is_array_type(vtype)) { + auto* dst = reinterpret_cast(entry->value); + dst->~ArrayRef(); + new (dst) ArrayRef(*reinterpret_cast(src)); + } else if (is_map_type(vtype)) { + auto* dst = reinterpret_cast(entry->value); + dst->~MapRef(); + new (dst) MapRef(*reinterpret_cast(src)); + } else { + size_t n = get_type_size(vtype); + std::memset(entry->value, 0, MapEntry::VALUE_SIZE); + std::memcpy(entry->value, src, n); + } +} + +void MapArena::destruct_entry_value(MapEntry* entry) { + auto vtype = static_cast(entry->value_type); + if (is_string_type(vtype)) { + auto* sr = reinterpret_cast(entry->value); + if (!sr->is_null()) { + auto* hdr = reinterpret_cast( + const_cast(sr->data() - StringRef::HEADER_SIZE)); + hdr->mark_for_deletion(); + } + sr->~StringRef(); + new (sr) StringRef(); + } else if (is_array_type(vtype)) { + auto* ar = reinterpret_cast(entry->value); + if (!ar->is_null()) { + auto* hdr = reinterpret_cast( + ar->data() - ArrayRef::HEADER_SIZE); + hdr->mark_for_deletion(); + } + ar->~ArrayRef(); + new (ar) ArrayRef(); + } else if (is_map_type(vtype)) { + auto* mr = reinterpret_cast(entry->value); + if (!mr->is_null()) { + auto* hdr = reinterpret_cast(mr->data() - + MapRef::HEADER_SIZE); + hdr->mark_for_deletion(); + } + mr->~MapRef(); + new (mr) MapRef(); + } +} + +void MapArena::destruct_entry(MapEntry* entry) { + destruct_entry_value(entry); + if (!entry->key.is_null()) { + auto* hdr = reinterpret_cast( + const_cast(entry->key.data() - StringRef::HEADER_SIZE)); + hdr->mark_for_deletion(); + } + entry->key.~StringRef(); + new (&entry->key) StringRef(); + entry->value_type = static_cast(ValueType::NA); + std::memset(entry->value, 0, MapEntry::VALUE_SIZE); +} + +void MapArena::move_entry(MapEntry* dst, MapEntry* src) { + new (&dst->key) StringRef(std::move(src->key)); + dst->value_type = src->value_type; + std::memcpy(dst->value, src->value, MapEntry::VALUE_SIZE); + src->value_type = static_cast(ValueType::NA); + std::memset(src->value, 0, MapEntry::VALUE_SIZE); +} + +void MapArena::destruct_entries(char* data, uint32_t count) { + for (uint32_t i = 0; i < count; ++i) { + auto* entry = reinterpret_cast(data + i * sizeof(MapEntry)); + destruct_entry_value(entry); + if (!entry->key.is_null()) { + auto* hdr = reinterpret_cast( + const_cast(entry->key.data() - StringRef::HEADER_SIZE)); + hdr->mark_for_deletion(); + } + entry->key.~StringRef(); + } +} + +// ============================================================================ +// MapRef methods +// ============================================================================ + +void MapRef::release() { + if (!data_) return; + if (auto* h = get_header()) { + assert(h->ref_count.load(std::memory_order_relaxed) > 0 && + "MapRef::release() called with ref_count already 0"); + + const int32_t old_count = + h->ref_count.fetch_sub(1, std::memory_order_acq_rel); + if (old_count == 1 && h->is_marked_for_deletion() && h->arena) { + h->arena->release_map(data_); + } + } + data_ = nullptr; +} + +Value MapRef::get_value(const std::string& key) const { + int32_t idx = MapArena::find_entry(*this, key); + if (idx < 0) return Value{}; + const auto* entry = entry_ptr(static_cast(idx)); + return Value::read_value_from_memory( + entry->value, static_cast(entry->value_type)); +} + +bool MapRef::contains(const std::string& key) const { + return MapArena::find_entry(*this, key) >= 0; +} + +} // namespace tundradb diff --git a/src/memory/node_arena.cpp b/src/memory/node_arena.cpp new file mode 100644 index 0000000..a1abc14 --- /dev/null +++ b/src/memory/node_arena.cpp @@ -0,0 +1,946 @@ +#include "memory/node_arena.hpp" + +#include + +namespace tundradb { + +// =========================================================================== +// VersionInfo +// =========================================================================== + +const VersionInfo* VersionInfo::find_version_at_snapshot( + uint64_t valid_time, uint64_t tx_time) const { + const VersionInfo* current = this; + while (current != nullptr) { + if (current->is_visible_at(valid_time, tx_time)) { + return current; + } + current = current->prev; + } + return nullptr; +} + +const VersionInfo* VersionInfo::find_version_at_time(uint64_t ts) const { + const VersionInfo* current = this; + while (current != nullptr) { + if (current->is_valid_at(ts)) return current; + current = current->prev; + } + return nullptr; +} + +size_t VersionInfo::count_versions() const { + size_t count = 1; + const VersionInfo* current = prev; + while (current != nullptr) { + count++; + current = current->prev; + } + return count; +} + +// =========================================================================== +// NodeHandle +// =========================================================================== + +size_t NodeHandle::count_versions() const { + if (!is_versioned()) return 1; + return version_info_->count_versions(); +} + +const VersionInfo* NodeHandle::find_version_at_time(uint64_t ts) const { + if (!is_versioned()) return nullptr; + return version_info_->find_version_at_time(ts); +} + +// =========================================================================== +// NodeArena — constructor / destructor +// =========================================================================== + +NodeArena::NodeArena(std::unique_ptr mem_arena, + std::shared_ptr layout_registry, + std::unique_ptr string_arena, + bool enable_versioning) + : mem_arena_(std::move(mem_arena)), + layout_registry_(std::move(layout_registry)), + string_arena_(string_arena ? std::move(string_arena) + : std::make_unique()), + array_arena_(std::make_unique()), + map_arena_(std::make_unique()), + versioning_enabled_(enable_versioning), + version_counter_(0) { + if (versioning_enabled_) { + version_arena_ = std::make_unique(4 * 1024 * 1024); + } +} + +NodeArena::~NodeArena() { + // VersionInfo objects are placement-new'd into version_arena_ memory. + // Their SmallDenseMap members may heap-allocate, so we must call + // destructors before the arena frees the underlying memory. + for (auto* vi : version_infos_) { + vi->~VersionInfo(); + } +} + +// =========================================================================== +// NodeArena — public methods +// =========================================================================== + +NodeHandle NodeArena::allocate_node(const std::string& schema_name) { + const std::shared_ptr layout = + layout_registry_->get_layout(schema_name); + if (!layout) { + return NodeHandle{}; + } + + return allocate_node(layout); +} + +NodeHandle NodeArena::allocate_node( + const std::shared_ptr& layout) { + size_t node_size = layout->get_total_size_with_bitset(); + size_t alignment = layout->get_alignment(); + + void* node_data = mem_arena_->allocate(node_size, alignment); + if (!node_data) { + return NodeHandle{}; + } + + layout->initialize_node_data(static_cast(node_data)); + + if (versioning_enabled_) { + void* version_info_memory = + version_arena_->allocate(sizeof(VersionInfo), alignof(VersionInfo)); + if (!version_info_memory) { + return NodeHandle{}; + } + + uint64_t now = get_current_timestamp_ns(); + auto* version_info = new (version_info_memory) VersionInfo(); + version_infos_.push_back(version_info); + version_info->version_id = 0; + version_info->valid_from = now; + version_info->valid_to = std::numeric_limits::max(); + version_info->prev = nullptr; + + return {node_data, node_size, layout->get_schema_name(), 1, version_info}; + } + return {node_data, node_size, layout->get_schema_name()}; +} + +const char* NodeArena::get_value_ptr( + const NodeHandle& handle, const std::shared_ptr& layout, + const std::shared_ptr& field) { + if (handle.is_null()) { + return nullptr; + } + + return layout->get_value_ptr(static_cast(handle.ptr), field); +} + +Value NodeArena::get_value(const NodeHandle& handle, + const std::shared_ptr& layout, + const std::shared_ptr& field) { + if (handle.is_null()) { + return Value{}; + } + + if (handle.is_versioned()) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return Value{}; + } + + uint16_t field_idx = field_layout->index; + + const VersionInfo* current = handle.version_info_; + while (current != nullptr) { + auto it = current->updated_fields.find(field_idx); + if (it != current->updated_fields.end()) { + if (it->second == nullptr) { + return Value{}; + } + return Value::read_value_from_memory(it->second, field_layout->type); + } + current = current->prev; + } + + return layout->get_value(static_cast(handle.ptr), field); + } + + return layout->get_value(static_cast(handle.ptr), field); +} + +arrow::Result NodeArena::prepare_append_value( + const NodeHandle& handle, const std::shared_ptr& layout, + const FieldLayout& field_layout, const Value& new_value) { + if (!is_array_type(field_layout.type)) { + return arrow::Status::TypeError( + "APPEND is only valid for array fields, got: ", + tundradb::to_string(field_layout.type)); + } + + ArrayRef current_ref; + if (handle.is_versioned()) { + auto [found, ptr] = get_field_ptr_from_version_chain(handle.version_info_, + field_layout.index); + if (found && ptr) { + current_ref = *reinterpret_cast(ptr); + } else if (!found) { + const char* base_ptr = layout->get_value_ptr( + static_cast(handle.ptr), field_layout.index); + if (base_ptr) { + current_ref = *reinterpret_cast(base_ptr); + } + } + } + + if (new_value.holds_raw_array()) { + const auto& elems = new_value.as_raw_array(); + if (elems.empty()) { + if (current_ref.is_null()) return Value{ArrayRef{}}; + ARROW_ASSIGN_OR_RAISE(ArrayRef copy, array_arena_->copy(current_ref)); + return Value{std::move(copy)}; + } + if (current_ref.is_null()) { + ARROW_ASSIGN_OR_RAISE(ArrayRef arr_ref, + store_raw_array(field_layout.type_desc, elems)); + return Value{std::move(arr_ref)}; + } + const auto n = static_cast(elems.size()); + ARROW_ASSIGN_OR_RAISE( + ArrayRef new_ref, + array_arena_->copy(current_ref, grow_for_append(current_ref, n))); + for (const auto& elem : elems) { + ARROW_RETURN_NOT_OK( + append_single_element(new_ref, field_layout.type_desc, elem)); + } + return Value{std::move(new_ref)}; + } + + if (current_ref.is_null()) { + const std::vector elems = {new_value}; + ARROW_ASSIGN_OR_RAISE(ArrayRef arr_ref, + store_raw_array(field_layout.type_desc, elems)); + return Value{std::move(arr_ref)}; + } + ARROW_ASSIGN_OR_RAISE( + ArrayRef new_ref, + array_arena_->copy(current_ref, grow_for_append(current_ref, 1))); + ARROW_RETURN_NOT_OK( + append_single_element(new_ref, field_layout.type_desc, new_value)); + return Value{std::move(new_ref)}; +} + +arrow::Status NodeArena::set_field_value_v0( + NodeHandle& handle, const std::shared_ptr& layout, + const std::shared_ptr& field, const Value& value) { + assert(!handle.is_null()); + + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return arrow::Status::Invalid( + "set_field_value_v0: field not found in layout"); + } + + return set_field_value_internal(handle.ptr, layout, field_layout, value); +} + +arrow::Result NodeArena::apply_updates( + NodeHandle& handle, const std::shared_ptr& layout, + const std::vector& updates) { + ARROW_ASSIGN_OR_RAISE(auto schema_updates, + resolve_field_indices(layout, updates)); + + if (!versioning_enabled_ || !handle.is_versioned()) { + ARROW_RETURN_NOT_OK( + apply_non_versioned_schema_updates(handle, layout, schema_updates)); + return true; + } + + if (schema_updates.empty()) { + return true; + } + + const uint64_t now = get_current_timestamp_ns(); + ARROW_ASSIGN_OR_RAISE(auto* new_vi, allocate_version(handle, now)); + + ARROW_RETURN_NOT_OK(materialize_versioned_schema_fields( + handle, layout, schema_updates, new_vi)); + + handle.version_info_->valid_to = now; + handle.version_info_ = new_vi; + return true; +} + +const char* NodeArena::get_value_ptr_at_version( + const NodeHandle& handle, const VersionInfo* version, + const std::shared_ptr& layout, + const std::shared_ptr& field) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return nullptr; + } + + auto [found, field_ptr] = + get_field_ptr_from_version_chain(version, field_layout->index); + + if (found) { + return field_ptr; + } + + return layout->get_value_ptr(static_cast(handle.ptr), + field_layout->index); +} + +arrow::Result NodeArena::get_value_at_version( + const NodeHandle& handle, const VersionInfo* version, + const std::shared_ptr& layout, + const std::shared_ptr& field) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return arrow::Status::KeyError("Field not found in layout"); + } + + auto [found, field_ptr] = + get_field_ptr_from_version_chain(version, field_layout->index); + + if (found) { + if (field_ptr == nullptr) { + return Value{}; + } + return layout->get_value_from_ptr(field_ptr, *field_layout); + } + + return layout->get_value(static_cast(handle.ptr), + *field_layout); +} + +// =========================================================================== +// NodeArena — private methods +// =========================================================================== + +uint64_t NodeArena::get_current_timestamp_ns() { + return Clock::instance().now_nanos(); +} + +arrow::Result> NodeArena::resolve_field_indices( + const std::shared_ptr& layout, + const std::vector& updates) { + std::vector result; + result.reserve(updates.size()); + for (const auto& upd : updates) { + const FieldLayout* fl = layout->get_field_layout(upd.field); + if (!fl) { + return arrow::Status::Invalid("Invalid field in apply_updates: ", + upd.field->name()); + } + result.push_back({static_cast(fl->index), upd.value, upd.op, + upd.nested_path}); + } + return result; +} + +arrow::Status NodeArena::apply_non_versioned_schema_updates( + NodeHandle& handle, const std::shared_ptr& layout, + const std::vector& schema_updates) { + for (const auto& upd : schema_updates) { + if (upd.field_idx >= layout->get_fields().size()) { + return arrow::Status::IndexError("Field index out of bounds"); + } + const FieldLayout& fl = layout->get_fields()[upd.field_idx]; + + if (!upd.nested_path.empty()) { + ARROW_RETURN_NOT_OK(apply_nested_path_update_non_versioned( + handle.ptr, layout, &fl, upd.nested_path, upd.value)); + continue; + } + + ARROW_RETURN_NOT_OK( + set_field_value_internal(handle.ptr, layout, &fl, upd.value, upd.op)); + } + return arrow::Status::OK(); +} + +arrow::Result NodeArena::allocate_version( + const NodeHandle& handle, const uint64_t now) { + void* vi_mem = + version_arena_->allocate(sizeof(VersionInfo), alignof(VersionInfo)); + if (!vi_mem) { + return arrow::Status::OutOfMemory("Failed to allocate VersionInfo"); + } + const uint64_t vid = + version_counter_.fetch_add(1, std::memory_order_relaxed) + 1; + auto* new_vi = new (vi_mem) VersionInfo(vid, now, handle.version_info_); + version_infos_.push_back(new_vi); + return new_vi; +} + +arrow::Status NodeArena::materialize_versioned_schema_fields( + NodeHandle& handle, const std::shared_ptr& layout, + const std::vector& schema_updates, + VersionInfo* target_vi) { + size_t total_size = 0; + size_t max_alignment = 1; + for (const auto& upd : schema_updates) { + const FieldLayout& fl = layout->get_fields()[upd.field_idx]; + if (upd.op == UpdateType::APPEND || !upd.value.is_null()) { + total_size += fl.size; + max_alignment = std::max(max_alignment, fl.alignment); + } + } + + char* batch_memory = nullptr; + if (total_size > 0) { + batch_memory = static_cast( + version_arena_->allocate(total_size, max_alignment)); + if (!batch_memory) { + return arrow::Status::OutOfMemory( + "Failed to batch allocate field storage"); + } + std::memset(batch_memory, 0, total_size); + } + + size_t offset = 0; + for (const auto& upd : schema_updates) { + const FieldLayout& fl = layout->get_fields()[upd.field_idx]; + + if (!upd.nested_path.empty()) { + ARROW_ASSIGN_OR_RAISE( + Value map_val, apply_nested_path_update_versioned( + handle, layout, fl, upd.nested_path, upd.value)); + assert(batch_memory != nullptr); + char* field_storage = batch_memory + offset; + offset += fl.size; + if (!write_value_to_memory(field_storage, fl.type, map_val)) { + return arrow::Status::TypeError("Type mismatch writing MAP field"); + } + target_vi->updated_fields[upd.field_idx] = field_storage; + continue; + } + + if (upd.op == UpdateType::SET && upd.value.is_null()) { + target_vi->updated_fields[upd.field_idx] = nullptr; + continue; + } + + assert(batch_memory != nullptr); + Value storage_value = upd.value; + + if (upd.op == UpdateType::APPEND) { + ARROW_ASSIGN_OR_RAISE( + storage_value, prepare_append_value(handle, layout, fl, upd.value)); + } else { + if (upd.value.type() == ValueType::STRING && + upd.value.holds_std_string()) { + ARROW_ASSIGN_OR_RAISE( + StringRef str_ref, + string_arena_->store_string_auto(upd.value.as_string())); + storage_value = Value{str_ref, fl.type}; + } else if (upd.value.type() == ValueType::ARRAY && + upd.value.holds_raw_array()) { + ARROW_ASSIGN_OR_RAISE( + ArrayRef arr_ref, + store_raw_array(fl.type_desc, upd.value.as_raw_array())); + storage_value = Value{std::move(arr_ref)}; + } else if (upd.value.type() == ValueType::MAP && + upd.value.holds_raw_map()) { + ARROW_ASSIGN_OR_RAISE(MapRef map_ref, + store_raw_map(upd.value.as_raw_map())); + storage_value = Value{std::move(map_ref)}; + } + } + + char* field_storage = batch_memory + offset; + offset += fl.size; + + if (!write_value_to_memory(field_storage, fl.type, storage_value)) { + return arrow::Status::TypeError("Type mismatch writing field value"); + } + target_vi->updated_fields[upd.field_idx] = field_storage; + } + return arrow::Status::OK(); +} + +arrow::Status NodeArena::set_field_value_internal( + void* node_ptr, const std::shared_ptr& layout, + const FieldLayout* field_layout, const Value& value, + UpdateType update_type) { + if (update_type == UpdateType::APPEND) { + return append_to_array_field(node_ptr, layout, field_layout, value); + } + + if (is_string_type(field_layout->type) && + is_field_set(static_cast(node_ptr), field_layout->index)) { + Value old_value = + layout->get_value(static_cast(node_ptr), *field_layout); + if (!old_value.is_null() && old_value.type() != ValueType::NA) { + try { + const StringRef& old_str_ref = old_value.as_string_ref(); + if (!old_str_ref.is_null()) { + string_arena_->mark_for_deletion(old_str_ref); + } + } catch (...) { + } + } + } + + if (is_array_type(field_layout->type) && + is_field_set(static_cast(node_ptr), field_layout->index)) { + Value old_value = + layout->get_value(static_cast(node_ptr), *field_layout); + if (!old_value.is_null() && old_value.holds_array_ref()) { + const ArrayRef& old_arr_ref = old_value.as_array_ref(); + if (!old_arr_ref.is_null()) { + array_arena_->mark_for_deletion(old_arr_ref); + } + } + } + + if (is_map_type(field_layout->type) && + is_field_set(static_cast(node_ptr), field_layout->index)) { + Value old_value = + layout->get_value(static_cast(node_ptr), *field_layout); + if (!old_value.is_null() && old_value.holds_map_ref()) { + const MapRef& old_map_ref = old_value.as_map_ref(); + if (!old_map_ref.is_null()) { + map_arena_->mark_for_deletion(old_map_ref); + } + } + } + + if (value.type() == ValueType::STRING && value.holds_std_string()) { + const std::string& str_content = value.as_string(); + ARROW_ASSIGN_OR_RAISE(StringRef str_ref, + string_arena_->store_string_auto(str_content)); + if (!layout->set_field_value(static_cast(node_ptr), *field_layout, + Value{str_ref, field_layout->type})) { + return arrow::Status::Invalid("Failed to write string field value"); + } + return arrow::Status::OK(); + } + + if (value.type() == ValueType::ARRAY && value.holds_raw_array()) { + ARROW_ASSIGN_OR_RAISE( + ArrayRef arr_ref, + store_raw_array(field_layout->type_desc, value.as_raw_array())); + if (!layout->set_field_value(static_cast(node_ptr), *field_layout, + Value{std::move(arr_ref)})) { + return arrow::Status::Invalid("Failed to write array field value"); + } + return arrow::Status::OK(); + } + + if (value.type() == ValueType::MAP && value.holds_raw_map()) { + ARROW_ASSIGN_OR_RAISE(MapRef map_ref, store_raw_map(value.as_raw_map())); + if (!layout->set_field_value(static_cast(node_ptr), *field_layout, + Value{std::move(map_ref)})) { + return arrow::Status::Invalid("Failed to write map field value"); + } + return arrow::Status::OK(); + } + + if (!layout->set_field_value(static_cast(node_ptr), *field_layout, + value)) { + return arrow::Status::Invalid("Failed to write field value"); + } + return arrow::Status::OK(); +} + +arrow::Result NodeArena::materialise_map_value(const Value& value) { + if (value.type() == ValueType::STRING && value.holds_std_string()) { + ARROW_ASSIGN_OR_RAISE( + StringRef sr, string_arena_->store_string_auto(value.as_string())); + return Value{sr, ValueType::STRING}; + } + return value; +} + +arrow::Status NodeArena::set_nested_map_key(MapRef& ref, + const std::string& key, + const Value& value) { + if (ref.is_null()) { + ARROW_ASSIGN_OR_RAISE(ref, map_arena_->allocate()); + } + + ARROW_ASSIGN_OR_RAISE(Value mat, materialise_map_value(value)); + ARROW_ASSIGN_OR_RAISE(StringRef key_ref, + string_arena_->store_string_auto(key)); + + ValueType vtype = mat.type(); + if (is_string_type(vtype)) vtype = ValueType::STRING; + + const void* vptr = nullptr; + int32_t i32; + int64_t i64; + double d; + float f; + bool b; + StringRef sr; + ArrayRef ar; + MapRef mr; + + switch (vtype) { + case ValueType::INT32: + i32 = mat.as_int32(); + vptr = &i32; + break; + case ValueType::INT64: + i64 = mat.as_int64(); + vptr = &i64; + break; + case ValueType::DOUBLE: + d = mat.as_double(); + vptr = &d; + break; + case ValueType::FLOAT: + f = mat.as_float(); + vptr = &f; + break; + case ValueType::BOOL: + b = mat.as_bool(); + vptr = &b; + break; + case ValueType::STRING: + sr = mat.as_string_ref(); + vptr = &sr; + break; + case ValueType::ARRAY: + if (!mat.holds_array_ref()) + return arrow::Status::Invalid( + "nested_path update: raw arrays not supported"); + ar = mat.as_array_ref(); + vptr = &ar; + break; + case ValueType::MAP: + if (!mat.holds_map_ref()) + return arrow::Status::Invalid( + "nested_path update: raw maps not supported"); + mr = mat.as_map_ref(); + vptr = &mr; + break; + default: + return arrow::Status::Invalid( + "nested_path update: unsupported value type"); + } + + auto status = MapArena::set_entry(ref, key_ref, vtype, vptr); + if (status.IsCapacityError()) { + ARROW_ASSIGN_OR_RAISE(MapRef grown, + map_arena_->copy(ref, ref.capacity())); + map_arena_->mark_for_deletion(ref); + ref = std::move(grown); + return MapArena::set_entry(ref, key_ref, vtype, vptr); + } + return status; +} + +arrow::Status NodeArena::apply_nested_path_update_non_versioned( + void* node_ptr, const std::shared_ptr& layout, + const FieldLayout* fl, const std::vector& nested_path, + const Value& value) { + if (nested_path.empty()) { + return arrow::Status::Invalid( + "nested_path update requires at least one path segment"); + } + if (nested_path.size() > 1) { + return arrow::Status::NotImplemented( + "nested_path update depth > 1 is not implemented yet"); + } + const std::string& key = nested_path.front(); + if (!is_map_type(fl->type)) { + return arrow::Status::TypeError("nested_path update on non-map field: ", + tundradb::to_string(fl->type)); + } + + auto* base = static_cast(node_ptr); + MapRef current; + if (is_field_set(base, fl->index)) { + Value old = layout->get_value(base, *fl); + if (!old.is_null() && old.holds_map_ref()) current = old.as_map_ref(); + } + + MapRef copy; + if (current.is_null()) { + ARROW_ASSIGN_OR_RAISE(copy, map_arena_->allocate()); + } else { + ARROW_ASSIGN_OR_RAISE(copy, map_arena_->copy(current)); + map_arena_->mark_for_deletion(current); + } + + ARROW_RETURN_NOT_OK(set_nested_map_key(copy, key, value)); + + if (!layout->set_field_value(base, *fl, Value{std::move(copy)})) { + return arrow::Status::Invalid( + "Failed to write map field after nested_path update"); + } + return arrow::Status::OK(); +} + +arrow::Result NodeArena::apply_nested_path_update_versioned( + const NodeHandle& handle, const std::shared_ptr& layout, + const FieldLayout& fl, const std::vector& nested_path, + const Value& value) { + if (nested_path.empty()) { + return arrow::Status::Invalid( + "nested_path update requires at least one path segment"); + } + if (nested_path.size() > 1) { + return arrow::Status::NotImplemented( + "nested_path update depth > 1 is not implemented yet"); + } + const std::string& key = nested_path.front(); + if (!is_map_type(fl.type)) { + return arrow::Status::TypeError("nested_path update on non-map field: ", + tundradb::to_string(fl.type)); + } + + MapRef current; + if (handle.is_versioned()) { + auto [found, ptr] = + get_field_ptr_from_version_chain(handle.version_info_, fl.index); + if (found && ptr) { + current = *reinterpret_cast(ptr); + } else if (!found) { + const char* base_ptr = layout->get_value_ptr( + static_cast(handle.ptr), fl.index); + if (base_ptr) { + current = *reinterpret_cast(base_ptr); + } + } + } + + MapRef copy; + if (current.is_null()) { + ARROW_ASSIGN_OR_RAISE(copy, map_arena_->allocate()); + } else { + ARROW_ASSIGN_OR_RAISE(copy, map_arena_->copy(current)); + } + + ARROW_RETURN_NOT_OK(set_nested_map_key(copy, key, value)); + return Value{std::move(copy)}; +} + +arrow::Status NodeArena::append_to_array_field( + void* node_ptr, const std::shared_ptr& layout, + const FieldLayout* field_layout, const Value& value) { + if (!is_array_type(field_layout->type)) { + return arrow::Status::TypeError( + "APPEND is only valid for array fields, got: ", + tundradb::to_string(field_layout->type)); + } + + auto* base = static_cast(node_ptr); + const bool field_is_set = is_field_set(base, field_layout->index); + + ArrayRef current_ref; + if (field_is_set) { + Value old_value = layout->get_value(base, *field_layout); + if (!old_value.is_null() && old_value.holds_array_ref()) { + current_ref = old_value.as_array_ref(); + } + } + + if (value.holds_raw_array()) { + const auto& elems = value.as_raw_array(); + if (elems.empty()) return arrow::Status::OK(); + + ArrayRef new_ref; + if (current_ref.is_null()) { + ARROW_ASSIGN_OR_RAISE(new_ref, + store_raw_array(field_layout->type_desc, elems)); + } else { + const auto n = static_cast(elems.size()); + ARROW_ASSIGN_OR_RAISE( + new_ref, + array_arena_->copy(current_ref, grow_for_append(current_ref, n))); + for (const auto& elem : elems) { + ARROW_RETURN_NOT_OK( + append_single_element(new_ref, field_layout->type_desc, elem)); + } + array_arena_->mark_for_deletion(current_ref); + } + + if (!layout->set_field_value(base, *field_layout, + Value{std::move(new_ref)})) { + return arrow::Status::Invalid( + "Failed to write array field after APPEND"); + } + return arrow::Status::OK(); + } + + if (current_ref.is_null()) { + const std::vector elems = {value}; + ARROW_ASSIGN_OR_RAISE(ArrayRef new_ref, + store_raw_array(field_layout->type_desc, elems)); + if (!layout->set_field_value(base, *field_layout, + Value{std::move(new_ref)})) { + return arrow::Status::Invalid( + "Failed to write array field after APPEND"); + } + return arrow::Status::OK(); + } + + ARROW_ASSIGN_OR_RAISE( + ArrayRef new_ref, + array_arena_->copy(current_ref, grow_for_append(current_ref, 1))); + ARROW_RETURN_NOT_OK( + append_single_element(new_ref, field_layout->type_desc, value)); + array_arena_->mark_for_deletion(current_ref); + + if (!layout->set_field_value(base, *field_layout, + Value{std::move(new_ref)})) { + return arrow::Status::Invalid("Failed to write array field after APPEND"); + } + return arrow::Status::OK(); +} + +uint32_t NodeArena::grow_for_append(const ArrayRef& ref, uint32_t n) { + const uint32_t spare = ref.capacity() - ref.length(); + if (spare >= n) return 0; + return n - spare; +} + +arrow::Status NodeArena::append_single_element(ArrayRef& ref, + const TypeDescriptor& type_desc, + const Value& elem) { + switch (type_desc.element_type) { + case ValueType::INT32: { + int32_t v = elem.as_int32(); + return array_arena_->append(ref, &v); + } + case ValueType::INT64: { + int64_t v = elem.as_int64(); + return array_arena_->append(ref, &v); + } + case ValueType::DOUBLE: { + double v = elem.as_double(); + return array_arena_->append(ref, &v); + } + case ValueType::BOOL: { + bool v = elem.as_bool(); + return array_arena_->append(ref, &v); + } + case ValueType::STRING: { + ARROW_ASSIGN_OR_RAISE( + StringRef sr, string_arena_->store_string_auto(elem.as_string())); + return array_arena_->append(ref, &sr); + } + default: + return arrow::Status::NotImplemented( + "APPEND: unsupported element type: ", + tundradb::to_string(type_desc.element_type)); + } +} + +std::pair NodeArena::get_field_ptr_from_version_chain( + const VersionInfo* version_info, uint16_t field_idx) { + const VersionInfo* current = version_info; + while (current != nullptr) { + if (auto it = current->updated_fields.find(field_idx); + it != current->updated_fields.end()) { + return {true, it->second}; + } + current = current->prev; + } + return {false, nullptr}; +} + +bool NodeArena::write_value_to_memory(char* ptr, ValueType type, + const Value& value) { + switch (type) { + case ValueType::INT64: + if (value.type() != ValueType::INT64) return false; + *reinterpret_cast(ptr) = value.as_int64(); + return true; + + case ValueType::INT32: + if (value.type() != ValueType::INT32) return false; + *reinterpret_cast(ptr) = value.as_int32(); + return true; + + case ValueType::DOUBLE: + if (value.type() != ValueType::DOUBLE) return false; + *reinterpret_cast(ptr) = value.as_double(); + return true; + + case ValueType::BOOL: + if (value.type() != ValueType::BOOL) return false; + *reinterpret_cast(ptr) = value.as_bool(); + return true; + + case ValueType::STRING: + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: + if (!is_string_type(value.type())) return false; + *reinterpret_cast(ptr) = value.as_string_ref(); + return true; + + case ValueType::ARRAY: + if (value.type() != ValueType::ARRAY) return false; + *reinterpret_cast(ptr) = value.as_array_ref(); + return true; + + case ValueType::MAP: + if (value.type() != ValueType::MAP) return false; + *reinterpret_cast(ptr) = value.as_map_ref(); + return true; + + default: + return false; + } +} + +arrow::Result NodeArena::store_raw_array( + const TypeDescriptor& type_desc, const std::vector& elements) { + const ValueType elem_type = type_desc.element_type; + const auto count = static_cast(elements.size()); + + uint32_t capacity = count; + if (type_desc.is_fixed_size_array() && type_desc.fixed_size > count) { + capacity = type_desc.fixed_size; + } + + ARROW_ASSIGN_OR_RAISE(ArrayRef ref, + array_arena_->allocate(elem_type, capacity)); + + if (ref.is_null()) { + return ref; + } + + const size_t elem_sz = get_type_size(elem_type); + auto* header = reinterpret_cast( + ref.data() - ArrayRef::HEADER_SIZE); + + for (uint32_t i = 0; i < count; ++i) { + char* dest = ref.mutable_element_ptr(i); + const Value& elem = elements[i]; + + if (is_string_type(elem_type) && elem.holds_std_string()) { + ARROW_ASSIGN_OR_RAISE( + StringRef str_ref, + string_arena_->store_string_auto(elem.as_string())); + *reinterpret_cast(dest) = std::move(str_ref); + } else { + write_value_to_memory(dest, elem_type, elem); + } + } + + header->length = count; + return ref; +} + +arrow::Result NodeArena::store_raw_map( + const std::map& entries) { + ARROW_ASSIGN_OR_RAISE( + MapRef ref, + map_arena_->allocate(static_cast(entries.size()))); + for (const auto& [key, val] : entries) { + ARROW_RETURN_NOT_OK(set_nested_map_key(ref, key, val)); + } + return ref; +} + +} // namespace tundradb diff --git a/src/memory/schema_layout.cpp b/src/memory/schema_layout.cpp new file mode 100644 index 0000000..85676db --- /dev/null +++ b/src/memory/schema_layout.cpp @@ -0,0 +1,178 @@ +#include "memory/schema_layout.hpp" + +namespace tundradb { + +// ============================================================================ +// SchemaLayout +// ============================================================================ + +SchemaLayout::SchemaLayout(const std::shared_ptr& schema) + : schema_name_(std::move(schema->name())), total_size_(0), alignment_(8) { + fields_.reserve(schema->num_fields()); + for (auto field : schema->fields()) { + add_field(field); + } + finalize(); +} + +const char* SchemaLayout::get_value_ptr(const char* node_data, + const size_t field_index) const { + const FieldLayout& field_layout = fields_[field_index]; + if (!is_field_set(node_data, field_layout.index)) { + return nullptr; + } + + const char* data_start = node_data + data_offset_; + const char* field_ptr = data_start + field_layout.offset; + return field_ptr; +} + +Value SchemaLayout::get_value_from_ptr(const char* field_ptr, + const FieldLayout& field_layout) const { + if (field_ptr == nullptr) { + return Value{}; + } + return Value::read_value_from_memory(field_ptr, field_layout.type); +} + +bool SchemaLayout::set_field_value(char* node_data, + const FieldLayout& field_layout, + const Value& value) { + set_field_bit(node_data, field_layout.index, !value.is_null()); + + if (value.is_null()) { + return true; + } + + char* data_start = node_data + data_offset_; + char* field_ptr = data_start + field_layout.offset; + return write_value_to_memory(field_ptr, field_layout.type, value); +} + +void SchemaLayout::initialize_node_data(char* node_data) const { + const size_t bitset_size = get_bitset_size(); + std::memset(node_data, 0, bitset_size); + + char* data_start = node_data + data_offset_; + std::memset(data_start, 0, total_size_); + + for (const auto& field : fields_) { + char* field_ptr = data_start + field.offset; + initialize_field_memory(field_ptr, field.type); + } +} + +const FieldLayout* SchemaLayout::get_field_layout( + const std::shared_ptr& field) const { + if (!field) { + return nullptr; + } + if (field->index_ >= fields_.size()) { + return nullptr; + } + return &fields_[field->index_]; +} + +void SchemaLayout::add_field(const std::shared_ptr& field) { + assert(field != nullptr); + const auto& td = field->type_descriptor(); + size_t field_size = td.storage_size(); + size_t field_alignment = td.storage_alignment(); + + alignment_ = std::max(alignment_, field_alignment); + + size_t aligned_offset = align_up(total_size_, field_alignment); + field->index_ = fields_.size(); + fields_.emplace_back(field->index_, field->name(), field->type(), td, + aligned_offset, field_size, field_alignment, + field->nullable()); + + total_size_ = aligned_offset + field_size; +} + +bool SchemaLayout::write_value_to_memory(char* ptr, const ValueType type, + const Value& value) { + switch (type) { + case ValueType::INT64: + if (value.type() != ValueType::INT64) return false; + *reinterpret_cast(ptr) = value.as_int64(); + return true; + case ValueType::INT32: + if (value.type() != ValueType::INT32) return false; + *reinterpret_cast(ptr) = value.as_int32(); + return true; + case ValueType::DOUBLE: + if (value.type() != ValueType::DOUBLE) return false; + *reinterpret_cast(ptr) = value.as_double(); + return true; + case ValueType::BOOL: + if (value.type() != ValueType::BOOL) return false; + *reinterpret_cast(ptr) = value.as_bool(); + return true; + case ValueType::STRING: + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: { + if (!is_string_type(value.type())) return false; + *reinterpret_cast(ptr) = value.as_string_ref(); + return true; + } + case ValueType::ARRAY: + if (value.type() != ValueType::ARRAY) return false; + *reinterpret_cast(ptr) = value.as_array_ref(); + return true; + case ValueType::MAP: + if (value.type() != ValueType::MAP) return false; + *reinterpret_cast(ptr) = value.as_map_ref(); + return true; + default: + return false; + } +} + +void SchemaLayout::initialize_field_memory(char* ptr, const ValueType type) { + switch (type) { + case ValueType::STRING: + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: + new (ptr) StringRef(); + break; + case ValueType::ARRAY: + new (ptr) ArrayRef(); + break; + case ValueType::MAP: + new (ptr) MapRef(); + break; + default: + break; + } +} + +// ============================================================================ +// LayoutRegistry +// ============================================================================ + +void LayoutRegistry::register_layout(std::shared_ptr layout) { + if (!layout->is_finalized()) { + layout->finalize(); + } + layouts_[layout->get_schema_name()] = std::move(layout); +} + +std::shared_ptr LayoutRegistry::get_layout( + const std::string& schema_name) { + const auto it = layouts_.find(schema_name); + return it != layouts_.end() ? it->second : nullptr; +} + +std::vector LayoutRegistry::get_schema_names() const { + std::vector names; + names.reserve(layouts_.size()); + for (auto const& entry : layouts_) { + names.push_back(entry.first().str()); + } + return names; +} + +} // namespace tundradb diff --git a/src/memory/string_arena.cpp b/src/memory/string_arena.cpp new file mode 100644 index 0000000..871290a --- /dev/null +++ b/src/memory/string_arena.cpp @@ -0,0 +1,227 @@ +#include "memory/string_arena.hpp" + +namespace tundradb { + +// ============================================================================ +// StringPool +// ============================================================================ + +arrow::Result StringPool::store_string(const std::string& str, + uint32_t pool_id) { + if (str.length() > max_size_) { + return arrow::Status::Invalid("StringPool::store_string: string length ", + str.length(), " exceeds pool max_size ", + max_size_); + } + + if (enable_deduplication_) { + typename decltype(dedup_cache_)::const_accessor acc; + if (dedup_cache_.find(acc, str)) { + return acc->second; + } + } + + std::lock_guard lock(arena_mutex_); + + const size_t alloc_size = StringRef::HEADER_SIZE + str.length() + 1; + void* raw_storage = arena_->allocate(alloc_size); + if (!raw_storage) { + return arrow::Status::OutOfMemory( + "StringPool::store_string: arena allocation failed (requested ", + alloc_size, " bytes)"); + } + + auto* header = static_cast(raw_storage); + header->ref_count.store(0, std::memory_order_relaxed); + header->length = static_cast(str.length()); + header->flags = 0; + header->padding = 0; + + char* data = reinterpret_cast(header) + StringRef::HEADER_SIZE; + std::memcpy(data, str.c_str(), str.length()); + data[str.length()] = '\0'; + + StringRef ref(data, static_cast(str.length()), pool_id); + active_allocs_.fetch_add(1, std::memory_order_relaxed); + + if (enable_deduplication_) { + typename decltype(dedup_cache_)::accessor acc; + dedup_cache_.insert(acc, str); + acc->second = ref; + } + + return ref; +} + +void StringPool::mark_for_deletion(const char* data) { + if (!data) return; + + auto* header = reinterpret_cast( + const_cast(data - StringRef::HEADER_SIZE)); + + header->mark_for_deletion(); + + if (enable_deduplication_) { + std::string str(data, header->length); + dedup_cache_.erase(str); + } +} + +void StringPool::release_string(const char* data) { + if (!data) return; + + auto* header = reinterpret_cast( + const_cast(data - StringRef::HEADER_SIZE)); + + active_allocs_.fetch_sub(1, std::memory_order_relaxed); + + std::lock_guard lock(arena_mutex_); + arena_->deallocate(header); +} + +void StringPool::enable_deduplication(bool enable) { + enable_deduplication_ = enable; + if (!enable) { + dedup_cache_.clear(); + } +} + +size_t StringPool::get_used_bytes() const { + if (auto* free_list = dynamic_cast(arena_.get())) { + return free_list->get_used_bytes(); + } + return 0; +} + +size_t StringPool::get_total_references() const { + size_t total = 0; + typename decltype(dedup_cache_)::const_accessor acc; + for (auto it = dedup_cache_.begin(); it != dedup_cache_.end(); ++it) { + if (dedup_cache_.find(acc, it->first)) { + total += acc->second.get_ref_count(); + } + } + return total; +} + +// ============================================================================ +// StringArena +// ============================================================================ + +StringArena::StringArena() { + pools_.reserve(4); + pools_.emplace_back(std::make_unique(16)); + pools_.emplace_back(std::make_unique(32)); + pools_.emplace_back(std::make_unique(64)); + pools_.emplace_back(std::make_unique(SIZE_MAX)); + register_pools(); +} + +arrow::Result StringArena::store_string(const std::string& str, + uint32_t pool_id) { + if (pool_id >= pools_.size()) { + return arrow::Status::Invalid( + "StringArena::store_string: invalid pool_id ", pool_id, + " (max: ", pools_.size() - 1, ")"); + } + return pools_[pool_id]->store_string(str, pool_id); +} + +arrow::Result StringArena::store_string_auto( + const std::string& str) { + size_t len = str.length(); + if (len <= 16) return pools_[0]->store_string(str, 0); + if (len <= 32) return pools_[1]->store_string(str, 1); + if (len <= 64) return pools_[2]->store_string(str, 2); + return pools_[3]->store_string(str, 3); +} + +void StringArena::mark_for_deletion(const StringRef& ref) { + if (!ref.is_null()) { + pools_[ref.pool_id()]->mark_for_deletion(ref.data()); + } +} + +void StringArena::enable_deduplication(bool enable) { + for (auto& pool : pools_) { + pool->enable_deduplication(enable); + } +} + +int64_t StringArena::get_active_allocs() const { + int64_t total = 0; + for (const auto& pool : pools_) { + total += pool->get_active_allocs(); + } + return total; +} + +StringPool* StringArena::get_pool(uint32_t pool_id) const { + if (pool_id < pools_.size()) { + return pools_[pool_id].get(); + } + return nullptr; +} + +void StringArena::reset() { + for (auto& pool : pools_) { + pool->reset(); + } +} + +void StringArena::clear() { + for (auto& pool : pools_) { + pool->clear(); + } +} + +void StringArena::register_pools() { + for (uint32_t i = 0; i < pools_.size(); ++i) { + StringArenaRegistry::register_pool(i, pools_[i].get()); + } +} + +// ============================================================================ +// StringArenaRegistry +// ============================================================================ + +StringPool* StringArenaRegistry::get_pool(uint32_t pool_id) { + typename decltype(pool_map_)::const_accessor acc; + if (instance().pool_map_.find(acc, pool_id)) { + return acc->second; + } + return nullptr; +} + +void StringArenaRegistry::release_string(uint32_t pool_id, const char* data) { + if (auto* pool = get_pool(pool_id)) { + pool->release_string(data); + } +} + +// ============================================================================ +// StringRef::release() implementation +// ============================================================================ + +void StringRef::release() { + if (data_) { + if (auto* header = get_header()) { + assert(header->ref_count.load(std::memory_order_relaxed) > 0 && + "StringRef::release() called with ref_count already 0 — " + "double-release or missing ref-count increment"); + + int32_t old_count = + header->ref_count.fetch_sub(1, std::memory_order_acq_rel); + + if (old_count == 1 && header->is_marked_for_deletion()) { + StringArenaRegistry::release_string(pool_id_, data_); + } + } + + data_ = nullptr; + length_ = 0; + pool_id_ = 0; + } +} + +} // namespace tundradb diff --git a/src/query/row.cpp b/src/query/row.cpp index 0004c10..4fedff3 100644 --- a/src/query/row.cpp +++ b/src/query/row.cpp @@ -221,4 +221,201 @@ std::string RowNode::toString(bool recursive, int indent_level) const { return ss.str(); } +// --------------------------------------------------------------------------- +// Standalone functions moved from row.hpp +// --------------------------------------------------------------------------- + +bool is_prefix(const std::vector& prefix, + const std::vector& path) { + if (prefix.size() > path.size()) return false; + for (size_t i = 0; i < prefix.size(); ++i) { + if (!(prefix[i] == path[i])) return false; + } + return true; +} + +std::string join_schema_path(const std::vector& schema_path) { + std::ostringstream oss; + for (size_t i = 0; i < schema_path.size(); ++i) { + if (i != 0) oss << "->"; + oss << schema_path[i].toString(); + } + return oss.str(); +} + +Row create_empty_row_from_schema( + const std::shared_ptr& final_output_schema) { + Row new_row(final_output_schema->num_fields() + 32); + new_row.id = -1; + return new_row; +} + +std::vector get_child_rows(const Row& parent, + const std::vector& rows) { + std::vector child; + for (const auto& row : rows) { + if (parent.id != row.id && row.start_with(parent.path)) { + child.push_back(row); + } + } + return child; +} + +// --------------------------------------------------------------------------- +// Row methods moved from row.hpp +// --------------------------------------------------------------------------- + +void Row::set_cell_from_node(const std::vector& field_indices, + const std::shared_ptr& node, + TemporalContext* temporal_context) { + auto view = node->view(temporal_context); + const auto& fields = node->get_schema()->fields(); + const size_t n = std::min(fields.size(), field_indices.size()); + for (size_t i = 0; i < n; ++i) { + const auto& field = fields[i]; + const int field_id = field_indices[i]; + auto value_ref_result = view.get_value_ref(field); + if (value_ref_result.ok()) { + this->set_cell(field_id, value_ref_result.ValueOrDie()); + } + } +} + +void Row::set_cell_from_edge( + const std::vector& field_indices, const std::shared_ptr& edge, + const llvm::SmallVector, 4>& fields, + TemporalContext* temporal_context) { + auto view = edge->view(temporal_context); + const auto edge_schema = edge->get_schema(); + const size_t n = std::min(fields.size(), field_indices.size()); + for (size_t i = 0; i < n; ++i) { + auto field = fields[i]; + if (!field) continue; + const auto& name = field->name(); + const bool structural = + (name == field_names::kId || name == field_names::kEdgeId || + name == field_names::kSourceId || name == field_names::kTargetId || + name == field_names::kCreatedTs); + if (!structural && edge_schema) { + auto real_field = edge_schema->get_field(name); + if (!real_field) continue; + field = real_field; + } + const int field_id = field_indices[i]; + auto value_ref_result = view.get_value_ref(field); + if (value_ref_result.ok()) { + this->set_cell(field_id, value_ref_result.ValueOrDie()); + } + } +} + +const std::unordered_map& Row::extract_schema_ids( + const llvm::SmallDenseMap& field_id_to_name) { + if (ids_populated) return ids; + for (size_t i = 0; i < cells.size(); ++i) { + const auto& value = cells[i]; + if (!value.data) continue; + const auto& field_name = field_id_to_name.at(static_cast(i)); + size_t dot_pos = field_name.find('.'); + if (dot_pos != std::string::npos) { + std::string schema = field_name.substr(0, dot_pos); + if (field_name.substr(dot_pos + 1) == field_names::kId) { + ids[schema] = value.as_int64(); + } + } + } + return ids; +} + +std::shared_ptr Row::merge(const std::shared_ptr& other) const { + std::shared_ptr merged = std::make_shared(*this); + IF_DEBUG_ENABLED { + log_debug("Row::merge() - this: {}", this->ToString()); + log_debug("Row::merge() - other: {}", other->ToString()); + } + for (size_t i = 0; i < other->cells.size(); ++i) { + if (!merged->has_value(static_cast(i))) { + IF_DEBUG_ENABLED { + log_debug("Row::merge() - adding field '{}' with value: {}", i, + cells[i].ToString()); + } + merged->cells[i] = other->cells[i]; + } else { + IF_DEBUG_ENABLED { + log_debug("Row::merge() - skipping field '{}' (already has value)", i); + } + } + } + IF_DEBUG_ENABLED { + log_debug("Row::merge() - result: {}", merged->ToString()); + } + return merged; +} + +std::string Row::ToString() const { + std::stringstream ss; + ss << "Row{"; + ss << "path='" << join_schema_path(path) << "', "; + bool first = true; + for (size_t i = 0; i < cells.size(); i++) { + if (!first) ss << ", "; + first = false; + ss << i << ": "; + const auto value_ref = cells[i]; + if (!value_ref.data) { + ss << "NULL"; + } else { + switch (value_ref.type) { + case ValueType::INT64: + ss << value_ref.as_int64(); + break; + case ValueType::INT32: + ss << value_ref.as_int32(); + break; + case ValueType::DOUBLE: + ss << value_ref.as_double(); + break; + case ValueType::STRING: + ss << "\"" << value_ref.as_string_ref().to_string() << "\""; + break; + case ValueType::BOOL: + ss << (value_ref.as_bool() ? "true" : "false"); + break; + default: + ss << "unknown"; + break; + } + } + } + ss << "}"; + return ss.str(); +} + +// --------------------------------------------------------------------------- +// RowNode methods moved from row.hpp +// --------------------------------------------------------------------------- + +void RowNode::insert_row_dfs(size_t path_idx, + const std::shared_ptr& new_row) { + if (path_idx == new_row->path.size()) { + this->row = new_row; + return; + } + for (const auto& n : children) { + if (n->path_segment == new_row->path[path_idx]) { + n->insert_row_dfs(path_idx + 1, new_row); + return; + } + } + auto new_node = std::make_unique(); + new_node->depth = depth + 1; + new_node->path_segment = new_row->path[path_idx]; + new_node->insert_row_dfs(path_idx + 1, new_row); + children.emplace_back(std::move(new_node)); +} + +void RowNode::insert_row(const std::shared_ptr& new_row) { + insert_row_dfs(0, new_row); +} + } // namespace tundradb diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 320b2f8..70d70af 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -335,9 +335,11 @@ target_link_libraries(memory_arena_test target_link_libraries(free_list_arena_test PRIVATE + core GTest::GTest GTest::Main spdlog::spdlog + LLVMSupport LLVMCore ) From f3fec8a1bd4917f1565325f0dd0f66da38bb71c2 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Sat, 4 Apr 2026 19:34:13 -0400 Subject: [PATCH 4/4] header-to-cpp memory folder --- src/memory/array_arena.cpp | 10 ++++---- src/memory/free_list_arena.cpp | 17 +++++--------- src/memory/node_arena.cpp | 43 ++++++++++++++-------------------- src/memory/string_arena.cpp | 5 ++-- 4 files changed, 30 insertions(+), 45 deletions(-) diff --git a/src/memory/array_arena.cpp b/src/memory/array_arena.cpp index aec4d20..20ac808 100644 --- a/src/memory/array_arena.cpp +++ b/src/memory/array_arena.cpp @@ -20,8 +20,8 @@ arrow::Result ArrayArena::allocate(ValueType elem_type, void* raw = arena_->allocate(alloc_size); if (!raw) { return arrow::Status::OutOfMemory( - "ArrayArena::allocate: arena allocation failed (requested ", - alloc_size, " bytes)"); + "ArrayArena::allocate: arena allocation failed (requested ", alloc_size, + " bytes)"); } init_header(raw, capacity); @@ -108,8 +108,7 @@ arrow::Status ArrayArena::append(ArrayRef& ref, const void* element) { arrow::Result ArrayArena::copy(const ArrayRef& src, uint32_t extra_capacity) { if (src.is_null()) { - return arrow::Status::Invalid( - "ArrayArena::copy: source ArrayRef is null"); + return arrow::Status::Invalid("ArrayArena::copy: source ArrayRef is null"); } const auto* header = get_header_const(src); @@ -219,8 +218,7 @@ void ArrayArena::assign_element(char* dst, const void* src, *reinterpret_cast(dst) = *reinterpret_cast(src); } else if (is_array_type(elem_type)) { - *reinterpret_cast(dst) = - *reinterpret_cast(src); + *reinterpret_cast(dst) = *reinterpret_cast(src); } else { std::memcpy(dst, src, get_type_size(elem_type)); } diff --git a/src/memory/free_list_arena.cpp b/src/memory/free_list_arena.cpp index 9e3f268..771261a 100644 --- a/src/memory/free_list_arena.cpp +++ b/src/memory/free_list_arena.cpp @@ -78,7 +78,6 @@ double FreeListArena::get_fragmentation_ratio() const { return static_cast(freed_bytes_) / total_allocated_; } - void FreeListArena::allocate_new_chunk(size_t size) { auto new_chunk = std::make_unique(size); current_chunk_ = new_chunk.get(); @@ -178,8 +177,7 @@ void* FreeListArena::find_free_block(size_t size) { free_blocks_by_size_.erase(it); } - if (header->size > - size + BlockHeader::HEADER_SIZE + min_fragment_size_) { + if (header->size > size + BlockHeader::HEADER_SIZE + min_fragment_size_) { split_block(header, size); } @@ -192,11 +190,10 @@ void* FreeListArena::find_free_block(size_t size) { } void FreeListArena::split_block(BlockHeader* header, size_t needed_size) { - size_t remaining_size = - header->size - needed_size - BlockHeader::HEADER_SIZE; + size_t remaining_size = header->size - needed_size - BlockHeader::HEADER_SIZE; - char* new_block_start = reinterpret_cast(header) + - BlockHeader::HEADER_SIZE + needed_size; + char* new_block_start = + reinterpret_cast(header) + BlockHeader::HEADER_SIZE + needed_size; BlockHeader* new_header = reinterpret_cast(new_block_start); new_header->size = remaining_size; @@ -204,8 +201,7 @@ void FreeListArena::split_block(BlockHeader* header, size_t needed_size) { header->size = needed_size; - add_to_free_list(new_block_start + BlockHeader::HEADER_SIZE, - remaining_size); + add_to_free_list(new_block_start + BlockHeader::HEADER_SIZE, remaining_size); } void FreeListArena::add_to_free_list(void* ptr, size_t size) { @@ -240,8 +236,7 @@ BlockHeader* FreeListArena::find_next_block(BlockHeader* header) { } } - char* chunk_allocated_end = - chunk_start + chunk_allocated_sizes_[chunk_index]; + char* chunk_allocated_end = chunk_start + chunk_allocated_sizes_[chunk_index]; if (next_ptr + BlockHeader::HEADER_SIZE <= chunk_allocated_end) { return reinterpret_cast(next_ptr); diff --git a/src/memory/node_arena.cpp b/src/memory/node_arena.cpp index a1abc14..d1a4fb8 100644 --- a/src/memory/node_arena.cpp +++ b/src/memory/node_arena.cpp @@ -313,8 +313,7 @@ arrow::Result NodeArena::get_value_at_version( return layout->get_value_from_ptr(field_ptr, *field_layout); } - return layout->get_value(static_cast(handle.ptr), - *field_layout); + return layout->get_value(static_cast(handle.ptr), *field_layout); } // =========================================================================== @@ -336,8 +335,8 @@ arrow::Result> NodeArena::resolve_field_indices( return arrow::Status::Invalid("Invalid field in apply_updates: ", upd.field->name()); } - result.push_back({static_cast(fl->index), upd.value, upd.op, - upd.nested_path}); + result.push_back( + {static_cast(fl->index), upd.value, upd.op, upd.nested_path}); } return result; } @@ -393,8 +392,8 @@ arrow::Status NodeArena::materialize_versioned_schema_fields( char* batch_memory = nullptr; if (total_size > 0) { - batch_memory = static_cast( - version_arena_->allocate(total_size, max_alignment)); + batch_memory = + static_cast(version_arena_->allocate(total_size, max_alignment)); if (!batch_memory) { return arrow::Status::OutOfMemory( "Failed to batch allocate field storage"); @@ -550,15 +549,14 @@ arrow::Status NodeArena::set_field_value_internal( arrow::Result NodeArena::materialise_map_value(const Value& value) { if (value.type() == ValueType::STRING && value.holds_std_string()) { - ARROW_ASSIGN_OR_RAISE( - StringRef sr, string_arena_->store_string_auto(value.as_string())); + ARROW_ASSIGN_OR_RAISE(StringRef sr, + string_arena_->store_string_auto(value.as_string())); return Value{sr, ValueType::STRING}; } return value; } -arrow::Status NodeArena::set_nested_map_key(MapRef& ref, - const std::string& key, +arrow::Status NodeArena::set_nested_map_key(MapRef& ref, const std::string& key, const Value& value) { if (ref.is_null()) { ARROW_ASSIGN_OR_RAISE(ref, map_arena_->allocate()); @@ -627,8 +625,7 @@ arrow::Status NodeArena::set_nested_map_key(MapRef& ref, auto status = MapArena::set_entry(ref, key_ref, vtype, vptr); if (status.IsCapacityError()) { - ARROW_ASSIGN_OR_RAISE(MapRef grown, - map_arena_->copy(ref, ref.capacity())); + ARROW_ASSIGN_OR_RAISE(MapRef grown, map_arena_->copy(ref, ref.capacity())); map_arena_->mark_for_deletion(ref); ref = std::move(grown); return MapArena::set_entry(ref, key_ref, vtype, vptr); @@ -703,8 +700,8 @@ arrow::Result NodeArena::apply_nested_path_update_versioned( if (found && ptr) { current = *reinterpret_cast(ptr); } else if (!found) { - const char* base_ptr = layout->get_value_ptr( - static_cast(handle.ptr), fl.index); + const char* base_ptr = + layout->get_value_ptr(static_cast(handle.ptr), fl.index); if (base_ptr) { current = *reinterpret_cast(base_ptr); } @@ -764,8 +761,7 @@ arrow::Status NodeArena::append_to_array_field( if (!layout->set_field_value(base, *field_layout, Value{std::move(new_ref)})) { - return arrow::Status::Invalid( - "Failed to write array field after APPEND"); + return arrow::Status::Invalid("Failed to write array field after APPEND"); } return arrow::Status::OK(); } @@ -776,8 +772,7 @@ arrow::Status NodeArena::append_to_array_field( store_raw_array(field_layout->type_desc, elems)); if (!layout->set_field_value(base, *field_layout, Value{std::move(new_ref)})) { - return arrow::Status::Invalid( - "Failed to write array field after APPEND"); + return arrow::Status::Invalid("Failed to write array field after APPEND"); } return arrow::Status::OK(); } @@ -823,8 +818,8 @@ arrow::Status NodeArena::append_single_element(ArrayRef& ref, return array_arena_->append(ref, &v); } case ValueType::STRING: { - ARROW_ASSIGN_OR_RAISE( - StringRef sr, string_arena_->store_string_auto(elem.as_string())); + ARROW_ASSIGN_OR_RAISE(StringRef sr, + string_arena_->store_string_auto(elem.as_string())); return array_arena_->append(ref, &sr); } default: @@ -919,9 +914,8 @@ arrow::Result NodeArena::store_raw_array( const Value& elem = elements[i]; if (is_string_type(elem_type) && elem.holds_std_string()) { - ARROW_ASSIGN_OR_RAISE( - StringRef str_ref, - string_arena_->store_string_auto(elem.as_string())); + ARROW_ASSIGN_OR_RAISE(StringRef str_ref, + string_arena_->store_string_auto(elem.as_string())); *reinterpret_cast(dest) = std::move(str_ref); } else { write_value_to_memory(dest, elem_type, elem); @@ -935,8 +929,7 @@ arrow::Result NodeArena::store_raw_array( arrow::Result NodeArena::store_raw_map( const std::map& entries) { ARROW_ASSIGN_OR_RAISE( - MapRef ref, - map_arena_->allocate(static_cast(entries.size()))); + MapRef ref, map_arena_->allocate(static_cast(entries.size()))); for (const auto& [key, val] : entries) { ARROW_RETURN_NOT_OK(set_nested_map_key(ref, key, val)); } diff --git a/src/memory/string_arena.cpp b/src/memory/string_arena.cpp index 871290a..a790728 100644 --- a/src/memory/string_arena.cpp +++ b/src/memory/string_arena.cpp @@ -120,9 +120,8 @@ StringArena::StringArena() { arrow::Result StringArena::store_string(const std::string& str, uint32_t pool_id) { if (pool_id >= pools_.size()) { - return arrow::Status::Invalid( - "StringArena::store_string: invalid pool_id ", pool_id, - " (max: ", pools_.size() - 1, ")"); + return arrow::Status::Invalid("StringArena::store_string: invalid pool_id ", + pool_id, " (max: ", pools_.size() - 1, ")"); } return pools_[pool_id]->store_string(str, pool_id); }