From 90ea721ca99264f3f7bdfea1cd378ce31f982eee Mon Sep 17 00:00:00 2001 From: lichen2015 Date: Wed, 25 Feb 2026 10:56:42 +0800 Subject: [PATCH 01/15] init branch --- CMakeLists.txt | 6 + examples/CMakeLists.txt | 15 + examples/c_api/CMakeLists.txt | 65 + examples/c_api/basic_example.c | 239 + examples/c_api/collection_schema_example.c | 252 + examples/c_api/doc_example.c | 520 ++ examples/c_api/field_schema_example.c | 281 + examples/c_api/index_example.c | 328 ++ examples/c_api/optimized_example.c | 301 + src/CMakeLists.txt | 1 + src/c_api/CMakeLists.txt | 133 + src/c_api/c_api.cc | 5766 ++++++++++++++++++++ src/include/zvec/c_api.h | 2516 +++++++++ src/include/zvec/db/doc.h | 28 + tests/CMakeLists.txt | 1 + tests/c_api/CMakeLists.txt | 28 + tests/c_api/c_api_test.c | 2350 ++++++++ tests/c_api/utils.c | 940 ++++ tests/c_api/utils.h | 260 + 19 files changed, 14030 insertions(+) create mode 100644 examples/CMakeLists.txt create mode 100644 examples/c_api/CMakeLists.txt create mode 100644 examples/c_api/basic_example.c create mode 100644 examples/c_api/collection_schema_example.c create mode 100644 examples/c_api/doc_example.c create mode 100644 examples/c_api/field_schema_example.c create mode 100644 examples/c_api/index_example.c create mode 100644 examples/c_api/optimized_example.c create mode 100644 src/c_api/CMakeLists.txt create mode 100644 src/c_api/c_api.cc create mode 100644 src/include/zvec/c_api.h create mode 100644 tests/c_api/CMakeLists.txt create mode 100644 tests/c_api/c_api_test.c create mode 100644 tests/c_api/utils.c create mode 100644 tests/c_api/utils.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a9a2c9aff..2b7638164 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,9 +31,15 @@ message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}") option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" ON) message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}") +option(BUILD_EXAMPLES "Build examples" ON) +message(STATUS 
"BUILD_EXAMPLES:${BUILD_EXAMPLES}") + cc_directory(thirdparty) cc_directories(src) cc_directories(tests) +if(BUILD_EXAMPLES) + cc_directories(examples) +endif() if(BUILD_TOOLS) cc_directories(tools) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 000000000..66e943ade --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_subdirectory(c_api) \ No newline at end of file diff --git a/examples/c_api/CMakeLists.txt b/examples/c_api/CMakeLists.txt new file mode 100644 index 000000000..759f744fb --- /dev/null +++ b/examples/c_api/CMakeLists.txt @@ -0,0 +1,65 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Basic example +add_executable(c_api_basic_example basic_example.c) +target_link_libraries(c_api_basic_example PRIVATE zvec_c_api) +target_include_directories(c_api_basic_example PRIVATE + ${PROJECT_SOURCE_DIR}/src/include +) +set_target_properties(c_api_basic_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api +) + + +# Schema example +add_executable(c_api_collection_schema_example collection_schema_example.c) +target_link_libraries(c_api_collection_schema_example PRIVATE zvec_c_api) +target_include_directories(c_api_collection_schema_example PRIVATE + ${PROJECT_SOURCE_DIR}/src/include +) +set_target_properties(c_api_collection_schema_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api +) + +# Struct document example +add_executable(c_api_doc_example doc_example.c) +target_link_libraries(c_api_doc_example PRIVATE zvec_c_api) +target_include_directories(c_api_doc_example PRIVATE + ${PROJECT_SOURCE_DIR}/src/include +) +set_target_properties(c_api_doc_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api +) + +# Index example +add_executable(c_api_index_example index_example.c) +target_link_libraries(c_api_index_example PRIVATE zvec_c_api) +set_target_properties(c_api_index_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api +) + +# Newly added field schema example +add_executable(c_api_field_schema_example field_schema_example.c) +target_link_libraries(c_api_field_schema_example PRIVATE zvec_c_api) +set_target_properties(c_api_field_schema_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api +) + +# Optimized example +add_executable(c_api_optimized_example optimized_example.c) +target_link_libraries(c_api_optimized_example PRIVATE zvec_c_api) +set_target_properties(c_api_optimized_example PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/examples/c_api +) diff 
--git a/examples/c_api/basic_example.c b/examples/c_api/basic_example.c new file mode 100644 index 000000000..081e631a2 --- /dev/null +++ b/examples/c_api/basic_example.c @@ -0,0 +1,239 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} + +/** + * @brief Create a simple test collection using CollectionSchema + */ +static ZVecErrorCode create_simple_test_collection( + ZVecCollection **collection) { + // Create collection schema using C API + ZVecCollectionSchema *schema = + zvec_collection_schema_create("test_collection"); + if (!schema) { + return ZVEC_ERROR_INTERNAL_ERROR; + } + + ZVecErrorCode error = ZVEC_OK; + + // Create index parameters + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + + // Create and add ID field (primary key) + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_invert_index(id_field, invert_params); + error = zvec_collection_schema_add_field(schema, id_field); + if (error != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return error; + } + + // Create text field (inverted index) + ZVecFieldSchema *text_field = + zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0); + zvec_field_schema_set_invert_index(text_field, invert_params); + error = zvec_collection_schema_add_field(schema, text_field); + if (error != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return error; + } + + // Create embedding field (HNSW index) + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, embedding_field); + if (error != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + 
zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return error; + } + + // Use default options + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + + // Create collection using the new API + error = zvec_collection_create_and_open("./test_collection", schema, &options, + collection); + + // Cleanup resources + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + + return error; +} + +/** + * @brief Basic C API usage example + */ +int main() { + printf("=== ZVec C API Basic Example ===\n\n"); + + ZVecErrorCode error; + + // Create collection using simplified function + ZVecCollection *collection = NULL; + error = create_simple_test_collection(&collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + return 1; + } + printf("✓ Collection created successfully\n"); + + // Prepare test data + float vector1[] = {0.1f, 0.2f, 0.3f}; + float vector2[] = {0.4f, 0.5f, 0.6f}; + + ZVecDoc *docs[2]; + for (int i = 0; i < 2; ++i) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup allocated resources + for (int j = 0; j < i; ++j) { + zvec_doc_destroy(docs[j]); + } + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + + // Manually add fields to document 1 + zvec_doc_set_pk(docs[0], "doc1"); + zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1", + strlen("doc1")); + zvec_doc_add_field_by_value(docs[0], "text", ZVEC_DATA_TYPE_STRING, + "First document", strlen("First document")); + zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector1, 3 * sizeof(float)); + + // Manually add fields to document 2 + zvec_doc_set_pk(docs[1], "doc2"); + zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2", + strlen("doc2")); + zvec_doc_add_field_by_value(docs[1], "text", ZVEC_DATA_TYPE_STRING, + "Second 
document", strlen("Second document")); + zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector2, 3 * sizeof(float)); + + // Insert documents + size_t success_count = 0; + size_t error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") != ZVEC_OK) { + zvec_collection_destroy(collection); + return 1; + } + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + for (int i = 0; i < 2; ++i) { + zvec_doc_destroy(docs[i]); + } + + // Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") != ZVEC_OK) { + printf("Collection flush failed\n"); + } else { + printf("✓ Collection flushed successfully\n"); + } + + // Get collection statistics + ZVecCollectionStats *stats = NULL; + error = zvec_collection_get_stats(collection, &stats); + if (handle_error(error, "getting collection stats") == ZVEC_OK) { + printf("✓ Collection stats - Document count: %llu\n", + (unsigned long long)stats->doc_count); + // Free statistics memory + zvec_collection_stats_destroy(stats); + } + + printf("Testing vector query...\n"); + // Query documents + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "embedding", .length = strlen("embedding")}; + query.query_vector = + (ZVecByteArray){.data = (uint8_t *)vector1, .length = 3 * sizeof(float)}; + query.topk = 10; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = true; + query.include_doc_id = true; + query.output_fields = NULL; + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, &query, &results, &result_count); + + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + printf("[ERROR] Query failed: %s\n", + error_msg ? 
error_msg : "Unknown error"); + zvec_free_str(error_msg); + goto cleanup; + } + + printf("✓ Query successful - Returned %zu results\n", result_count); + + // Process query results + for (size_t i = 0; i < result_count && i < 5; ++i) { + const ZVecDoc *doc = results[i]; + const char *pk = zvec_doc_get_pk_copy(doc); + + printf(" Result %zu: PK=%s, DocID=%llu, Score=%.4f\n", i + 1, + pk ? pk : "NULL", (unsigned long long)zvec_doc_get_doc_id(doc), + zvec_doc_get_score(doc)); + + if (pk) { + free((void *)pk); + } + } + + // Free query results memory + zvec_docs_free(results, result_count); + +cleanup: + // Cleanup resources + zvec_collection_destroy(collection); + printf("✓ Example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c_api/collection_schema_example.c b/examples/c_api/collection_schema_example.c new file mode 100644 index 000000000..af66daa04 --- /dev/null +++ b/examples/c_api/collection_schema_example.c @@ -0,0 +1,252 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} + +/** + * @brief Collection schema creation and management example + */ +int main() { + printf("=== ZVec Collection Schema Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("schema_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return 1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. Set schema properties + schema->max_doc_count_per_segment = 1000000; + printf("✓ Set max documents per segment: %llu\n", + (unsigned long long)schema->max_doc_count_per_segment); + + // 3. Create index parameters + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + + if (!invert_params || !hnsw_params) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return 1; + } + + // 4. Create and add ID field (primary key) + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (!id_field) { + fprintf(stderr, "Failed to create ID field\n"); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ ID field added successfully\n"); + + // 5. 
Create and add text field with inverted index + ZVecFieldSchema *text_field = + zvec_field_schema_create("content", ZVEC_DATA_TYPE_STRING, true, 0); + if (!text_field) { + fprintf(stderr, "Failed to create text field\n"); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + + zvec_field_schema_set_invert_index(text_field, invert_params); + error = zvec_collection_schema_add_field(schema, text_field); + if (handle_error(error, "adding text field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ Text field with inverted index added successfully\n"); + + // 6. Create and add vector field with HNSW index + ZVecFieldSchema *vector_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (!vector_field) { + fprintf(stderr, "Failed to create vector field\n"); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + + zvec_field_schema_set_hnsw_index(vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, vector_field); + if (handle_error(error, "adding vector field") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ Vector field with HNSW index added successfully\n"); + + // 7. Check field count + // Note: This function may not exist in current API, commenting out for now + // size_t field_count = zvec_collection_schema_get_field_count(schema); + // printf("✓ Total field count: %zu\n", field_count); + + // 8. 
Create collection with schema + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./schema_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection with schema") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ Collection created successfully with schema\n"); + + // 9. Prepare test data + float vector1[128]; + float vector2[128]; + for (int i = 0; i < 128; i++) { + vector1[i] = (float)(i + 1) / 128.0f; + vector2[i] = (float)(i + 2) / 128.0f; + } + + // 10. Create documents + ZVecDoc *docs[2]; + for (int i = 0; i < 2; i++) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(docs[j]); + } + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + } + + // Add fields to document 1 + zvec_doc_set_pk(docs[0], "doc1"); + zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1", + strlen("doc1")); + zvec_doc_add_field_by_value(docs[0], "content", ZVEC_DATA_TYPE_STRING, + "First test document", + strlen("First test document")); + zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector1, 128 * sizeof(float)); + + // Add fields to document 2 + zvec_doc_set_pk(docs[1], "doc2"); + zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2", + strlen("doc2")); + zvec_doc_add_field_by_value(docs[1], "content", ZVEC_DATA_TYPE_STRING, + "Second test document", + strlen("Second test document")); + zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector2, 128 * sizeof(float)); + + 
// 11. Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") != ZVEC_OK) { + // Cleanup + for (int i = 0; i < 2; i++) { + zvec_doc_destroy(docs[i]); + } + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return 1; + } + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + + // Cleanup documents + for (int i = 0; i < 2; i++) { + zvec_doc_destroy(docs[i]); + } + + // 12. Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") == ZVEC_OK) { + printf("✓ Collection flushed successfully\n"); + } + + // 13. Query test + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "embedding", .length = strlen("embedding")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)vector1, + .length = 128 * sizeof(float)}; + query.topk = 5; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = true; + query.include_doc_id = true; + query.output_fields = NULL; + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error == ZVEC_OK) { + printf("✓ Vector query successful - Returned %zu results\n", result_count); + zvec_docs_free(results, result_count); + } + + // 14. 
Cleanup resources + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + printf("✓ Schema example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/c_api/doc_example.c b/examples/c_api/doc_example.c new file mode 100644 index 000000000..81a74506f --- /dev/null +++ b/examples/c_api/doc_example.c @@ -0,0 +1,520 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} + +/** + * @brief Create a test document with all data types + * @param doc_index Document index for generating unique data + * @return ZVecDoc* Created document pointer + */ +static ZVecDoc *create_full_type_test_doc(int doc_index) { + ZVecDoc *doc = zvec_doc_create(); + if (!doc) { + fprintf(stderr, "Failed to create document\n"); + return NULL; + } + + // Set primary key + char pk_buffer[32]; + snprintf(pk_buffer, sizeof(pk_buffer), "doc_%d", doc_index); + zvec_doc_set_pk(doc, pk_buffer); + + // Add Id field with inverted index + char id_buffer[32]; + snprintf(id_buffer, sizeof(id_buffer), "id_%d", doc_index); + zvec_doc_add_field_by_value(doc, "id", ZVEC_DATA_TYPE_STRING, id_buffer, + strlen(id_buffer)); + + // Add scalar fields with different data types + // String field + char string_value[64]; + snprintf(string_value, sizeof(string_value), "test_string_%d", doc_index); + zvec_doc_add_field_by_value(doc, "string_field", ZVEC_DATA_TYPE_STRING, + string_value, strlen(string_value)); + + // Boolean field + bool bool_value = (doc_index % 2 == 0); + zvec_doc_add_field_by_value(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_value, sizeof(bool_value)); + + // Integer fields + int32_t int32_value = doc_index * 1000; + zvec_doc_add_field_by_value(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_value, sizeof(int32_value)); + + int64_t int64_value = (int64_t)doc_index * 1000000LL; + zvec_doc_add_field_by_value(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_value, sizeof(int64_value)); + + // Floating point fields + float float_value = (float)doc_index * 1.5f; + zvec_doc_add_field_by_value(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_value, sizeof(float_value)); + + double double_value = (double)doc_index * 2.718281828; + zvec_doc_add_field_by_value(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_value, sizeof(double_value)); + + // Vector fields with different dimensions 
+ // FP32 vector (3D) + float fp32_vector[3] = {(float)doc_index, (float)doc_index * 2.0f, + (float)doc_index * 3.0f}; + zvec_doc_add_field_by_value(doc, "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, + fp32_vector, 3 * sizeof(float)); + + // Larger FP32 vector (16D) + float large_vector[16]; + for (int i = 0; i < 16; i++) { + large_vector[i] = (float)(doc_index * 16 + i) / 256.0f; + } + zvec_doc_add_field_by_value(doc, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vector, 16 * sizeof(float)); + + return doc; +} + +/** + * @brief Compare two documents for equality + */ +static bool compare_documents(const ZVecDoc *doc1, const ZVecDoc *doc2) { + if (!doc1 || !doc2) return false; + + // Compare primary keys + const char *pk1 = zvec_doc_get_pk_pointer(doc1); + const char *pk2 = zvec_doc_get_pk_pointer(doc2); + + if (!pk1 || !pk2 || strcmp(pk1, pk2) != 0) { + return false; + } + + // TODO: Compare other fields and values + + return true; +} + +/** + * @brief Print document fields and their values + * @param doc The document to print + * @param doc_index Document index for identification + */ +static void print_doc(const ZVecDoc *doc, int doc_index) { + if (!doc) { + printf("Document %d: NULL document\n", doc_index); + return; + } + + printf("\n=== Document %d ===\n", doc_index); + + // Print primary key + const char *pk = zvec_doc_get_pk_pointer(doc); + printf("Primary Key: %s\n", pk ? 
pk : "NULL"); + + // Print document ID + uint64_t doc_id = zvec_doc_get_doc_id(doc); + printf("Document ID: %llu\n", (unsigned long long)doc_id); + + // Print score + float score = zvec_doc_get_score(doc); + printf("Score: %.6f\n", score); + + // Print scalar fields + printf("\nScalar Fields:\n"); + + // ID field (using pointer function for strings) + const void *id_value = NULL; + size_t id_size = 0; + ZVecErrorCode error = zvec_doc_get_field_value_pointer( + doc, "id", ZVEC_DATA_TYPE_STRING, &id_value, &id_size); + if (error == ZVEC_OK && id_value) { + printf(" id: %.*s\n", (int)id_size, (const char *)id_value); + } + + // String field (using pointer function for strings) + const void *string_value = NULL; + size_t string_size = 0; + error = zvec_doc_get_field_value_pointer( + doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_value, &string_size); + if (error == ZVEC_OK && string_value) { + printf(" string_field: %.*s\n", (int)string_size, + (const char *)string_value); + } + + // Boolean field + bool bool_value; + error = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_value, sizeof(bool_value)); + if (error == ZVEC_OK) { + printf(" bool_field: %s\n", bool_value ? 
"true" : "false"); + } + + // Int32 field + int32_t int32_value; + error = + zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_value, sizeof(int32_value)); + if (error == ZVEC_OK) { + printf(" int32_field: %d\n", int32_value); + } + + // Int64 field + int64_t int64_value; + error = + zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_value, sizeof(int64_value)); + if (error == ZVEC_OK) { + printf(" int64_field: %lld\n", (long long)int64_value); + } + + // Float field + float float_value; + error = + zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_value, sizeof(float_value)); + if (error == ZVEC_OK) { + printf(" float_field: %.6f\n", float_value); + } + + // Double field + double double_value; + error = + zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_value, sizeof(double_value)); + if (error == ZVEC_OK) { + printf(" double_field: %.6f\n", double_value); + } + + // Print vector fields (using copy function for complex types) + printf("\nVector Fields:\n"); + + // FP32 vector (3D) + void *fp32_vector = NULL; + size_t fp32_size = 0; + error = zvec_doc_get_field_value_copy( + doc, "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, &fp32_vector, &fp32_size); + if (error == ZVEC_OK && fp32_vector) { + const float *vec = (const float *)fp32_vector; + size_t dim = fp32_size / sizeof(float); + printf(" vector_fp32 (%zuD): [", dim); + for (size_t i = 0; i < dim && i < 10; i++) { // Limit to first 10 elements + printf("%.3f", vec[i]); + if (i < dim - 1 && i < 9) printf(", "); + } + if (dim > 10) printf(", ..."); + printf("]\n"); + free(fp32_vector); // Free the allocated memory + } + + // Large vector (16D) + void *large_vector = NULL; + size_t large_size = 0; + error = zvec_doc_get_field_value_copy(doc, "large_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, + &large_vector, &large_size); + if (error == ZVEC_OK && large_vector) { + const float *vec = (const 
float *)large_vector;
+    size_t dim = large_size / sizeof(float);
+    printf(" large_vector (%zuD): [", dim);
+    for (size_t i = 0; i < dim && i < 10; i++) {  // Limit to first 10 elements
+      printf("%.3f", vec[i]);
+      if (i < dim - 1 && i < 9) printf(", ");
+    }
+    if (dim > 10) printf(", ...");
+    printf("]\n");
+    free(large_vector);  // Free the allocated memory
+  }
+
+  printf("==================\n\n");
+}
+
+/**
+ * @brief Document creation, manipulation, and query example
+ *
+ * Builds a schema with one indexed string field, six scalar fields and two
+ * FP32 vector fields, creates a collection from it, inserts five generated
+ * documents, then runs a plain and a filtered vector query and compares the
+ * returned documents with the inserted ones.
+ *
+ * Insert, flush and query failures are reported and the example continues.
+ *
+ * @return 0 when the example runs to the end, -1 when the schema, the index
+ *         parameters, the collection or a test document cannot be created.
+ */
+int main() {
+  printf("=== ZVec Document Example ===\n\n");
+
+  ZVecErrorCode error;
+  int exit_code = 0;
+
+  // 1. Create collection schema for document testing
+  ZVecCollectionSchema *schema =
+      zvec_collection_schema_create("doc_example_collection");
+  if (!schema) {
+    fprintf(stderr, "Failed to create collection schema\n");
+    return -1;
+  }
+  printf("✓ Collection schema created\n");
+
+  // 2. Create index parameters
+  ZVecInvertIndexParams *invert_params =
+      zvec_index_params_invert_create(true, false);
+  ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create(
+      ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50);
+
+  if (!invert_params || !hnsw_params) {
+    fprintf(stderr, "Failed to create index parameters\n");
+    // Only one of the two may have failed; release the one that exists.
+    if (invert_params) zvec_index_params_invert_destroy(invert_params);
+    if (hnsw_params) zvec_index_params_hnsw_destroy(hnsw_params);
+    zvec_collection_schema_destroy(schema);
+    return -1;
+  }
+
+  // 3. Create fields for all data types
+  printf("Creating fields for all data types...\n");
+
+  // Id field with inverted index
+  ZVecFieldSchema *id_field =
+      zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0);
+  if (id_field) {
+    zvec_field_schema_set_invert_index(id_field, invert_params);
+    error = zvec_collection_schema_add_field(schema, id_field);
+    if (handle_error(error, "adding ID field") == ZVEC_OK) {
+      printf("✓ ID field with inverted index added\n");
+    }
+  }
+
+  // Scalar fields
+  ZVecFieldSchema *string_field =
+      zvec_field_schema_create("string_field", ZVEC_DATA_TYPE_STRING, true, 0);
+  ZVecFieldSchema *bool_field =
+      zvec_field_schema_create("bool_field", ZVEC_DATA_TYPE_BOOL, true, 0);
+  ZVecFieldSchema *int32_field =
+      zvec_field_schema_create("int32_field", ZVEC_DATA_TYPE_INT32, true, 0);
+  ZVecFieldSchema *int64_field =
+      zvec_field_schema_create("int64_field", ZVEC_DATA_TYPE_INT64, true, 0);
+  ZVecFieldSchema *float_field =
+      zvec_field_schema_create("float_field", ZVEC_DATA_TYPE_FLOAT, true, 0);
+  ZVecFieldSchema *double_field =
+      zvec_field_schema_create("double_field", ZVEC_DATA_TYPE_DOUBLE, true, 0);
+
+  // Add every scalar field that was created; a failed add is reported
+  // instead of being dropped, and the example continues.
+  ZVecFieldSchema *scalar_fields[] = {string_field, bool_field,  int32_field,
+                                      int64_field,  float_field, double_field};
+  for (size_t i = 0; i < sizeof(scalar_fields) / sizeof(scalar_fields[0]);
+       i++) {
+    if (scalar_fields[i]) {
+      handle_error(zvec_collection_schema_add_field(schema, scalar_fields[i]),
+                   "adding scalar field");
+    }
+  }
+
+  // Vector fields
+  ZVecFieldSchema *vector_fp32_field = zvec_field_schema_create(
+      "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3);
+  ZVecFieldSchema *large_vector_field = zvec_field_schema_create(
+      "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 16);
+
+  if (vector_fp32_field) {
+    zvec_field_schema_set_hnsw_index(vector_fp32_field, hnsw_params);
+    error = zvec_collection_schema_add_field(schema, vector_fp32_field);
+    if (handle_error(error, "adding vector FP32 field") == ZVEC_OK) {
+      printf("✓ Vector FP32 field with HNSW index added\n");
+    }
+  }
+
+  if (large_vector_field) {
+    zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params);
+    error = zvec_collection_schema_add_field(schema, large_vector_field);
+    if (handle_error(error, "adding large vector field") == ZVEC_OK) {
+      printf("✓ Large vector field with HNSW index added\n");
+    }
+  }
+
+  // 4. Create collection
+  ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS();
+  ZVecCollection *collection = NULL;
+
+  error = zvec_collection_create_and_open("./doc_example_collection", schema,
+                                          &options, &collection);
+  if (handle_error(error, "creating collection") != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    zvec_index_params_invert_destroy(invert_params);
+    zvec_index_params_hnsw_destroy(hnsw_params);
+    return -1;
+  }
+  printf("✓ Collection created successfully\n");
+
+  // 5. Create and insert multiple test documents
+  printf("Creating and inserting test documents...\n");
+
+  // Enumeration constant, so test_docs is an ordinary array and not a VLA.
+  enum { doc_count = 5 };
+  ZVecDoc *test_docs[doc_count];
+
+  for (int i = 0; i < doc_count; i++) {
+    test_docs[i] = create_full_type_test_doc(i);
+    if (!test_docs[i]) {
+      fprintf(stderr, "Failed to create document %d\n", i);
+      // Cleanup
+      for (int j = 0; j < i; j++) {
+        zvec_doc_destroy(test_docs[j]);
+      }
+      exit_code = -1;
+      goto cleanup;
+    }
+    // The PK pointer may be NULL (print_doc guards it the same way).
+    const char *created_pk = zvec_doc_get_pk_pointer(test_docs[i]);
+    printf("✓ Created document %d with PK: %s\n", i,
+           created_pk ? created_pk : "NULL");
+  }
+
+  // Print all documents before insertion
+  printf("\nDocuments before insertion:\n");
+  for (int i = 0; i < doc_count; i++) {
+    print_doc(test_docs[i], i);
+  }
+
+  // Insert documents
+  size_t success_count = 0, error_count = 0;
+  error = zvec_collection_insert(collection, (const ZVecDoc **)test_docs,
+                                 doc_count, &success_count, &error_count);
+  if (handle_error(error, "inserting documents") == ZVEC_OK) {
+    printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count,
+           error_count);
+  }
+
+  // 6. Flush collection
+  error = zvec_collection_flush(collection);
+  if (handle_error(error, "flushing collection") != ZVEC_OK) {
+    printf("Warning: Collection flush failed\n");
+  } else {
+    printf("✓ Collection flushed successfully\n");
+  }
+
+  // Query with an all-zero 3-D vector.
+  // NOTE(review): the original comment says this is the first document's
+  // vector; confirm against create_full_type_test_doc.
+  float query_vector[] = {0.0f, 0.0f, 0.0f};
+  ZVecVectorQuery query = {
+      .field_name =
+          (ZVecString){.data = "vector_fp32", .length = strlen("vector_fp32")},
+      .query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector,
+                                      .length = 3 * sizeof(float)},
+      .topk = 5,
+      .filter = (ZVecString){.data = "", .length = 0},
+      .include_vector = true,
+      .include_doc_id = true,
+      .output_fields = NULL};
+
+  ZVecDoc **query_results = NULL;
+  size_t result_count = 0;
+
+  error =
+      zvec_collection_query(collection, &query, &query_results, &result_count);
+  if (handle_error(error, "querying documents") != ZVEC_OK) {
+    // Make the loops and the final free below no-ops after a failed query.
+    query_results = NULL;
+    result_count = 0;
+  }
+
+  printf("Query returned %zu results\n", result_count);
+
+  // Print query results
+  printf("\nQuery Results:\n");
+  for (size_t i = 0; i < result_count; i++) {
+    print_doc(query_results[i], i);
+  }
+
+  // Compare query results
+  for (size_t i = 0; i < result_count && i < (size_t)doc_count; i++) {
+    const char *result_pk = zvec_doc_get_pk_pointer(query_results[i]);
+    if (!result_pk) {
+      // strcmp() and "%s" must not receive a NULL pointer.
+      printf("⚠ Query result[%zu] has no primary key\n", i);
+      continue;
+    }
+    printf("Comparing query result[%zu]: %s\n", i, result_pk);
+
+    // Find matching original document
+    bool found = false;
+    for (int j = 0; j < doc_count; j++) {
+      const char *original_pk = zvec_doc_get_pk_pointer(test_docs[j]);
+      if (original_pk && strcmp(result_pk, original_pk) == 0) {
+        if (compare_documents(test_docs[j], query_results[i])) {
+          printf("✓ Query result %s matches original document\n", result_pk);
+        } else {
+          printf("✗ Query result %s does not match original document\n",
+                 result_pk);
+        }
+        found = true;
+        break;
+      }
+    }
+
+    if (!found) {
+      printf("⚠ Original document not found for: %s\n", result_pk);
+    }
+  }
+
+  // 7. Filter query test
+  printf("\n=== Filter Query Test ===\n");
+
+  // Create filtered query
+  ZVecVectorQuery filtered_query = query;
+  filtered_query.filter =
+      (ZVecString){.data = "string_field = 'string_field_0'",
+                   .length = strlen("string_field = 'string_field_0'")};
+
+  ZVecDoc **filtered_results = NULL;
+  size_t filtered_count = 0;
+
+  error = zvec_collection_query(collection, &filtered_query, &filtered_results,
+                                &filtered_count);
+  if (handle_error(error, "filtered querying") == ZVEC_OK) {
+    printf("Filtered query returned %zu results\n", filtered_count);
+
+    // Verify filter results
+    bool filter_correct = true;
+    for (size_t i = 0; i < filtered_count; i++) {
+      // Note: Field value access may require different API
+      // For now, we'll just check that we got results
+      const char *pk = zvec_doc_get_pk_pointer(filtered_results[i]);
+      // A missing key counts as an incorrect result; strstr() needs non-NULL.
+      if (pk == NULL || strstr(pk, "doc_") == NULL) {
+        filter_correct = false;
+        break;
+      }
+    }
+
+    if (filter_correct) {
+      printf("✓ Filter query results are correct\n");
+    } else {
+      printf("✗ Filter query results are incorrect\n");
+    }
+
+    if (filtered_results) {
+      zvec_docs_free(filtered_results, filtered_count);
+    }
+  }
+
+  // 8. Cleanup query results
+  if (query_results) {
+    zvec_docs_free(query_results, result_count);
+  }
+
+  // 9. Cleanup documents
+  for (int i = 0; i < doc_count; i++) {
+    zvec_doc_destroy(test_docs[i]);
+  }
+
+  // 10. Final cleanup
+cleanup:
+  zvec_collection_destroy(collection);
+  zvec_collection_schema_destroy(schema);
+  zvec_index_params_invert_destroy(invert_params);
+  zvec_index_params_hnsw_destroy(hnsw_params);
+
+  // The failure path also lands here; report success only when it applies.
+  if (exit_code == 0) {
+    printf("✓ Document example completed\n");
+  }
+
+  return exit_code;
+}
\ No newline at end of file
diff --git a/examples/c_api/field_schema_example.c b/examples/c_api/field_schema_example.c
new file mode 100644
index 000000000..2c1bd9d79
--- /dev/null
+++ b/examples/c_api/field_schema_example.c
@@ -0,0 +1,281 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "zvec/c_api.h"
+
+/**
+ * @brief Print error message and return error code
+ *
+ * When @p error is not ZVEC_OK, fetches the last error message, prints the
+ * context, the numeric code and the message (or "Unknown error" when no
+ * message is available) to stderr, then releases the message string.
+ *
+ * @param error   Code returned by the call being checked
+ * @param context Short description of the failed operation, used in the log
+ * @return @p error unchanged, so the call can be used inside a condition
+ */
+static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) {
+  if (error != ZVEC_OK) {
+    char *error_msg = NULL;
+    zvec_get_last_error(&error_msg);
+    fprintf(stderr, "Error in %s: %d - %s\n", context, error,
+            error_msg ? error_msg : "Unknown error");
+    zvec_free_str(error_msg);
+  }
+  return error;
+}
+
+/**
+ * @brief Field schema creation and management example
+ *
+ * Defines scalar fields (string, int32, float, bool) and three FP32 vector
+ * fields with HNSW or Flat indexes, creates a collection, inserts two
+ * documents and runs one vector query against the 128-dimension field.
+ *
+ * @return 0 when the example runs to the end, -1 when the schema, the index
+ *         parameters, the collection or a document cannot be created.
+ */
+int main() {
+  printf("=== ZVec Field Schema Example ===\n\n");
+
+  // Vector dimensions shared by the schema definition and the sample data
+  enum { SMALL_DIM = 32, MEDIUM_DIM = 128, LARGE_DIM = 512 };
+
+  ZVecErrorCode error;
+  int exit_code = 0;
+
+  // 1. Create collection schema
+  ZVecCollectionSchema *schema =
+      zvec_collection_schema_create("field_example_collection");
+  if (!schema) {
+    fprintf(stderr, "Failed to create collection schema\n");
+    return -1;
+  }
+  printf("✓ Collection schema created successfully\n");
+
+  // 2. Create different types of index parameters
+  ZVecInvertIndexParams *invert_params =
+      zvec_index_params_invert_create(true, false);
+  ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create(
+      ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50);
+  ZVecFlatIndexParams *flat_params = zvec_index_params_flat_create(
+      ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED);
+
+  if (!invert_params || !hnsw_params || !flat_params) {
+    fprintf(stderr, "Failed to create index parameters\n");
+    // Release whichever parameter objects were created before bailing out.
+    if (invert_params) zvec_index_params_invert_destroy(invert_params);
+    if (hnsw_params) zvec_index_params_hnsw_destroy(hnsw_params);
+    if (flat_params) zvec_index_params_flat_destroy(flat_params);
+    zvec_collection_schema_destroy(schema);
+    return -1;
+  }
+
+  // 3. Create scalar fields with different data types
+  printf("Creating scalar fields...\n");
+
+  // String field with inverted index
+  ZVecFieldSchema *name_field =
+      zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0);
+  if (name_field) {
+    zvec_field_schema_set_invert_index(name_field, invert_params);
+    error = zvec_collection_schema_add_field(schema, name_field);
+    if (handle_error(error, "adding name field") == ZVEC_OK) {
+      printf("✓ String field 'name' with inverted index added\n");
+    }
+  }
+
+  // Integer field
+  ZVecFieldSchema *age_field =
+      zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0);
+  if (age_field) {
+    error = zvec_collection_schema_add_field(schema, age_field);
+    if (handle_error(error, "adding age field") == ZVEC_OK) {
+      printf("✓ Integer field 'age' added\n");
+    }
+  }
+
+  // Float field
+  ZVecFieldSchema *score_field =
+      zvec_field_schema_create("score", ZVEC_DATA_TYPE_FLOAT, true, 0);
+  if (score_field) {
+    error = zvec_collection_schema_add_field(schema, score_field);
+    if (handle_error(error, "adding score field") == ZVEC_OK) {
+      printf("✓ Float field 'score' added\n");
+    }
+  }
+
+  // Boolean field
+  ZVecFieldSchema *active_field =
+      zvec_field_schema_create("active", ZVEC_DATA_TYPE_BOOL, false, 0);
+  if (active_field) {
+    error = zvec_collection_schema_add_field(schema, active_field);
+    if (handle_error(error, "adding active field") == ZVEC_OK) {
+      printf("✓ Boolean field 'active' added\n");
+    }
+  }
+
+  // 4. Create vector fields with different dimensions and indexes
+  printf("Creating vector fields...\n");
+
+  // Small dimension vector with HNSW index
+  ZVecFieldSchema *small_vector_field = zvec_field_schema_create(
+      "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, SMALL_DIM);
+  if (small_vector_field) {
+    zvec_field_schema_set_hnsw_index(small_vector_field, hnsw_params);
+    error = zvec_collection_schema_add_field(schema, small_vector_field);
+    if (handle_error(error, "adding small vector field") == ZVEC_OK) {
+      printf(
+          "✓ Small vector field 'small_vector' (32D) with HNSW index added\n");
+    }
+  }
+
+  // Medium dimension vector with Flat index
+  ZVecFieldSchema *medium_vector_field = zvec_field_schema_create(
+      "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, MEDIUM_DIM);
+  if (medium_vector_field) {
+    zvec_field_schema_set_flat_index(medium_vector_field, flat_params);
+    error = zvec_collection_schema_add_field(schema, medium_vector_field);
+    if (handle_error(error, "adding medium vector field") == ZVEC_OK) {
+      printf(
+          "✓ Medium vector field 'medium_vector' (128D) with Flat index "
+          "added\n");
+    }
+  }
+
+  // Large dimension vector with HNSW index
+  ZVecFieldSchema *large_vector_field = zvec_field_schema_create(
+      "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, LARGE_DIM);
+  if (large_vector_field) {
+    zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params);
+    error = zvec_collection_schema_add_field(schema, large_vector_field);
+    if (handle_error(error, "adding large vector field") == ZVEC_OK) {
+      printf(
+          "✓ Large vector field 'large_vector' (512D) with HNSW index added\n");
+    }
+  }
+
+  // 5. Create collection with the schema
+  ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS();
+  ZVecCollection *collection = NULL;
+
+  error = zvec_collection_create_and_open("./field_example_collection", schema,
+                                          &options, &collection);
+  if (handle_error(error, "creating collection") != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    zvec_index_params_invert_destroy(invert_params);
+    zvec_index_params_hnsw_destroy(hnsw_params);
+    zvec_index_params_flat_destroy(flat_params);
+    return -1;
+  }
+  printf("✓ Collection created successfully\n");
+
+  // 6. Create test documents with various field types
+  printf("Creating test documents...\n");
+
+  ZVecDoc *doc1 = zvec_doc_create();
+  ZVecDoc *doc2 = zvec_doc_create();
+
+  if (!doc1 || !doc2) {
+    fprintf(stderr, "Failed to create documents\n");
+    exit_code = -1;
+    goto cleanup;
+  }
+
+  // Document 1
+  zvec_doc_set_pk(doc1, "user1");
+  zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING,
+                              "Alice Johnson", strlen("Alice Johnson"));
+  int32_t age1 = 28;
+  zvec_doc_add_field_by_value(doc1, "age", ZVEC_DATA_TYPE_INT32, &age1,
+                              sizeof(age1));
+  float score1 = 87.5f;
+  zvec_doc_add_field_by_value(doc1, "score", ZVEC_DATA_TYPE_FLOAT, &score1,
+                              sizeof(score1));
+  bool active1 = true;
+  zvec_doc_add_field_by_value(doc1, "active", ZVEC_DATA_TYPE_BOOL, &active1,
+                              sizeof(active1));
+
+  // Add vector data: ascending ramps in [0, 1)
+  float small_vec1[SMALL_DIM];
+  float medium_vec1[MEDIUM_DIM];
+  float large_vec1[LARGE_DIM];
+
+  for (int i = 0; i < SMALL_DIM; i++) small_vec1[i] = (float)i / 32.0f;
+  for (int i = 0; i < MEDIUM_DIM; i++) medium_vec1[i] = (float)i / 128.0f;
+  for (int i = 0; i < LARGE_DIM; i++) large_vec1[i] = (float)i / 512.0f;
+
+  // sizeof(array) is the byte length of the whole vector.
+  zvec_doc_add_field_by_value(doc1, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              small_vec1, sizeof(small_vec1));
+  zvec_doc_add_field_by_value(doc1, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              medium_vec1, sizeof(medium_vec1));
+  zvec_doc_add_field_by_value(doc1, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              large_vec1, sizeof(large_vec1));
+
+  // Document 2
+  zvec_doc_set_pk(doc2, "user2");
+  zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, "Bob Smith",
+                              strlen("Bob Smith"));
+  int32_t age2 = 35;
+  zvec_doc_add_field_by_value(doc2, "age", ZVEC_DATA_TYPE_INT32, &age2,
+                              sizeof(age2));
+  float score2 = 92.0f;
+  zvec_doc_add_field_by_value(doc2, "score", ZVEC_DATA_TYPE_FLOAT, &score2,
+                              sizeof(score2));
+  bool active2 = false;
+  zvec_doc_add_field_by_value(doc2, "active", ZVEC_DATA_TYPE_BOOL, &active2,
+                              sizeof(active2));
+
+  // Add vector data: descending ramps in (0, 1]
+  float small_vec2[SMALL_DIM];
+  float medium_vec2[MEDIUM_DIM];
+  float large_vec2[LARGE_DIM];
+
+  for (int i = 0; i < SMALL_DIM; i++) small_vec2[i] = (float)(32 - i) / 32.0f;
+  for (int i = 0; i < MEDIUM_DIM; i++)
+    medium_vec2[i] = (float)(128 - i) / 128.0f;
+  for (int i = 0; i < LARGE_DIM; i++) large_vec2[i] = (float)(512 - i) / 512.0f;
+
+  zvec_doc_add_field_by_value(doc2, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              small_vec2, sizeof(small_vec2));
+  zvec_doc_add_field_by_value(doc2, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              medium_vec2, sizeof(medium_vec2));
+  zvec_doc_add_field_by_value(doc2, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              large_vec2, sizeof(large_vec2));
+
+  // 7. Insert documents
+  ZVecDoc *docs[] = {doc1, doc2};
+  size_t success_count = 0, error_count = 0;
+  error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2,
+                                 &success_count, &error_count);
+  if (handle_error(error, "inserting documents") == ZVEC_OK) {
+    printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count,
+           error_count);
+  }
+
+  // 8. Flush and test queries
+  // Announce the flush only when it actually succeeded.
+  error = zvec_collection_flush(collection);
+  if (handle_error(error, "flushing collection") == ZVEC_OK) {
+    printf("✓ Collection flushed\n");
+  }
+
+  // Test vector query on medium vector field
+  ZVecVectorQuery query = {0};
+  query.field_name =
+      (ZVecString){.data = "medium_vector", .length = strlen("medium_vector")};
+  query.query_vector = (ZVecByteArray){.data = (uint8_t *)medium_vec1,
+                                       .length = sizeof(medium_vec1)};
+  query.topk = 2;
+  query.filter = (ZVecString){.data = "", .length = 0};
+  query.include_vector = false;
+  query.include_doc_id = true;
+  query.output_fields = NULL;
+
+  ZVecDoc **results = NULL;
+  size_t result_count = 0;
+  error = zvec_collection_query(collection, &query, &results, &result_count);
+  // Report a failed query instead of silently skipping it.
+  if (handle_error(error, "querying medium vector field") == ZVEC_OK) {
+    printf("✓ Vector query successful - Found %zu results\n", result_count);
+    zvec_docs_free(results, result_count);
+  }
+
+  // 9. Cleanup
+cleanup:
+  if (doc1) zvec_doc_destroy(doc1);
+  if (doc2) zvec_doc_destroy(doc2);
+  zvec_collection_destroy(collection);
+  zvec_collection_schema_destroy(schema);
+  zvec_index_params_invert_destroy(invert_params);
+  zvec_index_params_hnsw_destroy(hnsw_params);
+  zvec_index_params_flat_destroy(flat_params);
+
+  // The failure path also lands here; report success only when it applies.
+  if (exit_code == 0) {
+    printf("✓ Field schema example completed\n");
+  }
+  return exit_code;
+}
\ No newline at end of file
diff --git a/examples/c_api/index_example.c b/examples/c_api/index_example.c
new file mode 100644
index 000000000..9e1639308
--- /dev/null
+++ b/examples/c_api/index_example.c
@@ -0,0 +1,328 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "zvec/c_api.h"
+
+/**
+ * @brief Print error message and return error code
+ *
+ * When @p error is not ZVEC_OK, fetches the last error message, prints the
+ * context, the numeric code and the message (or "Unknown error" when no
+ * message is available) to stderr, then releases the message string.
+ *
+ * @param error   Code returned by the call being checked
+ * @param context Short description of the failed operation, used in the log
+ * @return @p error unchanged, so the call can be used inside a condition
+ */
+static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) {
+  if (error != ZVEC_OK) {
+    char *error_msg = NULL;
+    zvec_get_last_error(&error_msg);
+    fprintf(stderr, "Error in %s: %d - %s\n", context, error,
+            error_msg ? error_msg : "Unknown error");
+    zvec_free_str(error_msg);
+  }
+  return error;
+}
+
+/**
+ * @brief Index creation and management example
+ *
+ * Creates inverted, HNSW and Flat index parameter sets, attaches them to
+ * string and FP32 vector fields, creates a collection, inserts three
+ * documents and runs one HNSW-backed and one Flat-backed vector query.
+ *
+ * @return 0 when the example runs to the end, -1 when the schema, the index
+ *         parameters, the collection or a document cannot be created.
+ */
+int main() {
+  printf("=== ZVec Index Example ===\n\n");
+
+  // Document count and vector dimensions shared by schema and sample data
+  enum {
+    DOC_COUNT = 3,
+    FAST_DIM = 64,
+    BALANCED_DIM = 128,
+    ACCURATE_DIM = 256,
+    EXACT_DIM = 32
+  };
+
+  ZVecErrorCode error;
+  int exit_code = 0;
+
+  // 1. Create collection schema
+  ZVecCollectionSchema *schema =
+      zvec_collection_schema_create("index_example_collection");
+  if (!schema) {
+    fprintf(stderr, "Failed to create collection schema\n");
+    return -1;
+  }
+  printf("✓ Collection schema created successfully\n");
+
+  // 2. Create different index parameter configurations
+  printf("Creating index parameters...\n");
+
+  // Inverted index parameters
+  ZVecInvertIndexParams *invert_params_standard =
+      zvec_index_params_invert_create(true, false);
+  ZVecInvertIndexParams *invert_params_extended =
+      zvec_index_params_invert_create(true, true);
+
+  // HNSW index parameters with different configurations
+  ZVecHnswIndexParams *hnsw_params_fast = zvec_index_params_hnsw_create(
+      ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50);
+  ZVecHnswIndexParams *hnsw_params_balanced = zvec_index_params_hnsw_create(
+      ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 32, 200, 100);
+  ZVecHnswIndexParams *hnsw_params_accurate = zvec_index_params_hnsw_create(
+      ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED, 64, 400, 200);
+
+  // Flat index parameters
+  ZVecFlatIndexParams *flat_params_l2 = zvec_index_params_flat_create(
+      ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED);
+  ZVecFlatIndexParams *flat_params_cosine = zvec_index_params_flat_create(
+      ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED);
+
+  if (!invert_params_standard || !invert_params_extended || !hnsw_params_fast ||
+      !hnsw_params_balanced || !hnsw_params_accurate || !flat_params_l2 ||
+      !flat_params_cosine) {
+    fprintf(stderr, "Failed to create index parameters\n");
+    // Release whichever parameter objects were created before bailing out.
+    if (invert_params_standard)
+      zvec_index_params_invert_destroy(invert_params_standard);
+    if (invert_params_extended)
+      zvec_index_params_invert_destroy(invert_params_extended);
+    if (hnsw_params_fast) zvec_index_params_hnsw_destroy(hnsw_params_fast);
+    if (hnsw_params_balanced)
+      zvec_index_params_hnsw_destroy(hnsw_params_balanced);
+    if (hnsw_params_accurate)
+      zvec_index_params_hnsw_destroy(hnsw_params_accurate);
+    if (flat_params_l2) zvec_index_params_flat_destroy(flat_params_l2);
+    if (flat_params_cosine) zvec_index_params_flat_destroy(flat_params_cosine);
+    zvec_collection_schema_destroy(schema);
+    return -1;
+  }
+
+  // 3. Create fields with different index types
+  printf("Creating fields with various index types...\n");
+
+  // Fields with inverted indexes
+  ZVecFieldSchema *id_field =
+      zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0);
+  if (id_field) {
+    zvec_field_schema_set_invert_index(id_field, invert_params_standard);
+    error = zvec_collection_schema_add_field(schema, id_field);
+    if (handle_error(error, "adding ID field") == ZVEC_OK) {
+      printf("✓ ID field with standard inverted index added\n");
+    }
+  }
+
+  ZVecFieldSchema *category_field =
+      zvec_field_schema_create("category", ZVEC_DATA_TYPE_STRING, true, 0);
+  if (category_field) {
+    zvec_field_schema_set_invert_index(category_field, invert_params_extended);
+    error = zvec_collection_schema_add_field(schema, category_field);
+    if (handle_error(error, "adding category field") == ZVEC_OK) {
+      printf("✓ Category field with extended inverted index added\n");
+    }
+  }
+
+  // Vector fields with HNSW indexes (different configurations)
+  ZVecFieldSchema *fast_search_field = zvec_field_schema_create(
+      "fast_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, FAST_DIM);
+  if (fast_search_field) {
+    zvec_field_schema_set_hnsw_index(fast_search_field, hnsw_params_fast);
+    error = zvec_collection_schema_add_field(schema, fast_search_field);
+    if (handle_error(error, "adding fast search field") == ZVEC_OK) {
+      printf("✓ Fast search vector field (64D) with HNSW index added\n");
+    }
+  }
+
+  ZVecFieldSchema *balanced_field = zvec_field_schema_create(
+      "balanced_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, BALANCED_DIM);
+  if (balanced_field) {
+    zvec_field_schema_set_hnsw_index(balanced_field, hnsw_params_balanced);
+    error = zvec_collection_schema_add_field(schema, balanced_field);
+    if (handle_error(error, "adding balanced field") == ZVEC_OK) {
+      printf("✓ Balanced vector field (128D) with HNSW index added\n");
+    }
+  }
+
+  ZVecFieldSchema *accurate_field = zvec_field_schema_create(
+      "accurate_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, ACCURATE_DIM);
+  if (accurate_field) {
+    zvec_field_schema_set_hnsw_index(accurate_field, hnsw_params_accurate);
+    error = zvec_collection_schema_add_field(schema, accurate_field);
+    if (handle_error(error, "adding accurate field") == ZVEC_OK) {
+      printf("✓ Accurate vector field (256D) with HNSW index added\n");
+    }
+  }
+
+  // Vector field with Flat index
+  ZVecFieldSchema *exact_field = zvec_field_schema_create(
+      "exact_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, EXACT_DIM);
+  if (exact_field) {
+    zvec_field_schema_set_flat_index(exact_field, flat_params_l2);
+    error = zvec_collection_schema_add_field(schema, exact_field);
+    if (handle_error(error, "adding exact field") == ZVEC_OK) {
+      printf("✓ Exact search vector field (32D) with Flat index added\n");
+    }
+  }
+
+  // 4. Create collection
+  ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS();
+  ZVecCollection *collection = NULL;
+
+  error = zvec_collection_create_and_open("./index_example_collection", schema,
+                                          &options, &collection);
+  if (handle_error(error, "creating collection") != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    // Cleanup index parameters
+    zvec_index_params_invert_destroy(invert_params_standard);
+    zvec_index_params_invert_destroy(invert_params_extended);
+    zvec_index_params_hnsw_destroy(hnsw_params_fast);
+    zvec_index_params_hnsw_destroy(hnsw_params_balanced);
+    zvec_index_params_hnsw_destroy(hnsw_params_accurate);
+    zvec_index_params_flat_destroy(flat_params_l2);
+    zvec_index_params_flat_destroy(flat_params_cosine);
+    return -1;
+  }
+  printf("✓ Collection created successfully\n");
+
+  // 5. Create test data
+  printf("Creating test documents...\n");
+
+  ZVecDoc *docs[DOC_COUNT];
+  for (int i = 0; i < DOC_COUNT; i++) {
+    docs[i] = zvec_doc_create();
+    if (!docs[i]) {
+      fprintf(stderr, "Failed to create document %d\n", i);
+      // Cleanup
+      for (int j = 0; j < i; j++) {
+        zvec_doc_destroy(docs[j]);
+      }
+      exit_code = -1;
+      goto cleanup;
+    }
+  }
+
+  // Prepare vector data
+  float fast_vec[DOC_COUNT][FAST_DIM];
+  float balanced_vec[DOC_COUNT][BALANCED_DIM];
+  float accurate_vec[DOC_COUNT][ACCURATE_DIM];
+  float exact_vec[DOC_COUNT][EXACT_DIM];
+
+  // Generate different vector patterns for testing: each field is one ramp
+  // in [0, 1) split across the documents, so no two documents are equal.
+  for (int doc_idx = 0; doc_idx < DOC_COUNT; doc_idx++) {
+    for (int i = 0; i < FAST_DIM; i++) {
+      fast_vec[doc_idx][i] = (float)(doc_idx * 64 + i) / (64.0f * 3.0f);
+    }
+    for (int i = 0; i < BALANCED_DIM; i++) {
+      balanced_vec[doc_idx][i] = (float)(doc_idx * 128 + i) / (128.0f * 3.0f);
+    }
+    for (int i = 0; i < ACCURATE_DIM; i++) {
+      accurate_vec[doc_idx][i] = (float)(doc_idx * 256 + i) / (256.0f * 3.0f);
+    }
+    for (int i = 0; i < EXACT_DIM; i++) {
+      exact_vec[doc_idx][i] = (float)(doc_idx * 32 + i) / (32.0f * 3.0f);
+    }
+  }
+
+  // Populate documents
+  for (int i = 0; i < DOC_COUNT; i++) {
+    char pk[16];
+    snprintf(pk, sizeof(pk), "doc%d", i + 1);
+    zvec_doc_set_pk(docs[i], pk);
+
+    char id_val[16];
+    snprintf(id_val, sizeof(id_val), "ID_%d", i + 1);
+    zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_STRING, id_val,
+                                strlen(id_val));
+
+    char category_val[16];
+    snprintf(category_val, sizeof(category_val), "cat_%d", (i % 2) + 1);
+    zvec_doc_add_field_by_value(docs[i], "category", ZVEC_DATA_TYPE_STRING,
+                                category_val, strlen(category_val));
+
+    // sizeof(row) is the byte length of one document's vector.
+    zvec_doc_add_field_by_value(docs[i], "fast_vector",
+                                ZVEC_DATA_TYPE_VECTOR_FP32, fast_vec[i],
+                                sizeof(fast_vec[i]));
+    zvec_doc_add_field_by_value(docs[i], "balanced_vector",
+                                ZVEC_DATA_TYPE_VECTOR_FP32, balanced_vec[i],
+                                sizeof(balanced_vec[i]));
+    zvec_doc_add_field_by_value(docs[i], "accurate_vector",
+                                ZVEC_DATA_TYPE_VECTOR_FP32, accurate_vec[i],
+                                sizeof(accurate_vec[i]));
+    zvec_doc_add_field_by_value(docs[i], "exact_vector",
+                                ZVEC_DATA_TYPE_VECTOR_FP32, exact_vec[i],
+                                sizeof(exact_vec[i]));
+  }
+
+  // 6. Insert documents
+  size_t success_count = 0, error_count = 0;
+  error = zvec_collection_insert(collection, (const ZVecDoc **)docs, DOC_COUNT,
+                                 &success_count, &error_count);
+  if (handle_error(error, "inserting documents") == ZVEC_OK) {
+    printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count,
+           error_count);
+  }
+
+  // Cleanup documents
+  for (int i = 0; i < DOC_COUNT; i++) {
+    zvec_doc_destroy(docs[i]);
+  }
+
+  // 7. Flush collection to build indexes
+  error = zvec_collection_flush(collection);
+  if (handle_error(error, "flushing collection") == ZVEC_OK) {
+    printf("✓ Collection flushed - indexes built\n");
+  }
+
+  // 8. Test different query types
+  printf("Testing various index queries...\n");
+
+  // Test HNSW query (balanced)
+  ZVecVectorQuery hnsw_query = {0};
+  hnsw_query.field_name = (ZVecString){.data = "balanced_vector",
+                                       .length = strlen("balanced_vector")};
+  hnsw_query.query_vector = (ZVecByteArray){.data = (uint8_t *)balanced_vec[0],
+                                            .length = sizeof(balanced_vec[0])};
+  hnsw_query.topk = 2;
+  hnsw_query.filter = (ZVecString){.data = "", .length = 0};
+  hnsw_query.include_vector = false;
+  hnsw_query.include_doc_id = true;
+  hnsw_query.output_fields = NULL;
+
+  ZVecDoc **hnsw_results = NULL;
+  size_t hnsw_result_count = 0;
+  error = zvec_collection_query(collection, &hnsw_query, &hnsw_results,
+                                &hnsw_result_count);
+  // Report a failed query instead of silently skipping it.
+  if (handle_error(error, "querying balanced_vector") == ZVEC_OK) {
+    printf("✓ HNSW query successful - Found %zu results\n", hnsw_result_count);
+    zvec_docs_free(hnsw_results, hnsw_result_count);
+  }
+
+  // Test Flat query (exact)
+  ZVecVectorQuery flat_query = {0};
+  flat_query.field_name =
+      (ZVecString){.data = "exact_vector", .length = strlen("exact_vector")};
+  flat_query.query_vector = (ZVecByteArray){.data = (uint8_t *)exact_vec[0],
+                                            .length = sizeof(exact_vec[0])};
+  flat_query.topk = 2;
+  flat_query.filter = (ZVecString){.data = "", .length = 0};
+  flat_query.include_vector = false;
+  flat_query.include_doc_id = true;
+  flat_query.output_fields = NULL;
+
+  ZVecDoc **flat_results = NULL;
+  size_t flat_result_count = 0;
+  error = zvec_collection_query(collection, &flat_query, &flat_results,
+                                &flat_result_count);
+  if (handle_error(error, "querying exact_vector") == ZVEC_OK) {
+    printf("✓ Flat (exact) query successful - Found %zu results\n",
+           flat_result_count);
+    zvec_docs_free(flat_results, flat_result_count);
+  }
+
+  // 9. Performance comparison information
+  printf("\nIndex Performance Characteristics:\n");
+  printf("- Inverted Index: Fast text search, supports filtering\n");
+  printf(
+      "- HNSW Index: Approximate nearest neighbor search, good balance of "
+      "speed/accuracy\n");
+  printf("- Flat Index: Exact search, slower but 100%% accurate\n");
+  printf(
+      "- Trade-off: Speed vs Accuracy - choose based on your requirements\n");
+
+  // 10. Cleanup
+cleanup:
+  zvec_collection_destroy(collection);
+  zvec_collection_schema_destroy(schema);
+
+  // Cleanup index parameters
+  zvec_index_params_invert_destroy(invert_params_standard);
+  zvec_index_params_invert_destroy(invert_params_extended);
+  zvec_index_params_hnsw_destroy(hnsw_params_fast);
+  zvec_index_params_hnsw_destroy(hnsw_params_balanced);
+  zvec_index_params_hnsw_destroy(hnsw_params_accurate);
+  zvec_index_params_flat_destroy(flat_params_l2);
+  zvec_index_params_flat_destroy(flat_params_cosine);
+
+  // The failure path also lands here; report success only when it applies.
+  if (exit_code == 0) {
+    printf("✓ Index example completed\n");
+  }
+  return exit_code;
+}
\ No newline at end of file
diff --git a/examples/c_api/optimized_example.c b/examples/c_api/optimized_example.c
new file mode 100644
index 000000000..dca683050
--- /dev/null
+++ b/examples/c_api/optimized_example.c
@@ -0,0 +1,301 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "zvec/c_api.h"
+
+/**
+ * @brief Print error message and return error code
+ *
+ * When @p error is not ZVEC_OK, fetches the last error message, prints the
+ * context, the numeric code and the message (or "Unknown error" when no
+ * message is available) to stderr, then releases the message string.
+ *
+ * @param error   Code returned by the call being checked
+ * @param context Short description of the failed operation, used in the log
+ * @return @p error unchanged, so the call can be used inside a condition
+ */
+static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) {
+  if (error != ZVEC_OK) {
+    char *error_msg = NULL;
+    zvec_get_last_error(&error_msg);
+    fprintf(stderr, "Error in %s: %d - %s\n", context, error,
+            error_msg ? error_msg : "Unknown error");
+    zvec_free_str(error_msg);
+  }
+  return error;
+}
+
+/**
+ * @brief Create test vector data
+ *
+ * Allocates @p dimension floats and fills each with rand() scaled into
+ * [0, 1]. The sequence depends on the current rand() state; this function
+ * does not seed the generator.
+ *
+ * @param dimension Number of float elements to allocate
+ * @return Heap-allocated vector the caller must free(), or NULL when the
+ *         allocation fails
+ */
+static float *create_test_vector(size_t dimension) {
+  // calloc rejects element counts whose byte size would overflow size_t,
+  // which a plain malloc(dimension * sizeof(float)) would silently wrap.
+  float *vector = calloc(dimension, sizeof *vector);
+  if (!vector) {
+    return NULL;
+  }
+
+  for (size_t i = 0; i < dimension; i++) {
+    vector[i] = (float)rand() / RAND_MAX;
+  }
+
+  return vector;
+}
+
+/**
+ * @brief Optimized C API usage example with performance considerations
+ */
+int main() {
+  printf("=== ZVec Optimized C API Example ===\n\n");
+
+  // Get version information
+  const char *version = zvec_get_version();
+  printf("ZVec Version: %s\n\n", version ? version : "Unknown");
+
+  ZVecErrorCode error;
+
+  // 1. Create optimized collection schema
+  ZVecCollectionSchema *schema =
+      zvec_collection_schema_create("optimized_example_collection");
+  if (!schema) {
+    fprintf(stderr, "Failed to create collection schema\n");
+    return -1;
+  }
+  printf("✓ Collection schema created\n");
+
+  // 2.
Create optimized index parameters + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, + 32, // Higher M for better connectivity + 200, // Construction ef for quality + 50 // Search ef for performance + ); + + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create fields with optimized configuration + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *text_field = + zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + + if (!id_field || !text_field || !embedding_field) { + fprintf(stderr, "Failed to create field schemas\n"); + goto cleanup_params; + } + + // Set indexes + zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + + // Add fields to schema + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, text_field); + if (handle_error(error, "adding text field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, embedding_field); + if (handle_error(error, "adding embedding field") != ZVEC_OK) + goto cleanup_fields; + + printf("✓ Fields configured with indexes\n"); + + // 4. 
Create collection with optimized options + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + options.enable_mmap = true; // Enable memory mapping for better performance + + ZVecCollection *collection = NULL; + error = zvec_collection_create_and_open("./optimized_example_collection", + schema, &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + goto cleanup_fields; + } + printf("✓ Collection created with optimized settings\n"); + + // 5. Bulk insert test data + const size_t DOC_COUNT = 1000; + const size_t BATCH_SIZE = 100; + + printf("Inserting %zu documents in batches of %zu...\n", DOC_COUNT, + BATCH_SIZE); + + clock_t start_time = clock(); + + for (size_t batch_start = 0; batch_start < DOC_COUNT; + batch_start += BATCH_SIZE) { + size_t current_batch_size = (batch_start + BATCH_SIZE > DOC_COUNT) + ? DOC_COUNT - batch_start + : BATCH_SIZE; + + ZVecDoc **batch_docs = malloc(current_batch_size * sizeof(ZVecDoc *)); + if (!batch_docs) { + fprintf(stderr, "Failed to allocate batch documents\n"); + break; + } + + // Create batch documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + if (!batch_docs[i]) { + fprintf(stderr, "Failed to create document\n"); + // Cleanup previous documents in batch + for (size_t j = 0; j < i; j++) { + zvec_doc_destroy(batch_docs[j]); + } + free(batch_docs); + goto cleanup_collection; + } + + size_t doc_id = batch_start + i; + char pk[32]; + snprintf(pk, sizeof(pk), "doc_%zu", doc_id); + zvec_doc_set_pk(batch_docs[i], pk); + + // Add ID field + char id_str[32]; + snprintf(id_str, sizeof(id_str), "ID_%zu", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_STRING, + id_str, strlen(id_str)); + + // Add text field + char text_str[64]; + snprintf(text_str, sizeof(text_str), + "Document number %zu with sample text", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "text", ZVEC_DATA_TYPE_STRING, + text_str, strlen(text_str)); + + 
// Add vector field + float *vector = create_test_vector(128); + if (vector) { + zvec_doc_add_field_by_value(batch_docs[i], "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vector, + 128 * sizeof(float)); + free(vector); + } + } + + // Insert batch + size_t success_count, error_count; + error = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + if (handle_error(error, "inserting batch") != ZVEC_OK) { + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + goto cleanup_collection; + } + + printf(" Batch %zu-%zu: %zu successful, %zu failed\n", batch_start, + batch_start + current_batch_size - 1, success_count, error_count); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + } + + clock_t insert_end_time = clock(); + double insert_time = + ((double)(insert_end_time - start_time)) / CLOCKS_PER_SEC; + printf("✓ Bulk insertion completed in %.3f seconds (%.0f docs/sec)\n", + insert_time, DOC_COUNT / insert_time); + + // 6. Flush and optimize collection + printf("Flushing and optimizing collection...\n"); + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + printf("✓ Collection optimized\n"); + + // 7. 
Performance query test + printf("Testing query performance...\n"); + + float *query_vector = create_test_vector(128); + if (!query_vector) { + fprintf(stderr, "Failed to create query vector\n"); + goto cleanup_collection; + } + + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "embedding", .length = strlen("embedding")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, + .length = 128 * sizeof(float)}; + query.topk = 10; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = false; + query.include_doc_id = true; + query.output_fields = NULL; + + const int QUERY_COUNT = 100; + start_time = clock(); + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + printf("Query %d failed: %s\n", q, + error_msg ? error_msg : "Unknown error"); + zvec_free_str(error_msg); + continue; + } + + if (results) { + zvec_docs_free(results, result_count); + } + } + + clock_t query_end_time = clock(); + double query_time = ((double)(query_end_time - start_time)) / CLOCKS_PER_SEC; + double avg_query_time = (query_time * 1000) / QUERY_COUNT; + + printf("✓ Performance test completed\n"); + printf(" Average query time: %.2f ms\n", avg_query_time); + printf(" Queries per second: %.0f\n", 1000.0 / avg_query_time); + + free(query_vector); + + // 8. Memory usage information + ZVecCollectionStats *stats = NULL; + error = zvec_collection_get_stats(collection, &stats); + if (error == ZVEC_OK && stats) { + printf("Collection Statistics:\n"); + printf(" Document count: %llu\n", (unsigned long long)stats->doc_count); + zvec_collection_stats_destroy(stats); + } + + // 9. 
Cleanup +cleanup_collection: + zvec_collection_destroy(collection); + +cleanup_fields: + // Field schemas are managed by the collection schema, no need to destroy + // individually + +cleanup_params: + zvec_collection_schema_destroy(schema); + zvec_index_params_hnsw_destroy(hnsw_params); + + printf("✓ Optimized example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c516187c7..39cc3712a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,6 +8,7 @@ git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR}) cc_directory(ailego) cc_directory(core) cc_directory(db) +cc_directory(c_api) if(BUILD_PYTHON_BINDINGS) cc_directory(binding) endif() diff --git a/src/c_api/CMakeLists.txt b/src/c_api/CMakeLists.txt new file mode 100644 index 000000000..c47fcaf31 --- /dev/null +++ b/src/c_api/CMakeLists.txt @@ -0,0 +1,133 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
+include(${PROJECT_ROOT_DIR}/cmake/option.cmake)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# C API library source files
+set(ZVEC_C_API_SOURCES
+  c_api.cc
+)
+
+# C API library header files
+set(ZVEC_C_API_HEADERS
+  ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h
+)
+
+find_package(Threads REQUIRED)
+
+# Apply the link dependencies and include directories shared by the shared and
+# static variants of the C API library.
+#
+# zvec_core is pulled in as a whole archive (force_load on Apple,
+# --whole-archive elsewhere) to ensure static initialization is executed.
+#
+# NOTE(review): the generator expressions in this function were reconstructed;
+# the copy of the patch under review had lost their contents. Confirm
+# $<TARGET_FILE:zvec_core>, $<BUILD_INTERFACE:...> and $<INSTALL_INTERFACE:...>
+# against the original commit.
+function(zvec_c_api_configure target)
+  if(APPLE)
+    target_link_libraries(${target}
+      PRIVATE
+        "-Wl,-force_load" "$<TARGET_FILE:zvec_core>"
+        zvec_db
+        Threads::Threads
+    )
+  else()
+    target_link_libraries(${target}
+      PRIVATE
+        "-Wl,--whole-archive" zvec_core "-Wl,--no-whole-archive"
+        zvec_db
+        Threads::Threads
+    )
+  endif()
+
+  target_include_directories(${target}
+    PUBLIC
+      $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src/include>
+      $<INSTALL_INTERFACE:include>
+    PRIVATE
+      ${PROJECT_SOURCE_DIR}/src
+  )
+endfunction()
+
+# Create shared library
+add_library(zvec_c_api SHARED
+  ${ZVEC_C_API_SOURCES}
+  ${ZVEC_C_API_HEADERS}
+)
+
+# Set library properties
+set_target_properties(zvec_c_api PROPERTIES
+  VERSION ${PROJECT_VERSION}
+  SOVERSION ${PROJECT_VERSION_MAJOR}
+  OUTPUT_NAME "zvec_c_api"
+)
+
+zvec_c_api_configure(zvec_c_api)
+
+# Compile options
+# NOTE(review): the two conditions below were lost in the copy of the patch
+# under review; GNU/Clang compiler-id checks are assumed - confirm. The options
+# are ';'-separated and quoted so each expression stays a single argument.
+target_compile_options(zvec_c_api PRIVATE
+  "$<$<CXX_COMPILER_ID:GNU>:-Wall;-Wextra;-Wpedantic>"
+  "$<$<CXX_COMPILER_ID:Clang>:-Wall;-Wextra;-Wpedantic>"
+)
+
+# Installation rules
+install(TARGETS zvec_c_api
+  EXPORT zvecTargets
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib
+  RUNTIME DESTINATION bin
+  INCLUDES DESTINATION include
+)
+
+install(FILES ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h
+  DESTINATION include/zvec
+)
+
+# Create static library version (optional)
+if(BUILD_STATIC_LIBS)
+  add_library(zvec_c_api_static STATIC
+    ${ZVEC_C_API_SOURCES}
+    ${ZVEC_C_API_HEADERS}
+  )
+
+  set_target_properties(zvec_c_api_static PROPERTIES
+    OUTPUT_NAME "zvec_c_api"
+  )
+
+  # Static library uses the same whole-archive link setup and include paths
+  zvec_c_api_configure(zvec_c_api_static)
+
+  install(TARGETS zvec_c_api_static
+    EXPORT zvecTargets
+    ARCHIVE DESTINATION lib
+    INCLUDES DESTINATION include
+  )
+endif()
\ No newline at end of file
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
new file mode 100644
index 000000000..1c4d830d1
--- /dev/null
+++ b/src/c_api/c_api.cc
@@ -0,0 +1,5766 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Global status flags +static std::atomic g_initialized{false}; +static std::mutex g_init_mutex; + +// Thread-local storage for error information +static thread_local std::string last_error_message; +static thread_local ZVecErrorDetails last_error_details; + +// Helper function: set error information +static void set_last_error(const std::string &msg) { + last_error_message = msg; + + last_error_details.code = ZVEC_ERROR_UNKNOWN; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = nullptr; + last_error_details.line = 0; + last_error_details.function = nullptr; +} + +// Error setting function with detailed information +static void set_last_error_details(ZVecErrorCode code, const std::string &msg, + const char *file = nullptr, int line = 0, + const char *function = nullptr) { + last_error_message = msg; + last_error_details.code = code; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = file; + last_error_details.line = line; + last_error_details.function = function; +} + +// ============================================================================= +// Version information interface implementation +// ============================================================================= + +// Store dynamically generated version information +static std::string g_version_info; +static std::mutex g_version_mutex; + +const char *zvec_get_version(void) { + std::lock_guard lock(g_version_mutex); + + if (g_version_info.empty()) { + try { + std::string version = ZVEC_VERSION_STRING; + + // Try to get Git information + std::string git_info; +#ifdef ZVEC_GIT_DESCRIBE + git_info = ZVEC_GIT_DESCRIBE; +#elif defined(ZVEC_GIT_COMMIT_HASH) + git_info = std::string("g") + ZVEC_GIT_COMMIT_HASH; +#endif + + if 
(!git_info.empty()) { + version += "-" + git_info; + } + + version += " (built " + std::string(__DATE__) + " " + + std::string(__TIME__) + ")"; + + g_version_info = version; + } catch (const std::exception &e) { + // If getting version information fails, fall back to basic version + g_version_info = ZVEC_VERSION_STRING; + } + } + + return g_version_info.c_str(); +} + +bool zvec_check_version(int major, int minor, int patch) { + if (major < 0 || minor < 0 || patch < 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Version numbers must be non-negative", __FILE__, + __LINE__, __FUNCTION__); + return false; + } + + if (ZVEC_VERSION_MAJOR > major) return true; + if (ZVEC_VERSION_MAJOR < major) return false; + + if (ZVEC_VERSION_MINOR > minor) return true; + if (ZVEC_VERSION_MINOR < minor) return false; + + return ZVEC_VERSION_PATCH >= patch; +} + +int zvec_get_version_major(void) { + return ZVEC_VERSION_MAJOR; +} + +int zvec_get_version_minor(void) { + return ZVEC_VERSION_MINOR; +} + +int zvec_get_version_patch(void) { + return ZVEC_VERSION_PATCH; +} + +// ============================================================================= +// String management functions implementation +// ============================================================================= + +ZVecString *zvec_string_create(const char *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + ZVecString *zstr = nullptr; + char *data_buffer = nullptr; + + try { + size_t len = strlen(str); + zstr = new ZVecString(); + data_buffer = new char[len + 1]; + strcpy(const_cast(data_buffer), str); + + zstr->data = data_buffer; + zstr->length = len; + zstr->capacity = len + 1; + + return zstr; + + } catch (const std::exception &e) { + if (data_buffer) { + delete[] data_buffer; + } + if (zstr) { + delete zstr; + } + + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + 
std::string("String creation failed: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } +} + + +ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { + if (!view || !view->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String view or data cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + try { + auto zstr = new ZVecString(); + + zstr->data = new char[view->length + 1]; + memcpy(const_cast(zstr->data), view->data, view->length); + const_cast(zstr->data)[view->length] = '\0'; + zstr->length = view->length; + zstr->capacity = view->length + 1; + + return zstr; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("String creation from view failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("String creation from view failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } +} + +ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { + if (!data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Binary data pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + try { + auto zstr = new ZVecString(); + + zstr->data = new char[length + 1]; + memcpy(const_cast(zstr->data), data, length); + const_cast(zstr->data)[length] = '\0'; // Null terminate for safety + zstr->length = length; + zstr->capacity = length + 1; + + return zstr; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Binary string creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Binary string creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); 
+ return nullptr; + } +} + +ZVecString *zvec_string_copy(const ZVecString *str) { + if (!str || !str->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Source string or data cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + return zvec_string_create(str->data); +} + +const char *zvec_string_c_str(const ZVecString *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + return str->data; +} + +size_t zvec_string_length(const ZVecString *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return 0; + } + + return str->length; +} + +int zvec_string_compare(const ZVecString *str1, const ZVecString *str2) { + if (!str1 || !str2) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointers cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return -1; + } + + if (!str1->data || !str2->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String data cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return -1; + } + + return strcmp(str1->data, str2->data); +} + + +// ============================================================================= +// Configuration-related functions implementation +// ============================================================================= + +ZVecConsoleLogConfig *zvec_config_console_log_create(ZVecLogLevel level) { + try { + auto config = new ZVecConsoleLogConfig(); + config->level = level; + return config; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create console log config: ") + + e.what()); + return nullptr; + } +} + +ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, + const char *dir, + const char *basename, + uint32_t file_size, + uint32_t overdue_days) { + try { + auto config = new 
ZVecFileLogConfig(); + config->level = level; + config->dir = *(zvec_string_create(dir)); + config->basename = *(zvec_string_create(basename)); + config->file_size = file_size; + config->overdue_days = overdue_days; + return config; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create file log config: ") + + e.what()); + return nullptr; + } +} + +ZVecLogConfig *zvec_config_log_create(ZVecLogType type, void *config_data) { + try { + auto log_config = new ZVecLogConfig(); + log_config->type = type; + + switch (type) { + case ZVEC_LOG_TYPE_CONSOLE: { + if (config_data) { + auto console_config = + reinterpret_cast(config_data); + log_config->config.console_config = *console_config; + } else { + log_config->config.console_config.level = ZVEC_LOG_LEVEL_WARN; + } + break; + } + case ZVEC_LOG_TYPE_FILE: { + if (config_data) { + auto file_config = reinterpret_cast(config_data); + log_config->config.file_config = *file_config; + } else { + log_config->config.file_config.level = ZVEC_LOG_LEVEL_WARN; + log_config->config.file_config.dir = *zvec_string_create("./log"); + log_config->config.file_config.basename = *zvec_string_create("zvec"); + log_config->config.file_config.file_size = 100; + log_config->config.file_config.overdue_days = 7; + } + break; + } + default: + set_last_error("Invalid log type"); + delete log_config; + return nullptr; + } + + return log_config; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create log config: ") + e.what()); + return nullptr; + } +} + +ZVecConfigData *zvec_config_data_create(void) { + ZVecConfigData *config = nullptr; + ZVecConsoleLogConfig *log_config = nullptr; + ZVecLogConfig *final_log_config = nullptr; + + try { + config = new ZVecConfigData(); + + log_config = zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); + if (!log_config) { + throw std::runtime_error("Failed to create console log config"); + } + + final_log_config = + 
zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, log_config); + if (!final_log_config) { + throw std::runtime_error("Failed to create log config"); + } + + config->log_config = final_log_config; + + // Set default values from C++ ConfigData + zvec::GlobalConfig::ConfigData config_data; + config->memory_limit_bytes = config_data.memory_limit_bytes; + config->query_thread_count = config_data.query_thread_count; + config->invert_to_forward_scan_ratio = + config_data.invert_to_forward_scan_ratio; + config->brute_force_by_keys_ratio = config_data.brute_force_by_keys_ratio; + config->optimize_thread_count = config_data.optimize_thread_count; + + zvec_config_console_log_destroy(log_config); + return config; + + } catch (const std::exception &e) { + if (final_log_config) { + zvec_config_log_destroy(final_log_config); + } + if (log_config) { + zvec_config_console_log_destroy(log_config); + } + if (config) { + delete config; + } + + set_last_error(std::string("Failed to create config data: ") + e.what()); + return nullptr; + } +} + +void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { + if (config) { + delete config; + } +} + +void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { + if (config) { + if (config->dir.data) zvec_free_str(config->dir.data); + if (config->basename.data) zvec_free_str(config->basename.data); + delete config; + } +} + +void zvec_config_log_destroy(ZVecLogConfig *config) { + if (config) { + delete config; + } +} + +void zvec_config_data_destroy(ZVecConfigData *config) { + if (config) { + delete config; + } +} + +ZVecErrorCode zvec_config_data_set_memory_limit(ZVecConfigData *config, + uint64_t memory_limit_bytes) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->memory_limit_bytes = memory_limit_bytes; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_log_config(ZVecConfigData *config, + ZVecLogConfig *log_config) { + if (!config) { + 
set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->log_config = log_config; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_query_thread_count(ZVecConfigData *config, + uint32_t thread_count) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->query_thread_count = thread_count; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->optimize_thread_count = thread_count; + return ZVEC_OK; +} + + +// ============================================================================= +// Initialization and cleanup interface implementation +// ============================================================================= + +ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { + std::lock_guard lock(g_init_mutex); + + if (g_initialized.load()) { + set_last_error_details(ZVEC_ERROR_ALREADY_EXISTS, + "Library already initialized"); + return ZVEC_ERROR_ALREADY_EXISTS; + } + + try { + // Convert to C++ configuration object + if (config) { + zvec::GlobalConfig::ConfigData cpp_config{}; + cpp_config.memory_limit_bytes = config->memory_limit_bytes; + cpp_config.query_thread_count = config->query_thread_count; + cpp_config.invert_to_forward_scan_ratio = + config->invert_to_forward_scan_ratio; + cpp_config.brute_force_by_keys_ratio = config->brute_force_by_keys_ratio; + cpp_config.optimize_thread_count = config->optimize_thread_count; + + // Set log configuration + if (config->log_config) { + std::shared_ptr log_config; + + switch (config->log_config->type) { + case ZVEC_LOG_TYPE_CONSOLE: { + auto console_level = static_cast( + config->log_config->config.console_config.level); + log_config = std::make_shared( + console_level); + break; + } + case 
ZVEC_LOG_TYPE_FILE: { + auto file_level = static_cast( + config->log_config->config.file_config.level); + std::string dir(config->log_config->config.file_config.dir.data, + config->log_config->config.file_config.dir.length); + std::string basename( + config->log_config->config.file_config.basename.data, + config->log_config->config.file_config.basename.length); + log_config = std::make_shared( + file_level, dir, basename); + break; + } + default: + throw std::runtime_error("Unknown log type"); + } + cpp_config.log_config = log_config; + } + // Initialize global configuration + auto status = zvec::GlobalConfig::Instance().Initialize(cpp_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } else { + // Initialize with default configuration + zvec::GlobalConfig::ConfigData default_config; + auto status = zvec::GlobalConfig::Instance().Initialize(default_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + g_initialized.store(true); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Initialization failed: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_shutdown(void) { + std::lock_guard lock(g_init_mutex); + + if (!g_initialized.load()) { + set_last_error_details(ZVEC_ERROR_FAILED_PRECONDITION, + "Library not initialized"); + return ZVEC_ERROR_FAILED_PRECONDITION; + } + + try { + g_initialized.store(false); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Shutdown failed: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_is_initialized(bool *initialized) { + if (!initialized) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Initialized flag pointer cannot be null", 
__FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *initialized = g_initialized.load(); + return ZVEC_OK; +} + +// ============================================================================= +// Error handling interface implementation +// ============================================================================= + +ZVecErrorCode zvec_get_last_error_details(ZVecErrorDetails *error_details) { + if (!error_details) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Error details pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_details = last_error_details; + return ZVEC_OK; +} + +void zvec_clear_error(void) { + last_error_message.clear(); + last_error_details = {}; +} + +// Helper functions: convert internal status to error code +static ZVecErrorCode status_to_error_code(const zvec::Status &status) { + if (status.code() < zvec::StatusCode::OK || + status.code() > zvec::StatusCode::UNKNOWN) { + set_last_error("Unexpected status code: " + + std::to_string(static_cast(status.code()))); + return ZVEC_ERROR_UNKNOWN; + } + + return static_cast(status.code()); +} + +// Helper function: handle Expected results +template +static ZVecErrorCode handle_expected_result( + const tl::expected &result, T *out_value = nullptr) { + if (result.has_value()) { + if (out_value) { + *out_value = result.value(); + } + return ZVEC_OK; + } else { + set_last_error(result.error().message()); + return status_to_error_code(result.error()); + } +} + +// Helper function: copy strings +static char *copy_string(const std::string &str) { + if (str.empty()) return nullptr; + + char *copy = new char[str.length() + 1]; + strcpy(copy, str.c_str()); + return copy; +} + +static zvec::DataType convert_data_type(ZVecDataType zvec_type) { + if (zvec_type < ZVEC_DATA_TYPE_UNDEFINED || + zvec_type > ZVEC_DATA_TYPE_ARRAY_DOUBLE) { + return zvec::DataType::UNDEFINED; + } + + return static_cast(zvec_type); 
+} + +static ZVecDataType convert_zvec_data_type(zvec::DataType cpp_type) { + if (cpp_type < zvec::DataType::UNDEFINED || + cpp_type > zvec::DataType::ARRAY_DOUBLE) { + return ZVEC_DATA_TYPE_UNDEFINED; + } + + return static_cast(cpp_type); +} + +// Helper function: convert metric type +static zvec::MetricType convert_metric_type(ZVecMetricType metric_type) { + if (metric_type < ZVEC_METRIC_TYPE_UNDEFINED || + metric_type > ZVEC_METRIC_TYPE_MIPSL2) { + return zvec::MetricType::UNDEFINED; + } + + return static_cast(metric_type); +} + +// Helper function: convert ZVecIndexType to internal IndexType +static zvec::IndexType convert_index_type(ZVecIndexType zvec_type) { + if (zvec_type < ZVEC_INDEX_TYPE_UNDEFINED || + zvec_type > ZVEC_INDEX_TYPE_INVERT) { + return zvec::IndexType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper function: convert ZVecQuantizeType to internal QuantizeType +static zvec::QuantizeType convert_quantize_type(ZVecQuantizeType zvec_type) { + if (zvec_type < ZVEC_QUANTIZE_TYPE_UNDEFINED || + zvec_type > ZVEC_QUANTIZE_TYPE_INT4) { + return zvec::QuantizeType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper function: set field index params +static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, + const ZVecFieldSchema *zvec_field) { + if (!zvec_field->index_params) { + return zvec::Status::OK(); + } + + switch (zvec_field->index_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams *params = + &zvec_field->index_params->params.hnsw_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = std::make_shared( + metric, params->m, params->ef_construction, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *params = + &zvec_field->index_params->params.flat_params; + auto metric = 
convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *params = + &zvec_field->index_params->params.invert_params; + auto index_params = std::make_shared( + params->enable_range_optimization, params->enable_extended_wildcard); + field_schema->set_index_params(index_params); + break; + } + default: + break; + } + + return zvec::Status::OK(); +} + +// ============================================================================= +// Memory Management interface implementation +// ============================================================================= + +void *zvec_malloc(size_t size) { + if (size == 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Cannot allocate zero bytes", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + try { + return malloc(size); + } catch (const std::bad_alloc &e) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Memory allocation failed: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } +} + +void *zvec_realloc(void *ptr, size_t size) { + if (size == 0 && ptr == nullptr) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Cannot reallocate null pointer to zero size", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + try { + return realloc(ptr, size); + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Memory reallocation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } +} + +void zvec_free(void *ptr) { + if (ptr) { + free(ptr); + } +} + +void zvec_free_string(ZVecString *str) { + if (str) { + if (str->data) { + delete[] str->data; + } + delete str; + } +} + +void zvec_free_string_array(ZVecStringArray *array) { + 
if (array) { + if (array->strings) { + for (size_t i = 0; i < array->count; ++i) { + zvec_free_string(&array->strings[i]); + } + delete[] array->strings; + } + delete array; + } +} + +void zvec_free_byte_array(ZVecMutableByteArray *array) { + if (array) { + if (array->data) { + delete[] array->data; + } + delete array; + } +} + +void zvec_free_str(char *str) { + if (str) { + free(str); + } +} + +void zvec_free_float_array(float *array) { + if (array) { + free(array); + } +} + +void zvec_free_str_array(char **array, size_t count) { + if (!array) return; + + // If count is 0, only free the string array itself, don't process internal + // strings + if (count == 0) { + free(array); + return; + } + + for (size_t i = 0; i < count; ++i) { + if (array[i]) { // Only free when string pointer is not null + free(array[i]); + } + } + free(array); +} + +ZVecErrorCode zvec_get_last_error(char **error_msg) { + if (!error_msg) { + set_last_error("Invalid argument: error_msg cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_msg = copy_string(last_error_message); + return ZVEC_OK; +} + +void zvec_free_uint8_array(uint8_t *array) { + if (array) { + free(array); + } +} + +void zvec_free_field_schema_array(ZVecFieldSchema **array, size_t count) { + if (!array) return; + + for (size_t i = 0; i < count; ++i) { + zvec_free_field_schema(array[i]); + } + free(array); +} + +void zvec_free_field_schema(ZVecFieldSchema *field_schema) { + if (field_schema) { + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + delete field_schema; + } +} + + +// ============================================================================= +// Index parameters management interface implementation +// ============================================================================= + +void zvec_index_params_base_init(ZVecBaseIndexParams *params, + ZVecIndexType index_type) { + if (params) { + params->index_type = index_type; + } +} + +void 
zvec_index_params_invert_init(ZVecInvertIndexParams *params, + bool enable_range_opt, + bool enable_wildcard) { + if (params) { + zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); + params->enable_range_optimization = enable_range_opt; + params->enable_extended_wildcard = enable_wildcard; + } +} + +void zvec_index_params_vector_init(ZVecVectorIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_base_init(¶ms->base, index_type); + params->metric_type = metric_type; + params->quantize_type = quantize_type; + } +} + +void zvec_index_params_hnsw_init(ZVecHnswIndexParams *params, + ZVecMetricType metric_type, int m, + int ef_construction, int ef_search, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, + metric_type, quantize_type); + params->m = m; + params->ef_construction = ef_construction; + params->ef_search = ef_search; + } +} + +void zvec_index_params_flat_init(ZVecFlatIndexParams *params, + ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, + metric_type, quantize_type); + } +} + +void zvec_index_params_ivf_init(ZVecIVFIndexParams *params, + ZVecMetricType metric_type, int n_list, + int n_iters, bool use_soar, int n_probe, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, + metric_type, quantize_type); + params->n_list = n_list; + params->n_iters = n_iters; + params->use_soar = use_soar; + params->n_probe = n_probe; + } +} + +void zvec_index_params_init_default(ZVecIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type) { + if (!params) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return; + } + + params->index_type = 
index_type; + + switch (index_type) { + case ZVEC_INDEX_TYPE_INVERT: + zvec_index_params_invert_init(¶ms->params.invert_params, false, + false); + break; + + case ZVEC_INDEX_TYPE_HNSW: + zvec_index_params_hnsw_init(¶ms->params.hnsw_params, metric_type, 16, + 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + case ZVEC_INDEX_TYPE_FLAT: + zvec_index_params_flat_init(¶ms->params.flat_params, metric_type, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + case ZVEC_INDEX_TYPE_IVF: + zvec_index_params_ivf_init(¶ms->params.ivf_params, metric_type, 100, + 10, false, 10, ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + default: + set_last_error_details(ZVEC_ERROR_NOT_SUPPORTED, "Unsupported index type", + __FILE__, __LINE__, __FUNCTION__); + break; + } +} + +void zvec_index_params_destroy(ZVecIndexParams *params) { + if (params) { + delete params; + } +} + +ZVecInvertIndexParams *zvec_index_params_invert_create(bool enable_range_opt, + bool enable_wildcard) { + try { + auto params = new ZVecInvertIndexParams(); + zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); + params->enable_range_optimization = enable_range_opt; + params->enable_extended_wildcard = enable_wildcard; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create invert index params: ") + + e.what()); + return nullptr; + } +} + +ZVecVectorIndexParams *zvec_index_params_vector_create( + ZVecIndexType index_type, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + try { + auto params = new ZVecVectorIndexParams(); + zvec_index_params_base_init(¶ms->base, index_type); + params->metric_type = metric_type; + params->quantize_type = quantize_type; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create vector index params: ") + + e.what()); + return nullptr; + } +} + +ZVecHnswIndexParams *zvec_index_params_hnsw_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, + int 
ef_construction, int ef_search) { + try { + auto params = new ZVecHnswIndexParams(); + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, + metric_type, quantize_type); + params->m = m; + params->ef_construction = ef_construction; + params->ef_search = ef_search; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create HNSW index params: ") + + e.what()); + return nullptr; + } +} + +ZVecFlatIndexParams *zvec_index_params_flat_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type) { + try { + auto params = new ZVecFlatIndexParams(); + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, + metric_type, quantize_type); + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create Flat index params: ") + + e.what()); + return nullptr; + } +} + +ZVecIVFIndexParams *zvec_index_params_ivf_create(ZVecMetricType metric_type, + ZVecQuantizeType quantize_type, + int n_list, int n_iters, + bool use_soar, int n_probe) { + try { + auto params = new ZVecIVFIndexParams(); + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, + metric_type, quantize_type); + params->n_list = n_list; + params->n_iters = n_iters; + params->use_soar = use_soar; + params->n_probe = n_probe; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create IVF index params: ") + + e.what()); + return nullptr; + } +} + +void zvec_index_params_invert_destroy(ZVecInvertIndexParams *params) { + if (params) { + delete params; + } +} + +void zvec_index_params_vector_destroy(ZVecVectorIndexParams *params) { + if (params) { + delete params; + } +} + +void zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params) { + if (params) { + delete params; + } +} + +void zvec_index_params_flat_destroy(ZVecFlatIndexParams *params) { + if (params) { + delete params; + } +} + +void zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params) { + if (params) { 
+ delete params; + } +} + +// ============================================================================= +// FieldSchema management interface implementation +// ============================================================================= + +ZVecFieldSchema *zvec_field_schema_create(const char *name, + ZVecDataType data_type, bool nullable, + uint32_t dimension) { + if (!name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + try { + auto schema = new ZVecFieldSchema(); + + schema->name = zvec_string_create(name); + if (!schema->name) { + delete schema; + return nullptr; + } + + schema->data_type = data_type; + schema->nullable = nullable; + schema->dimension = dimension; + schema->index_params = nullptr; + + return schema; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Field schema creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Field schema creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } +} + +void zvec_field_schema_destroy(ZVecFieldSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + schema->index_params = nullptr; + } + delete schema; + } +} + +ZVecErrorCode zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!index_params) { + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + delete schema->index_params; + schema->index_params = nullptr; 
+ } + return ZVEC_OK; + } + + try { + if (!schema->index_params) { + schema->index_params = new ZVecIndexParams(); + } + + *schema->index_params = *index_params; + + return ZVEC_OK; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Failed to set index params: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to set index params: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +void zvec_field_schema_set_invert_index( + ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params) { + if (field_schema && invert_params) { + if (!field_schema->index_params) { + field_schema->index_params = new ZVecIndexParams(); + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_INVERT; + field_schema->index_params->params.invert_params = *invert_params; + } +} + +void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, + const ZVecHnswIndexParams *hnsw_params) { + if (field_schema && hnsw_params) { + if (!field_schema->index_params) { + field_schema->index_params = new ZVecIndexParams(); + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_HNSW; + field_schema->index_params->params.hnsw_params = *hnsw_params; + } +} + +void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, + const ZVecFlatIndexParams *flat_params) { + if (field_schema && flat_params) { + if (!field_schema->index_params) { + field_schema->index_params = new ZVecIndexParams(); + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_FLAT; + field_schema->index_params->params.flat_params = *flat_params; + } +} + +void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, + const ZVecIVFIndexParams *ivf_params) { + if (field_schema && ivf_params) { + if 
(!field_schema->index_params) { + field_schema->index_params = new ZVecIndexParams(); + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_IVF; + field_schema->index_params->params.ivf_params = *ivf_params; + } +} + +static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { + if (!field_schema) return; + + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + delete field_schema->index_params; + field_schema->index_params = nullptr; + } + + zvec_free_string(field_schema->name); + field_schema->name = nullptr; +} + + +// ============================================================================= +// CollectionOptions management interface implementation +// ============================================================================= + +void zvec_collection_options_init_default(ZVecCollectionOptions *options) { + if (!options) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer cannot be null", + __FILE__, __LINE__, __FUNCTION__); + return; + } + + options->enable_mmap = true; + options->max_buffer_size = zvec::DEFAULT_MAX_BUFFER_SIZE; + options->read_only = false; + options->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; +} + +// ============================================================================= +// CollectionSchema management interface implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { + if (!name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + try { + auto schema = new ZVecCollectionSchema(); + + schema->name = zvec_string_create(name); + if (!schema->name) { + delete schema; + return nullptr; + } + + schema->fields = nullptr; + schema->field_count = 0; + schema->field_capacity = 0; + 
schema->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return schema; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Collection schema creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Collection schema creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } +} + +void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_destroy(schema->fields[i]); + } + delete[] schema->fields; + } + + delete schema; + } +} + +ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, + ZVecFieldSchema *field) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field || !field->name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field or field name cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && field->name && + zvec_string_compare(schema->fields[i]->name, field->name) == 0) { + set_last_error_details( + ZVEC_ERROR_ALREADY_EXISTS, + std::string("Field '") + field->name->data + "' already exists", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_ALREADY_EXISTS; + } + } + + if (schema->field_count >= schema->field_capacity) { + size_t new_capacity = + schema->field_capacity == 0 ? 
8 : schema->field_capacity * 2; + auto new_fields = new ZVecFieldSchema *[new_capacity]; + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + delete[] schema->fields; + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + schema->fields[schema->field_count] = field; + schema->field_count++; + + return ZVEC_OK; + } catch (const std::bad_alloc &e) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Failed to add field: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to add field: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, + const ZVecFieldSchema *fields, + size_t field_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!fields && field_count > 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Fields array cannot be null when field_count > 0", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + try { + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &field = fields[i]; + if (!field.name || !field.name->data || field.name->length == 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + + std::to_string(i) + " has invalid name", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + size_t total_needed = schema->field_count + field_count; + if (total_needed > schema->field_capacity) { + size_t new_capacity = schema->field_capacity; + while (new_capacity < 
total_needed) { + new_capacity = new_capacity == 0 ? 8 : new_capacity * 2; + } + + auto new_fields = new ZVecFieldSchema *[new_capacity]; + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + delete[] schema->fields; + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &src_field = fields[i]; + + ZVecFieldSchema *new_field = new ZVecFieldSchema(); + + new_field->name = zvec_string_copy(src_field.name); + + new_field->data_type = src_field.data_type; + new_field->nullable = src_field.nullable; + new_field->dimension = src_field.dimension; + + if (src_field.index_params) { + new_field->index_params = new ZVecIndexParams(); + *(new_field->index_params) = *(src_field.index_params); + } else { + new_field->index_params = nullptr; + } + + schema->fields[schema->field_count] = new_field; + schema->field_count++; + } + + return ZVEC_OK; + } catch (const std::bad_alloc &e) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Failed to add fields: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to add fields: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + for (size_t i = 0; i < schema->field_count; ++i) { + if 
(schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + zvec_field_schema_destroy(schema->fields[i]); + + for (size_t j = i; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + return ZVEC_OK; + } + } + + set_last_error_details(ZVEC_ERROR_NOT_FOUND, + std::string("Field '") + field_name + "' not found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to remove field: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_names && field_count > 0) { + set_last_error_details( + ZVEC_ERROR_INVALID_ARGUMENT, + "Field names array cannot be null when field_count > 0", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + try { + for (size_t i = 0; i < field_count; ++i) { + if (!field_names[i]) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field name at index ") + + std::to_string(i) + " is null", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + std::vector remove_indices; + std::vector not_found_fields; + + for (size_t field_idx = 0; field_idx < field_count; ++field_idx) { + std::string target_name(field_names[field_idx]); + bool found = false; + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, target_name.c_str()) == 0) { + 
remove_indices.push_back(i); + found = true; + break; + } + } + + if (!found) { + not_found_fields.push_back(target_name); + } + } + + if (!not_found_fields.empty()) { + std::string error_msg = "Fields not found: "; + for (size_t i = 0; i < not_found_fields.size(); ++i) { + error_msg += "'" + not_found_fields[i] + "'"; + if (i < not_found_fields.size() - 1) { + error_msg += ", "; + } + } + set_last_error_details(ZVEC_ERROR_NOT_FOUND, error_msg, __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; + } + + std::sort(remove_indices.begin(), remove_indices.end(), + std::greater()); + + for (size_t remove_index : remove_indices) { + zvec_field_schema_destroy(schema->fields[remove_index]); + + for (size_t j = remove_index; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to remove fields: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecFieldSchema *zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + return schema->fields[i]; + } + } + + return nullptr; +} + +size_t zvec_collection_schema_get_field_count( + const ZVecCollectionSchema *schema) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return 0; + } + + return schema->field_count; +} + +ZVecFieldSchema *zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be 
null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + if (index >= schema->field_count) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field index out of bounds", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + return schema->fields[index]; +} + +ZVecErrorCode zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + schema->max_doc_count_per_segment = max_doc_count; + return ZVEC_OK; +} + +uint64_t zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema) { + if (!schema) return 0; + return schema->max_doc_count_per_segment; +} + + +ZVecErrorCode zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (error_msg) { + *error_msg = nullptr; + } + + if (!schema->name) { + if (error_msg) { + *error_msg = zvec_string_create("Collection name is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (schema->field_count == 0) { + if (error_msg) { + *error_msg = zvec_string_create("At least one field is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "At least one field is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + auto field = schema->fields[i]; + if (!field) { + if (error_msg) { + *error_msg = zvec_string_create("Null field found"); + } + 
set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field->name) { + if (error_msg) { + *error_msg = zvec_string_create("Field name is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; +} + +void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { + if (!schema) return; + + try { + if (schema->name) { + zvec_free_string(schema->name); + } + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_cleanup(schema->fields[i]); + } + delete[] schema->fields; + schema->fields = nullptr; + schema->field_count = 0; + } + + schema->max_doc_count_per_segment = 0; + } catch (const std::exception &e) { + fprintf(stderr, + "Warning: Exception in zvec_collection_schema_cleanup: %s\n", + e.what()); + } +} + + +// ============================================================================= +// Helper functions +// ============================================================================= + +const char *zvec_error_code_to_string(ZVecErrorCode error_code) { + switch (error_code) { + case ZVEC_OK: + return "OK"; + case ZVEC_ERROR_NOT_FOUND: + return "NOT_FOUND"; + case ZVEC_ERROR_ALREADY_EXISTS: + return "ALREADY_EXISTS"; + case ZVEC_ERROR_INVALID_ARGUMENT: + return "INVALID_ARGUMENT"; + case ZVEC_ERROR_PERMISSION_DENIED: + return "PERMISSION_DENIED"; + case ZVEC_ERROR_FAILED_PRECONDITION: + return "FAILED_PRECONDITION"; + case ZVEC_ERROR_RESOURCE_EXHAUSTED: + return "RESOURCE_EXHAUSTED"; + case ZVEC_ERROR_UNAVAILABLE: + return "UNAVAILABLE"; + case ZVEC_ERROR_INTERNAL_ERROR: + return "INTERNAL_ERROR"; + case ZVEC_ERROR_NOT_SUPPORTED: + return "NOT_SUPPORTED"; + case ZVEC_ERROR_UNKNOWN: + return "UNKNOWN"; + default: + return "UNKNOWN_ERROR_CODE"; + } +} + +const char 
*zvec_data_type_to_string(ZVecDataType data_type) { + switch (data_type) { + case ZVEC_DATA_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_DATA_TYPE_BINARY: + return "BINARY"; + case ZVEC_DATA_TYPE_STRING: + return "STRING"; + case ZVEC_DATA_TYPE_BOOL: + return "BOOL"; + case ZVEC_DATA_TYPE_INT32: + return "INT32"; + case ZVEC_DATA_TYPE_INT64: + return "INT64"; + case ZVEC_DATA_TYPE_UINT32: + return "UINT32"; + case ZVEC_DATA_TYPE_UINT64: + return "UINT64"; + case ZVEC_DATA_TYPE_FLOAT: + return "FLOAT"; + case ZVEC_DATA_TYPE_DOUBLE: + return "DOUBLE"; + case ZVEC_DATA_TYPE_VECTOR_BINARY32: + return "VECTOR_BINARY32"; + case ZVEC_DATA_TYPE_VECTOR_BINARY64: + return "VECTOR_BINARY64"; + case ZVEC_DATA_TYPE_VECTOR_FP16: + return "VECTOR_FP16"; + case ZVEC_DATA_TYPE_VECTOR_FP32: + return "VECTOR_FP32"; + case ZVEC_DATA_TYPE_VECTOR_FP64: + return "VECTOR_FP64"; + case ZVEC_DATA_TYPE_VECTOR_INT4: + return "VECTOR_INT4"; + case ZVEC_DATA_TYPE_VECTOR_INT8: + return "VECTOR_INT8"; + case ZVEC_DATA_TYPE_VECTOR_INT16: + return "VECTOR_INT16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + return "SPARSE_VECTOR_FP16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: + return "SPARSE_VECTOR_FP32"; + case ZVEC_DATA_TYPE_ARRAY_BINARY: + return "ARRAY_BINARY"; + case ZVEC_DATA_TYPE_ARRAY_STRING: + return "ARRAY_STRING"; + case ZVEC_DATA_TYPE_ARRAY_BOOL: + return "ARRAY_BOOL"; + case ZVEC_DATA_TYPE_ARRAY_INT32: + return "ARRAY_INT32"; + case ZVEC_DATA_TYPE_ARRAY_INT64: + return "ARRAY_INT64"; + case ZVEC_DATA_TYPE_ARRAY_UINT32: + return "ARRAY_UINT32"; + case ZVEC_DATA_TYPE_ARRAY_UINT64: + return "ARRAY_UINT64"; + case ZVEC_DATA_TYPE_ARRAY_FLOAT: + return "ARRAY_FLOAT"; + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: + return "ARRAY_DOUBLE"; + default: + return "UNKNOWN_DATA_TYPE"; + } +} + +const char *zvec_index_type_to_string(ZVecIndexType index_type) { + switch (index_type) { + case ZVEC_INDEX_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_INDEX_TYPE_HNSW: + return "HNSW"; + case 
ZVEC_INDEX_TYPE_IVF: + return "IVF"; + case ZVEC_INDEX_TYPE_FLAT: + return "FLAT"; + case ZVEC_INDEX_TYPE_INVERT: + return "INVERT"; + default: + return "UNKNOWN_INDEX_TYPE"; + } +} + +const char *zvec_metric_type_to_string(ZVecMetricType metric_type) { + switch (metric_type) { + case ZVEC_METRIC_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_METRIC_TYPE_L2: + return "L2"; + case ZVEC_METRIC_TYPE_IP: + return "IP"; + case ZVEC_METRIC_TYPE_COSINE: + return "COSINE"; + case ZVEC_METRIC_TYPE_MIPSL2: + return "MIPSL2"; + default: + return "UNKNOWN_METRIC_TYPE"; + } +} + +ZVecErrorCode zvec_get_system_info(ZVecString **info_json) { + if (!info_json) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Info JSON pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + std::ostringstream oss; + oss << "{"; + oss << "\"version\":\"" << ZVEC_VERSION_STRING << "\","; + oss << "\"platform\":\"" + << +#ifdef _WIN32 + "Windows" +#elif __APPLE__ + "macOS" +#elif __linux__ + "Linux" +#else + "Unknown" +#endif + << "\","; + oss << "\"architecture\":\"" + << +#ifdef __x86_64__ + "x86_64" +#elif __aarch64__ + "ARM64" +#elif __arm__ + "ARM" +#else + "Unknown" +#endif + << "\","; + oss << "\"compiler\":\"" + << +#ifdef __GNUC__ + "GCC " << __GNUC__ << "." << __GNUC_MINOR__ +#elif _MSC_VER + "MSVC " << _MSC_VER +#elif __clang__ + "Clang " << __clang_major__ << "." 
<< __clang_minor__ +#else + "Unknown" +#endif + << "\""; + oss << "}"; + + *info_json = zvec_string_create(oss.str().c_str()); + if (!*info_json) { + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to get system info: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +bool check_is_vector_field(const ZVecFieldSchema &zvec_field) { + bool is_vector_field = + (zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); + return is_vector_field; +} + +// ============================================================================= +// Doc functions implementation +// ============================================================================= + +ZVecDoc *zvec_doc_create(void) { + try { + auto doc_ptr = + new std::shared_ptr(std::make_shared()); + return reinterpret_cast(doc_ptr); + + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create document: ") + e.what()); + return nullptr; + } +} + +void zvec_doc_destroy(ZVecDoc *doc) { + if (doc) { + delete reinterpret_cast *>(doc); + } +} + +void zvec_doc_clear(ZVecDoc *doc) { + if (doc) { + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->clear(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to cleanup document: ") + e.what()); + } + } +} + +void zvec_docs_free(ZVecDoc 
**docs, size_t count) { + if (!docs) return; + + for (size_t i = 0; i < count; ++i) { + zvec_doc_destroy(docs[i]); + } + + free(docs); +} + +void zvec_doc_set_pk(ZVecDoc *doc, const char *pk) { + if (!doc || !pk) return; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_pk(std::string(pk)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to set document PK: ") + e.what()); + } +} + +void zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id) { + if (!doc) return; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_doc_id(doc_id); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to set document id: ") + e.what()); + } +} + + +void zvec_doc_set_score(ZVecDoc *doc, float score) { + if (!doc) return; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_score(score); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to set document score: ") + e.what()); + } +} + +void zvec_doc_set_operator(ZVecDoc *doc, ZVecDocOperator op) { + if (!doc) return; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_operator(static_cast(op)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to set document operator: ") + e.what()); + } +} + +// ============================================================================= +// Document interface implementation +// ============================================================================= + +// Helper function to extract scalar values from raw data +template +T extract_scalar_value(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size != sizeof(T)) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return T{}; + } + return *static_cast(value); +} + +// Helper function to extract vector values from raw data +template +std::vector extract_vector_values(const void *value, size_t value_size, + ZVecErrorCode 
*error_code) { + if (value_size % sizeof(T) != 0) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::vector(); + } + size_t count = value_size / sizeof(T); + const T *vals = static_cast(value); + return std::vector(vals, vals + count); +} + +// Helper function to extract array values from raw data +template +std::vector extract_array_values(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size % sizeof(T) != 0) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::vector(); + } + size_t count = value_size / sizeof(T); + const T *vals = static_cast(value); + return std::vector(vals, vals + count); +} + +// Helper function to handle sparse vector extraction +template +std::pair, std::vector> extract_sparse_vector( + const void *value, size_t value_size, ZVecErrorCode *error_code) { + if (value_size < sizeof(uint32_t)) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::make_pair(std::vector(), std::vector()); + } + + const uint32_t *data = static_cast(value); + uint32_t nnz = data[0]; + + size_t required_size = + sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(T)); + if (value_size < required_size) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::make_pair(std::vector(), std::vector()); + } + + const uint32_t *indices = data + 1; + const T *values = reinterpret_cast(indices + nnz); + + std::vector index_vec(indices, indices + nnz); + std::vector value_vec(values, values + nnz); + + return std::make_pair(std::move(index_vec), std::move(value_vec)); +} + +// Helper function to extract string array from raw data +std::vector extract_string_array(const void *value, + size_t value_size) { + std::vector string_array; + const char *data = static_cast(value); + size_t pos = 0; + + while (pos < value_size) { + size_t str_len = strlen(data + pos); + if (pos + str_len >= value_size) { + break; + } + 
string_array.emplace_back(data + pos, str_len); + pos += str_len + 1; + } + return string_array; +} + +// Helper function to extract binary array from raw data +std::vector extract_binary_array(const void *value, + size_t value_size) { + std::vector binary_array; + const char *data = static_cast(value); + size_t pos = 0; + + while (pos < value_size) { + if (pos + sizeof(uint32_t) > value_size) { + break; + } + uint32_t bin_len = *reinterpret_cast(data + pos); + pos += sizeof(uint32_t); + + if (pos + bin_len > value_size) { + break; + } + binary_array.emplace_back(data + pos, bin_len); + pos += bin_len; + } + return binary_array; +} + +static std::vector convert_zvec_docs_to_internal( + const ZVecDoc **zvec_docs, size_t doc_count) { + std::vector docs; + docs.reserve(doc_count); + + for (size_t i = 0; i < doc_count; ++i) { + docs.push_back( + *(*reinterpret_cast *>(zvec_docs[i]))); + } + + return docs; +} + + +static zvec::Status convert_zvec_collection_schema_to_internal( + const ZVecCollectionSchema *schema, + zvec::CollectionSchema::Ptr &collection_schema) { + std::string coll_name(schema->name->data, schema->name->length); + collection_schema = std::make_shared(coll_name); + collection_schema->set_max_doc_count_per_segment( + schema->max_doc_count_per_segment); + + for (size_t i = 0; i < schema->field_count; ++i) { + const ZVecFieldSchema &zvec_field = *schema->fields[i]; + zvec::DataType data_type = convert_data_type(zvec_field.data_type); + std::string field_name = + std::string(zvec_field.name->data, zvec_field.name->length); + zvec::FieldSchema::Ptr field_schema; + + bool is_vector_field = check_is_vector_field(zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type, zvec_field.dimension, zvec_field.nullable); + } else { + field_schema = std::make_shared(field_name, data_type, + zvec_field.nullable); + } + + if (zvec_field.index_params != nullptr) { + zvec::Status status = set_field_index_params(field_schema, 
&zvec_field); + if (!status.ok()) { + return status; + } + } + + zvec::Status status = collection_schema->add_field(field_schema); + if (!status.ok()) { + return status; + } + } + + return zvec::Status::OK(); +} + +static zvec::Status convert_zvec_field_schema_to_internal( + const ZVecFieldSchema &zvec_field, zvec::FieldSchema::Ptr &field_schema) { + // Validate input + if (!zvec_field.name) { + return zvec::Status::InvalidArgument("Field name cannot be null"); + } + + zvec::DataType data_type = convert_data_type(zvec_field.data_type); + if (data_type == zvec::DataType::UNDEFINED) { + return zvec::Status::InvalidArgument("Invalid data type"); + } + + std::string field_name(zvec_field.name->data, zvec_field.name->length); + bool is_vector_field = check_is_vector_field(zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type, zvec_field.dimension, zvec_field.nullable); + + if (zvec_field.index_params != nullptr) { + switch (zvec_field.index_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto *params = &zvec_field.index_params->params.hnsw_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = std::make_shared( + metric, params->m, params->ef_construction, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto *params = &zvec_field.index_params->params.flat_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto *params = &zvec_field.index_params->params.ivf_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = 
std::make_shared( + metric, params->n_list, params->n_iters, params->use_soar, + quantize); + field_schema->set_index_params(index_params); + break; + } + default: + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + break; + } + } else { + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + } + } else { + field_schema = std::make_shared(field_name, data_type, + zvec_field.nullable); + + if (zvec_field.index_params != nullptr && + zvec_field.index_params->index_type == ZVEC_INDEX_TYPE_INVERT) { + auto *params = &zvec_field.index_params->params.invert_params; + auto index_params = std::make_shared( + params->enable_range_optimization, params->enable_extended_wildcard); + field_schema->set_index_params(index_params); + } + } + + return zvec::Status::OK(); +} + +ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, + ZVecDataType data_type, + const void *value, + size_t value_size) { + if (!doc || !field_name || !value) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + std::string name(field_name); + ZVecErrorCode error_code = ZVEC_OK; + + switch (data_type) { + // Scalar types + case ZVEC_DATA_TYPE_BOOL: { + bool val = extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for bool type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT32: { + int32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT64: { + int64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int64 type"); + return error_code; + } + 
(*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + uint32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + uint64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + float val = extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for float type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + double val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for double type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + + // String and binary types + case ZVEC_DATA_TYPE_STRING: + case ZVEC_DATA_TYPE_BINARY: { + std::string val(static_cast(value), value_size); + (*doc_ptr)->set(name, val); + break; + } + + // Vector types + case ZVEC_DATA_TYPE_VECTOR_FP32: { + auto vec = extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + auto vec = extract_vector_values(value, value_size, + &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp16 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for 
vector_fp64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int8 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int16 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors are packed - each byte contains 2 int4 values + size_t count = value_size * 2; + const int8_t *packed_vals = static_cast(value); + std::vector vec; + vec.reserve(count); + + // Unpack int4 values + for (size_t i = 0; i < value_size; ++i) { + int8_t byte_val = packed_vals[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + vec.push_back((byte_val >> 4) & 0x0F); + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + + // Sparse vector types + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + auto sparse_vec = + extract_sparse_vector(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, sparse_vec); + break; + } + case 
ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + auto sparse_vec = extract_sparse_vector( + value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, sparse_vec); + break; + } + + // Array types + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + auto vec = extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_bool type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto vec = extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_float type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto vec = extract_array_values(value, value_size, &error_code); + if 
(error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_double type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + auto string_array = extract_string_array(value, value_size); + (*doc_ptr)->set(name, string_array); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + auto binary_array = extract_binary_array(value, value_size); + (*doc_ptr)->set(name, binary_array); + break; + } + + default: + set_last_error("Unsupported data type: " + std::to_string(data_type)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to add field: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, + const ZVecDocField *field) { + if (!doc || !field) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + + std::string name(field->name.data, field->name.length); + + switch (field->data_type) { + // Scalar basic types + case ZVEC_DATA_TYPE_BOOL: { + (*doc_ptr)->set(name, field->value.bool_value); + break; + } + case ZVEC_DATA_TYPE_INT32: { + (*doc_ptr)->set(name, field->value.int32_value); + break; + } + case ZVEC_DATA_TYPE_INT64: { + (*doc_ptr)->set(name, field->value.int64_value); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + (*doc_ptr)->set(name, field->value.uint32_value); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + (*doc_ptr)->set(name, field->value.uint64_value); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + (*doc_ptr)->set(name, field->value.float_value); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + (*doc_ptr)->set(name, field->value.double_value); + break; + } + + // String and binary types + case ZVEC_DATA_TYPE_STRING: { + std::string val(field->value.string_value.data, + field->value.string_value.length); + (*doc_ptr)->set(name, val); + break; + } 
+ case ZVEC_DATA_TYPE_BINARY: { + std::string val( + reinterpret_cast(field->value.binary_value.data), + field->value.binary_value.length); + (*doc_ptr)->set(name, val); + break; + } + + // Vector types + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + std::vector vec( + field->value.vector_value.data, + field->value.vector_value.data + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + size_t byte_count = (field->value.vector_value.length + 1) / 2; + const int8_t *packed_data = + reinterpret_cast(field->value.vector_value.data); + std::vector vec; + vec.reserve(field->value.vector_value.length); + + for (size_t i = 0; + i < byte_count && vec.size() < field->value.vector_value.length; + ++i) { + int8_t byte_val = packed_data[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + if (vec.size() < field->value.vector_value.length) { + vec.push_back((byte_val >> 4) & 0x0F); + } + } + (*doc_ptr)->set(name, vec); + 
break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + + // Sparse vector types + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + std::vector vec( + field->value.vector_value.data, + field->value.vector_value.data + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + + // Array types + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + std::vector array_values; + const uint8_t *data_ptr = field->value.binary_value.data; + size_t total_length = field->value.binary_value.length; + size_t offset = 0; + + while (offset + sizeof(uint32_t) <= total_length) { + uint32_t elem_length = + *reinterpret_cast(data_ptr + offset); + offset += sizeof(uint32_t); + + if (offset + elem_length <= total_length) { + std::string elem(reinterpret_cast(data_ptr + offset), + elem_length); + array_values.push_back(elem); + offset += elem_length; + } else { + break; + } + } + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + std::vector array_values; + const char *data_ptr = field->value.string_value.data; + size_t total_length = field->value.string_value.length; + size_t offset = 0; + + while (offset < total_length) { + size_t str_len = strlen(data_ptr + offset); + if (str_len > 0 && offset + str_len <= total_length) { + array_values.emplace_back(data_ptr + 
offset, str_len); + offset += str_len + 1; + } else { + break; + } + } + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + std::vector array_values( + reinterpret_cast(field->value.binary_value.data), + reinterpret_cast(field->value.binary_value.data) + + field->value.binary_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + std::vector array_values( + field->value.vector_value.data, + field->value.vector_value.data + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + + default: + set_last_error("Unsupported data type: " + + std::to_string(field->data_type)); + 
return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to add field: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +const char *zvec_doc_get_pk_pointer(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->pk_ref().data(); +} + +const char *zvec_doc_get_pk_copy(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + const std::string &pk = (*doc_ptr)->pk_ref(); + if (pk.empty()) return nullptr; + + char *result = new char[pk.length() + 1]; + strcpy(result, pk.c_str()); + return result; +} + +uint64_t zvec_doc_get_doc_id(const ZVecDoc *doc) { + if (!doc) return 0; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->doc_id(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get document ID: ") + e.what()); + return 0; + } +} + +float zvec_doc_get_score(const ZVecDoc *doc) { + if (!doc) return 0.0f; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->score(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get document score: ") + e.what()); + return 0.0f; + } +} + +ZVecDocOperator zvec_doc_get_operator(const ZVecDoc *doc) { + if (!doc) return ZVEC_DOC_OP_INSERT; // default + try { + auto doc_ptr = reinterpret_cast *>(doc); + zvec::Operator op = (*doc_ptr)->get_operator(); + return static_cast(op); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get document operator: ") + e.what()); + return ZVEC_DOC_OP_INSERT; + } +} + +size_t zvec_doc_get_field_count(const ZVecDoc *doc) { + if (!doc) return 0; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->field_names().size(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get field count: ") + e.what()); + return 0; + } +} + +ZVecErrorCode 
zvec_doc_get_field_value_basic(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void *value_buffer, + size_t buffer_size) { + if (!doc || !field_name || !value_buffer) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle basic data types that return values directly + switch (field_type) { + case ZVEC_DATA_TYPE_BOOL: { + if (buffer_size < sizeof(bool)) { + set_last_error("Buffer too small for bool value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const bool val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT32: { + if (buffer_size < sizeof(int32_t)) { + set_last_error("Buffer too small for int32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT64: { + if (buffer_size < sizeof(int64_t)) { + set_last_error("Buffer too small for int64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT32: { + if (buffer_size < sizeof(uint32_t)) { + set_last_error("Buffer too small for uint32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT64: { + if (buffer_size < sizeof(uint64_t)) { + set_last_error("Buffer too small for uint64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + if (buffer_size < 
sizeof(float)) { + set_last_error("Buffer too small for float value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const float val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + if (buffer_size < sizeof(double)) { + set_last_error("Buffer too small for double value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const double val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + default: { + set_last_error("Data type not supported for basic value return"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_get_field_value_copy(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void **value, size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle copy-returning data types (allocate new memory) + switch (field_type) { + // Basic types - copy the actual values + case ZVEC_DATA_TYPE_BOOL: { + const bool val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(bool)); + if (!buffer) { + set_last_error("Memory allocation failed for bool"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + 
*static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(float)); + if (!buffer) { + set_last_error("Memory allocation failed for float"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(double)); + if (!buffer) { + set_last_error("Memory allocation failed for double"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(double); + break; + } + + // String and binary types - copy the data + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(val.length()); + if (!buffer) 
{ + set_last_error("Memory allocation failed for string/binary"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), val.length()); + *value = buffer; + *value_size = val.length(); + break; + } + + // Vector types - copy the data + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(zvec::float16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + 
(*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int8_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int8 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + + // Sparse vector types - create flattened representation + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + using SparseVecFP16 = + std::pair, std::vector>; + const SparseVecFP16 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(zvec::float16_t)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP16"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = 
sparse_vec.second[i]; + ptr += sizeof(zvec::float16_t); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + using SparseVecFP32 = + std::pair, std::vector>; + const SparseVecFP32 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.second[i]; + ptr += sizeof(float); + } + + *value = buffer; + *value_size = total_size; + break; + } + + // Array types - create serialized representations + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + using BinaryArray = std::vector; + const BinaryArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &bin_val : array_vals) { + total_size += bin_val.length(); + } + + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for binary array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &bin_val : array_vals) { + memcpy(ptr, bin_val.data(), bin_val.length()); + ptr += bin_val.length(); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + using StringArray = std::vector; + const StringArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &str_val : array_vals) { + total_size += str_val.length() + 1; // +1 for null terminator + } + + void *buffer = malloc(total_size); + if (!buffer) { + 
set_last_error("Memory allocation failed for string array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &str_val : array_vals) { + memcpy(ptr, str_val.c_str(), str_val.length()); + ptr += str_val.length(); + *ptr = '\0'; + ptr++; + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + using BoolArray = std::vector; + const BoolArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t byte_count = (array_vals.size() + 7) / 8; + void *buffer = malloc(byte_count); + if (!buffer) { + set_last_error("Memory allocation failed for bool array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + uint8_t *bytes = static_cast(buffer); + memset(bytes, 0, byte_count); + + for (size_t i = 0; i < array_vals.size(); ++i) { + if (array_vals[i]) { + bytes[i / 8] |= (1 << (i % 8)); + } + } + + *value = buffer; + *value_size = byte_count; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + using Int32Array = std::vector; + const Int32Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + using Int64Array = std::vector; + const Int64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + using UInt32Array = std::vector; + const UInt32Array &array_vals = + 
(*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + using UInt64Array = std::vector; + const UInt64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + using FloatArray = std::vector; + const FloatArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for float array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + using DoubleArray = std::vector; + const DoubleArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for double array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return 
ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + const void **value, + size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Get field value based on data type + switch (field_type) { + case ZVEC_DATA_TYPE_BINARY: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.data(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.c_str(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + const bool val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(double); + break; + } + case 
ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 vectors typically stored as uint16_t + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(zvec::float16_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(double); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors typically stored as int8_t with 2 values per byte + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int16_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + 
auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto &array_vals = (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto &array_vals = (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(double); + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +bool zvec_doc_is_empty(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return true; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_empty(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to check if document is empty: ") + + e.what()); + return true; + } +} + +ZVecErrorCode zvec_doc_remove_field(ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->remove(std::string(field_name)); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to remove field: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + + +bool zvec_doc_has_field(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return 
false; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has(std::string(field_name)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to check field existence: ") + e.what()); + return false; + } +} + +bool zvec_doc_has_field_value(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has_value(std::string(field_name)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to check field value existence: ") + + e.what()); + return false; + } +} + +bool zvec_doc_is_field_null(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_null(std::string(field_name)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to check if field is null: ") + + e.what()); + return false; + } +} + +ZVecErrorCode zvec_doc_get_field_names(const ZVecDoc *doc, char ***field_names, + size_t *count) { + if (!doc || !field_names || !count) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + std::vector names = (*doc_ptr)->field_names(); + + *count = names.size(); + if (*count == 0) { + *field_names = nullptr; + return ZVEC_OK; + } + + *field_names = static_cast(malloc(*count * sizeof(char *))); + if (!*field_names) { + set_last_error("Failed to allocate memory for field names"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *count; ++i) { + (*field_names)[i] = copy_string(names[i]); + if (!(*field_names)[i]) { + for (size_t j = 0; j < i; ++j) { + free((*field_names)[j]); + } + free(*field_names); + *field_names = nullptr; + 
set_last_error("Failed to copy field name"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get field names: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_serialize(const ZVecDoc *doc, uint8_t **data, + size_t *size) { + if (!doc || !data || !size) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + std::vector serialized_data = (*doc_ptr)->serialize(); + + *size = serialized_data.size(); + if (*size == 0) { + *data = nullptr; + return ZVEC_OK; + } + + *data = static_cast(malloc(*size)); + if (!*data) { + set_last_error("Failed to allocate memory for serialized data"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(*data, serialized_data.data(), *size); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to serialize document: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_deserialize(const uint8_t *data, size_t size, + ZVecDoc **doc) { + if (!data || !doc || size == 0) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto deserialized_doc = zvec::Doc::deserialize(data, size); + if (!deserialized_doc) { + set_last_error("Failed to deserialize document"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + auto doc_ptr = new std::shared_ptr(deserialized_doc); + *doc = reinterpret_cast(doc_ptr); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to deserialize document: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +void zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other) { + if (!doc || !other) { + set_last_error("Document pointers are null"); + return; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + auto other_ptr = + reinterpret_cast *>(other); + 
(*doc_ptr)->merge(**other_ptr); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to merge documents: ") + e.what()); + } +} + +size_t zvec_doc_memory_usage(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return 0; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->memory_usage(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get document memory usage: ") + + e.what()); + return 0; + } +} + +ZVecErrorCode zvec_doc_validate(const ZVecDoc *doc, + const ZVecCollectionSchema *schema, + bool is_update, char **error_msg) { + if (!doc || !schema) { + set_last_error("Document or schema pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + std::shared_ptr schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + auto doc_ptr = reinterpret_cast *>(doc); + status = (*doc_ptr)->validate(schema_ptr, is_update); + if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + if (error_msg) { + *error_msg = nullptr; + } + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to validate document: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { + if (!doc || !detail_str) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + std::string detail = (*doc_ptr)->to_detail_string(); + *detail_str = copy_string(detail); + + if (!*detail_str && !detail.empty()) { + set_last_error("Failed to copy detail string"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + return ZVEC_OK; + } catch (const 
std::exception &e) { + set_last_error(std::string("Failed to get document detail string: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +// ============================================================================= +// Collection functions implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection) { + try { + if (!path || !schema || !collection) { + set_last_error("Path, schema, or collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::shared_ptr schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + zvec::CollectionOptions collection_options; + if (options) { + collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = + zvec::Collection::CreateAndOpen(path, *schema_ptr, collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code; + } catch (const std::exception &e) { + set_last_error( + std::string( + "Exception in zvec_collection_create_and_open_with_schema: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_open(const char *path, + const ZVecCollectionOptions *options, + ZVecCollection **collection) { + if (!path || !collection) { + set_last_error("Invalid arguments: path and collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + zvec::CollectionOptions collection_options; + if (options) { 
+ collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = zvec::Collection::Open(path, collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_close(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + delete reinterpret_cast *>(collection); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_destroy(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Destroy(); + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Flush(); + + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const 
std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_path(const ZVecCollection *collection, + char **path) { + if (!collection || !path) { + set_last_error("Invalid arguments: collection and path cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Path(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + *path = copy_string(result.value()); + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_name(const ZVecCollection *collection, + char **name) { + if (!collection || !name) { + set_last_error("Invalid arguments: collection and name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Schema(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + *name = copy_string(result.value().name()); + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, + ZVecCollectionSchema **schema) { + if (!collection || !schema) { + set_last_error("Invalid arguments: collection and schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Schema(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + const auto &cpp_schema = result.value(); + + // Create new schema structure + ZVecCollectionSchema *c_schema = new 
ZVecCollectionSchema(); + if (!c_schema) { + set_last_error("Failed to allocate memory for schema"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize the schema structure + c_schema->name = nullptr; + c_schema->fields = nullptr; + c_schema->field_count = 0; + c_schema->field_capacity = 0; + c_schema->max_doc_count_per_segment = + cpp_schema.max_doc_count_per_segment(); + + // Set collection name + c_schema->name = zvec_string_create(cpp_schema.name().c_str()); + if (!c_schema->name) { + delete c_schema; + set_last_error("Failed to allocate memory for collection name"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Convert and copy fields + const auto &cpp_fields = cpp_schema.fields(); + c_schema->field_count = cpp_fields.size(); + c_schema->field_capacity = cpp_fields.size(); + + if (c_schema->field_count > 0) { + // Allocate array of field pointers + c_schema->fields = new ZVecFieldSchema *[c_schema->field_count]; + if (!c_schema->fields) { + zvec_collection_schema_destroy(c_schema); + set_last_error("Failed to allocate memory for fields"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize all field pointers to nullptr + for (size_t i = 0; i < c_schema->field_count; ++i) { + c_schema->fields[i] = nullptr; + } + + size_t i = 0; + for (const auto &cpp_field : cpp_fields) { + try { + // Create new field schema + c_schema->fields[i] = new ZVecFieldSchema(); + + // Copy field name using zvec_string_create + c_schema->fields[i]->name = + zvec_string_create(cpp_field->name().c_str()); + if (!c_schema->fields[i]->name) { + throw std::bad_alloc(); + } + + // Convert data type + c_schema->fields[i]->data_type = + convert_zvec_data_type(cpp_field->data_type()); + + // Copy dimension for vector fields + c_schema->fields[i]->dimension = cpp_field->dimension(); + + // Copy nullable flag + c_schema->fields[i]->nullable = cpp_field->nullable(); + + // Initialize index parameters + c_schema->fields[i]->index_params = nullptr; + + // Convert index 
parameters based on the actual type + auto index_params = cpp_field->index_params(); + if (index_params) { + switch (index_params->type()) { + case zvec::IndexType::HNSW: { + // Cast to HnswIndexParams and convert + auto hnsw_params = + std::dynamic_pointer_cast( + index_params); + if (hnsw_params) { + auto c_hnsw_params = new ZVecHnswIndexParams(); + if (!c_hnsw_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_hnsw_params->base.base.index_type = ZVEC_INDEX_TYPE_HNSW; + c_hnsw_params->base.metric_type = + static_cast(hnsw_params->metric_type()); + c_hnsw_params->base.quantize_type = + static_cast( + hnsw_params->quantize_type()); + + // Set HNSW-specific parameters + c_hnsw_params->m = hnsw_params->m(); + c_hnsw_params->ef_construction = + hnsw_params->ef_construction(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_hnsw_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_HNSW; + } + break; + } + + case zvec::IndexType::IVF: { + // Cast to IVFIndexParams and convert + auto ivf_params = + std::dynamic_pointer_cast( + index_params); + if (ivf_params) { + auto c_ivf_params = new ZVecIVFIndexParams(); + if (!c_ivf_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_ivf_params->base.base.index_type = ZVEC_INDEX_TYPE_IVF; + c_ivf_params->base.metric_type = + static_cast(ivf_params->metric_type()); + c_ivf_params->base.quantize_type = + static_cast( + ivf_params->quantize_type()); + + // Set IVF-specific parameters + c_ivf_params->n_list = ivf_params->n_list(); + c_ivf_params->n_iters = ivf_params->n_iters(); + c_ivf_params->use_soar = ivf_params->use_soar(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_ivf_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_IVF; + } + break; + } + + case 
zvec::IndexType::FLAT: { + // Cast to FlatIndexParams and convert + auto flat_params = + std::dynamic_pointer_cast( + index_params); + if (flat_params) { + auto c_flat_params = new ZVecFlatIndexParams(); + if (!c_flat_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_flat_params->base.base.index_type = ZVEC_INDEX_TYPE_FLAT; + c_flat_params->base.metric_type = + static_cast(flat_params->metric_type()); + c_flat_params->base.quantize_type = + static_cast( + flat_params->quantize_type()); + + // Flat index has no additional parameters + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_flat_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_FLAT; + } + break; + } + + case zvec::IndexType::INVERT: { + // Cast to InvertIndexParams and convert + auto invert_params = + std::dynamic_pointer_cast( + index_params); + if (invert_params) { + auto c_invert_params = new ZVecInvertIndexParams(); + if (!c_invert_params) { + throw std::bad_alloc(); + } + + // Initialize the base index parameters + c_invert_params->base.index_type = ZVEC_INDEX_TYPE_INVERT; + + // Set Invert-specific parameters + c_invert_params->enable_range_optimization = + invert_params->enable_range_optimization(); + c_invert_params->enable_extended_wildcard = + invert_params->enable_extended_wildcard(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_invert_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_INVERT; + } + break; + } + + default: + // For undefined or unsupported index types, set to NULL + c_schema->fields[i]->index_params = nullptr; + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_UNDEFINED; + break; + } + } else { + // No index parameters, set to NULL + c_schema->fields[i]->index_params = nullptr; + } + } catch (const std::bad_alloc &) { + // Clean up 
already allocated fields + for (size_t j = 0; j <= i; ++j) { + if (c_schema->fields[j]) { + zvec_field_schema_destroy(c_schema->fields[j]); + } + } + delete[] c_schema->fields; + zvec_free_string(c_schema->name); + delete c_schema; + set_last_error("Failed to allocate memory for field"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + ++i; + } + } + + *schema = c_schema; + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, + ZVecCollectionOptions **options) { + if (!collection || !options) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto collection_ptr = + reinterpret_cast *>(collection); + auto result = (*collection_ptr)->Options(); + + if (!result.has_value()) { + set_last_error("Failed to get collection option: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // 创建并初始化选项结构体 + *options = new ZVecCollectionOptions(); + + (*options)->enable_mmap = result.value().enable_mmap_; + (*options)->max_buffer_size = result.value().max_buffer_size_; + (*options)->read_only = result.value().read_only_; + (*options)->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get collection options: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, + ZVecCollectionStats **stats) { + if (!collection || !stats) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto collection_ptr = + reinterpret_cast *>(collection); + auto result = (*collection_ptr)->Stats(); + + if (!result.has_value()) { + set_last_error("Failed to get collection stats: " + + result.error().message()); + 
return ZVEC_ERROR_INTERNAL_ERROR; + } + + *stats = new ZVecCollectionStats(); + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + (*stats)->doc_count = result.value().doc_count; + (*stats)->index_count = result.value().index_completeness.size(); + if ((*stats)->index_count > 0) { + (*stats)->index_completeness = + static_cast(malloc((*stats)->index_count * sizeof(float))); + (*stats)->index_names = static_cast( + malloc((*stats)->index_count * sizeof(ZVecString *))); + int i = 0; + for (auto &[name, completeness] : result.value().index_completeness) { + (*stats)->index_completeness[i] = completeness; + (*stats)->index_names[i] = zvec_string_create(name.c_str()); + i++; + } + } + } else { + (*stats)->index_completeness = nullptr; + *(*stats)->index_names = nullptr; + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get detailed collection stats: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { + if (stats) { + if (stats->index_names) { + for (size_t i = 0; i < stats->index_count; ++i) { + zvec_free_string(stats->index_names[i]); + } + free(stats->index_names); + } + + if (stats->index_completeness) { + free(stats->index_completeness); + } + + delete stats; + } +} + +// ============================================================================= +// QueryParams functions implementation +// ============================================================================= + +ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { + try { + auto params = new ZVecQueryParams(); + params->index_type = index_type; + params->radius = 0.0f; + params->is_linear = false; + params->is_using_refiner = false; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create query params: ") + e.what()); + return nullptr; + } +} + +ZVecHnswQueryParams 
*zvec_query_params_hnsw_create(ZVecIndexType index_type, + int ef, float radius, + bool is_linear, + bool is_using_refiner) { + try { + auto params = new ZVecHnswQueryParams(); + params->base.index_type = index_type; + params->base.radius = radius; + params->base.is_linear = is_linear; + params->base.is_using_refiner = is_using_refiner; + params->ef = ef; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create HNSW query params: ") + + e.what()); + return nullptr; + } +} + +ZVecIVFQueryParams *zvec_query_params_ivf_create(ZVecIndexType index_type, + int nprobe, + bool is_using_refiner, + float scale_factor) { + try { + auto params = new ZVecIVFQueryParams(); + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->nprobe = nprobe; + params->scale_factor = scale_factor; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create IVF query params: ") + + e.what()); + return nullptr; + } +} + +ZVecFlatQueryParams *zvec_query_params_flat_create(ZVecIndexType index_type, + bool is_using_refiner, + float scale_factor) { + try { + auto params = new ZVecFlatQueryParams(); + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->scale_factor = scale_factor; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create Flat query params: ") + + e.what()); + return nullptr; + } +} + +ZVecQueryParamsUnion *zvec_query_params_union_create(ZVecIndexType index_type) { + try { + auto params = new ZVecQueryParamsUnion(); + params->index_type = index_type; + + switch (index_type) { + case ZVEC_INDEX_TYPE_HNSW: + params->params.hnsw_params.base.index_type = index_type; + params->params.hnsw_params.ef = + zvec::core_interface::kDefaultHnswEfSearch; + break; + case ZVEC_INDEX_TYPE_IVF: + params->params.ivf_params.base.index_type = index_type; + 
params->params.ivf_params.nprobe = 10; + params->params.ivf_params.scale_factor = 10.0f; + break; + case ZVEC_INDEX_TYPE_FLAT: + params->params.flat_params.base.index_type = index_type; + params->params.flat_params.scale_factor = 10.0f; + break; + default: + params->params.base_params.index_type = index_type; + break; + } + + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create query params union: ") + + e.what()); + return nullptr; + } +} + +void zvec_query_params_destroy(ZVecQueryParams *params) { + if (params) { + delete params; + } +} + +void zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { + if (params) { + delete params; + } +} + +void zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params) { + if (params) { + delete params; + } +} + +void zvec_query_params_flat_destroy(ZVecFlatQueryParams *params) { + if (params) { + delete params; + } +} + +void zvec_query_params_union_destroy(ZVecQueryParamsUnion *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_query_params_set_index_type(ZVecQueryParams *params, + ZVecIndexType index_type) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->index_type = index_type; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_radius(ZVecQueryParams *params, + float radius) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->radius = radius; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_is_linear(ZVecQueryParams *params, + bool is_linear) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->is_linear = is_linear; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_is_using_refiner(ZVecQueryParams *params, + bool is_using_refiner) { + if (!params) { + set_last_error("Query params pointer is null"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->is_using_refiner = is_using_refiner; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, + int ef) { + if (!params) { + set_last_error("HNSW query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->ef = ef; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, + int nprobe) { + if (!params) { + set_last_error("IVF query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->nprobe = nprobe; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, + float scale_factor) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->scale_factor = scale_factor; + return ZVEC_OK; +} + + +// ============================================================================= +// Index Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_index( + ZVecCollection *collection, const char *column_name, + const ZVecIndexParams *index_params) { + if (!collection || !column_name || !index_params) { + set_last_error( + "Invalid arguments: collection, column_name, and index_params cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + std::string field_name_str(column_name); + + switch (index_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *invert_params = + &index_params->params.invert_params; + auto cpp_params = std::make_shared( + invert_params->enable_range_optimization, + invert_params->enable_extended_wildcard); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams 
*hnsw_params = + &index_params->params.hnsw_params; + auto metric = convert_metric_type(hnsw_params->base.metric_type); + auto quantize = convert_quantize_type(hnsw_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, hnsw_params->m, hnsw_params->ef_construction, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *flat_params = + &index_params->params.flat_params; + auto metric = convert_metric_type(flat_params->base.metric_type); + auto quantize = convert_quantize_type(flat_params->base.quantize_type); + auto cpp_params = + std::make_shared(metric, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_IVF: { + const ZVecIVFIndexParams *ivf_params = &index_params->params.ivf_params; + auto metric = convert_metric_type(ivf_params->base.metric_type); + auto quantize = convert_quantize_type(ivf_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, ivf_params->n_list, ivf_params->n_iters, + ivf_params->use_soar, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + default: { + set_last_error("Unsupported index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + } catch (const std::exception &e) { + set_last_error(std::string("Exception in zvec_collection_create_index: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_create_index_with_params( + ZVecCollection *collection, const ZVecString *field_name, + const void *index_params) { + if (!collection || !field_name || !index_params) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + auto coll_ptr = + reinterpret_cast *>(collection); + std::string field_name_str(field_name->data, 
field_name->length); + + const ZVecBaseIndexParams *base_params = + static_cast(index_params); + + try { + switch (base_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *invert_params = + static_cast(index_params); + auto cpp_params = std::make_shared( + invert_params->enable_range_optimization, + invert_params->enable_extended_wildcard); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams *hnsw_params = + static_cast(index_params); + auto metric = convert_metric_type(hnsw_params->base.metric_type); + auto quantize = convert_quantize_type(hnsw_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, hnsw_params->m, hnsw_params->ef_construction, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *flat_params = + static_cast(index_params); + auto metric = convert_metric_type(flat_params->base.metric_type); + auto quantize = convert_quantize_type(flat_params->base.quantize_type); + auto cpp_params = + std::make_shared(metric, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_IVF: { + const ZVecIVFIndexParams *ivf_params = + static_cast(index_params); + auto metric = convert_metric_type(ivf_params->base.metric_type); + auto quantize = convert_quantize_type(ivf_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, ivf_params->n_list, ivf_params->n_iters, + ivf_params->use_soar, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + default: { + set_last_error("Unsupported index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + } catch (const std::exception &e) { + 
set_last_error(e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_create_hnsw_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecHnswIndexParams *hnsw_params) { + if (!hnsw_params) { + set_last_error("Invalid HNSW parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + hnsw_params); +} + +ZVecErrorCode zvec_collection_create_flat_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecFlatIndexParams *flat_params) { + if (!flat_params) { + set_last_error("Invalid Flat parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + flat_params); +} + +ZVecErrorCode zvec_collection_create_ivf_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecIVFIndexParams *ivf_params) { + if (!ivf_params) { + set_last_error("Invalid IVF parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + ivf_params); +} + +ZVecErrorCode zvec_collection_create_invert_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecInvertIndexParams *invert_params) { + if (!invert_params) { + set_last_error("Invalid Invert parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + invert_params); +} + +ZVecErrorCode zvec_collection_drop_index(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropIndex(column_name); + if (!status.ok()) { + set_last_error(status.message()); + } + + return 
status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_optimize(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->Optimize(); + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + + +// ============================================================================= +// Column Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_add_column(ZVecCollection *collection, + const ZVecFieldSchema *field_schema, + const char *expression) { + if (!collection || !field_schema) { + set_last_error( + "Invalid arguments: collection and field_schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + zvec::DataType data_type = convert_data_type(field_schema->data_type); + if (data_type == zvec::DataType::UNDEFINED) { + set_last_error("Invalid data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::string field_name(field_schema->name->data, + field_schema->name->length); + bool is_vector_field = check_is_vector_field(*field_schema); + zvec::FieldSchema::Ptr schema; + if (is_vector_field) { + schema = std::make_shared(field_name, data_type, + field_schema->dimension, + field_schema->nullable); + } else { + schema = std::make_shared(field_name, data_type, + field_schema->nullable); + } + + std::string expr = expression ? 
expression : ""; + zvec::Status status = (*coll_ptr)->AddColumn(schema, expr); + + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_drop_column(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropColumn(column_name); + + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_alter_column(ZVecCollection *collection, + const char *column_name, + const char *new_name, + const ZVecFieldSchema *new_schema) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + std::string rename = new_name ? 
new_name : ""; + + zvec::FieldSchema::Ptr schema = nullptr; + if (new_schema) { + auto status = convert_zvec_field_schema_to_internal(*new_schema, schema); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + zvec::Status status = (*coll_ptr)->AlterColumn(column_name, rename, schema); + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +// ============================================================================= +// DML Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_insert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Insert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } else { + *success_count = 0; + *error_count = doc_count; + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception in zvec_collection_insert_docs: ") + + e.what()); + *success_count = 0; + *error_count = doc_count; + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode 
zvec_collection_update(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Update(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + + +ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Upsert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code; + } catch (const std::exception &e) { + 
set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_delete(ZVecCollection *collection, + const char *const *pks, size_t pk_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !pks || pk_count == 0 || !success_count || !error_count) { + set_last_error( + "Invalid arguments: collection, pks, pk_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector primary_keys; + primary_keys.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + primary_keys.emplace_back(pks[i]); + } + } + + auto result = (*coll_ptr)->Delete(primary_keys); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, + const char *filter) { + if (!collection || !filter) { + set_last_error("Invalid arguments: collection,filter cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + auto status = (*coll_ptr)->DeleteByFilter(filter); + if (!status.ok()) { + set_last_error(status.message()); + return status_to_error_code(status); + } + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + + +// ============================================================================= +// Data query interface implementation +// 
============================================================================= + + +// Helper function to convert common query parameters +void convert_common_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + internal_query.topk_ = query->topk; + internal_query.field_name_ = + std::string(query->field_name.data, query->field_name.length); + internal_query.filter_ = + std::string(query->filter.data, query->filter.length); + internal_query.include_vector_ = query->include_vector; + internal_query.include_doc_id_ = query->include_doc_id; + + // Binary data conversion (query_vector) + if (query->query_vector.data && query->query_vector.length > 0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + // Sparse vector data conversion + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + // Output fields conversion + if (query->output_fields && query->output_fields->count > 0) { + internal_query.output_fields_ = std::vector(); + for (size_t i = 0; i < query->output_fields->count; ++i) { + internal_query.output_fields_->emplace_back( + query->output_fields->strings[i].data, + query->output_fields->strings[i].length); + } + } +} + +// Helper function to convert query parameters +void convert_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + convert_common_query_params(internal_query, query); + + // QueryParams conversion + if (query->query_params) { + auto query_params = std::make_shared( + static_cast(query->query_params->index_type)); 
+ + switch (query->query_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = std::make_shared( + query->query_params->params.hnsw_params.ef, + query->query_params->params.hnsw_params.base.radius, + query->query_params->params.hnsw_params.base.is_linear, + query->query_params->params.hnsw_params.base.is_using_refiner); + internal_query.query_params_ = hnsw_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = std::make_shared( + query->query_params->params.ivf_params.nprobe, + query->query_params->params.ivf_params.base.is_using_refiner, + query->query_params->params.ivf_params.scale_factor); + internal_query.query_params_ = ivf_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = std::make_shared( + query->query_params->params.flat_params.base.is_using_refiner, + query->query_params->params.flat_params.scale_factor); + internal_query.query_params_ = flat_params; + break; + } + default: { + query_params->set_radius( + query->query_params->params.base_params.radius); + query_params->set_is_linear( + query->query_params->params.base_params.is_linear); + query_params->set_is_using_refiner( + query->query_params->params.base_params.is_using_refiner); + internal_query.query_params_ = query_params; + break; + } + } + } +} + +// Helper function to convert group by query parameters +void convert_groupby_query_params(zvec::GroupByVectorQuery &internal_query, + const ZVecGroupByVectorQuery *query) { + internal_query.field_name_ = + std::string(query->field_name.data, query->field_name.length); + internal_query.filter_ = + std::string(query->filter.data, query->filter.length); + internal_query.include_vector_ = query->include_vector; + internal_query.group_by_field_name_ = std::string( + query->group_by_field_name.data, query->group_by_field_name.length); + internal_query.group_count_ = query->group_count; + internal_query.group_topk_ = query->group_topk; + + if (query->query_vector.data && query->query_vector.length > 
0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + if (query->output_fields && query->output_fields->count > 0) { + if (!internal_query.output_fields_.has_value()) { + internal_query.output_fields_ = std::vector(); + } + for (size_t i = 0; i < query->output_fields->count; ++i) { + internal_query.output_fields_->push_back( + std::string(query->output_fields->strings[i].data, + query->output_fields->strings[i].length)); + } + } + + if (query->query_params) { + auto query_params = std::make_shared( + static_cast(query->query_params->index_type)); + + switch (query->query_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = std::make_shared( + query->query_params->params.hnsw_params.ef, + query->query_params->params.hnsw_params.base.radius, + query->query_params->params.hnsw_params.base.is_linear, + query->query_params->params.hnsw_params.base.is_using_refiner); + internal_query.query_params_ = hnsw_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = std::make_shared( + query->query_params->params.ivf_params.nprobe, + query->query_params->params.ivf_params.base.is_using_refiner, + query->query_params->params.ivf_params.scale_factor); + internal_query.query_params_ = ivf_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = std::make_shared( + query->query_params->params.flat_params.base.is_using_refiner, + query->query_params->params.flat_params.scale_factor); + 
internal_query.query_params_ = flat_params; + break; + } + default: { + query_params->set_radius( + query->query_params->params.base_params.radius); + query_params->set_is_linear( + query->query_params->params.base_params.is_linear); + query_params->set_is_using_refiner( + query->query_params->params.base_params.is_using_refiner); + internal_query.query_params_ = query_params; + break; + } + } + } +} + +// Helper function to convert document results to C API format +ZVecErrorCode convert_document_results( + const std::vector> &query_results, + ZVecDoc ***results, size_t *result_count) { + *result_count = query_results.size(); + *results = static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + + if (!*results) { + set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *result_count; ++i) { + const auto &internal_doc = query_results[i]; + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < i; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = reinterpret_cast *>(c_doc); + *(*doc_ptr) = *internal_doc; // Copy assignment + (*results)[i] = c_doc; // Store the pointer, not dereference + } + + return ZVEC_OK; +} + +// Helper function to convert grouped document results to C API format +ZVecErrorCode convert_grouped_document_results( + const std::vector &group_results, ZVecDoc ***results, + ZVecString ***group_by_values, size_t *result_count) { + // Calculate total document count across all groups + size_t total_docs = 0; + for (const auto &group_result : group_results) { + total_docs += group_result.docs_.size(); + } + + // Allocate memory for document pointers and group by values + 
*result_count = total_docs; + *results = static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + *group_by_values = static_cast( + malloc(group_results.size() * sizeof(ZVecString *))); + + if (!*results) { + set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Convert C++ grouped results to C API format + size_t doc_index = 0; + for (const auto &group_result : group_results) { + for (const auto &internal_doc : group_result.docs_) { + if (doc_index >= *result_count) { + break; + } + + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < doc_index; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = + reinterpret_cast *>(c_doc); + *(*doc_ptr) = internal_doc; // Copy assignment + + ZVecString *c_group_value = + zvec_string_create(group_result.group_by_value_.c_str()); + if (!c_group_value) { + for (size_t j = 0; j < doc_index; ++j) { + zvec_doc_destroy((*results)[j]); + zvec_free_string((*group_by_values)[doc_index]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create string wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + (*group_by_values)[doc_index] = c_group_value; + (*results)[doc_index] = c_doc; + ++doc_index; + } + } + + return ZVEC_OK; +} + +// Helper function to convert fetched document results to C API format +ZVecErrorCode convert_fetched_document_results(const zvec::DocPtrMap &doc_map, + ZVecDoc ***results, + size_t *doc_count) { + // Calculate actual document count (some PKs might not exist) + size_t actual_count = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr) { + actual_count++; + } + } + + // Allocate memory for 
document pointers + *doc_count = actual_count; + if (*doc_count == 0) { + *results = nullptr; + return ZVEC_OK; + } + + *results = static_cast(malloc(*doc_count * sizeof(ZVecDoc *))); + if (!*results) { + set_last_error("Failed to allocate memory for document pointers"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Convert C++ DocPtrMap to C ZVecDoc pointer array + size_t index = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr && index < *doc_count) { + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < index; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *doc_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto cpp_doc_ptr = reinterpret_cast *>(c_doc); + *(*cpp_doc_ptr) = *doc_ptr; // Copy assignment + + // Set the primary key explicitly + zvec_doc_set_pk(c_doc, pk.c_str()); + + (*results)[index] = c_doc; + ++index; + } + } + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_query(const ZVecCollection *collection, + const ZVecVectorQuery *query, + ZVecDoc ***results, size_t *result_count) { + if (!collection || !query || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, query, results and result_count cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + // Convert query parameters using helper function + zvec::VectorQuery internal_query; + convert_query_params(internal_query, query); + + auto result = (*coll_ptr)->Query(internal_query); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + const auto &query_results = result.value(); + error_code = + convert_document_results(query_results, results, result_count); + } else { + *results = nullptr; + 
*result_count = 0;
+    }
+
+    return error_code;
+  } catch (const std::exception &e) {
+    set_last_error(std::string("Exception occurred: ") + e.what());
+    *results = nullptr;
+    *result_count = 0;
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+}
+
+// Runs a group-by vector query and returns the matches as two parallel
+// arrays produced by convert_grouped_document_results.
+//
+// Returns ZVEC_ERROR_INVALID_ARGUMENT when any pointer argument is null.
+// On any other failure *results and *group_by_values are set to nullptr and
+// *result_count to 0.
+//
+// NOTE(review): the reinterpret_cast target type was lost in extraction;
+// const std::shared_ptr<zvec::Collection> * is a reconstruction - confirm
+// against the real c_api.cc.
+ZVecErrorCode zvec_collection_query_by_group(
+    const ZVecCollection *collection, const ZVecGroupByVectorQuery *query,
+    ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count) {
+  if (!collection || !query || !results || !group_by_values || !result_count) {
+    set_last_error(
+        "Invalid arguments: collection, query, results, group_by_values and "
+        "result_count cannot "
+        "be null");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  try {
+    auto coll_ptr =
+        reinterpret_cast<const std::shared_ptr<zvec::Collection> *>(collection);
+
+    zvec::GroupByVectorQuery internal_query;
+    convert_groupby_query_params(internal_query, query);
+
+    auto result = (*coll_ptr)->GroupByQuery(internal_query);
+    ZVecErrorCode error_code = handle_expected_result(result);
+
+    if (error_code == ZVEC_OK) {
+      const auto &group_results = result.value();
+      error_code = convert_grouped_document_results(
+          group_results, results, group_by_values, result_count);
+    } else {
+      *results = nullptr;
+      *group_by_values = nullptr;
+      *result_count = 0;
+    }
+
+    return error_code;
+  } catch (const std::exception &e) {
+    set_last_error(std::string("Exception occurred: ") + e.what());
+    *results = nullptr;
+    *group_by_values = nullptr;
+    *result_count = 0;
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+}
+
+// Fetches documents by primary key.
+//
+// pks must hold pk_count non-null C strings. On success *results is the
+// array built by convert_fetched_document_results and *doc_count its length;
+// with pk_count == 0 the result is nullptr/0 and ZVEC_OK.
+// Returns ZVEC_ERROR_INVALID_ARGUMENT for a null pointer argument or a null
+// entry in pks, and ZVEC_ERROR_INTERNAL_ERROR when the fetch fails or an
+// exception is caught. Once the arguments have been validated, *results and
+// *doc_count are nullptr/0 on every failure path.
+//
+// NOTE(review): the template arguments of the cast and of pk_vector were
+// lost in extraction; the types below are reconstructed from usage -
+// confirm against the real c_api.cc.
+ZVecErrorCode zvec_collection_fetch(ZVecCollection *collection,
+                                    const char *const *pks, size_t pk_count,
+                                    ZVecDoc ***results, size_t *doc_count) {
+  if (!collection || !pks || !results || !doc_count) {
+    set_last_error(
+        "Invalid arguments: collection, pks, results and doc_count cannot "
+        "be null");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Give the outputs a defined value up front so that no early return below
+  // can leave the caller with uninitialized pointers.
+  *results = nullptr;
+  *doc_count = 0;
+
+  // Handle empty case
+  if (pk_count == 0) {
+    return ZVEC_OK;
+  }
+
+  try {
+    auto coll_ptr =
+        reinterpret_cast<std::shared_ptr<zvec::Collection> *>(collection);
+
+    // Convert C array to C++ vector
+    std::vector<std::string> pk_vector;
+    pk_vector.reserve(pk_count);
+    for (size_t i = 0; i < pk_count; ++i) {
+      if (pks[i]) {
+        pk_vector.emplace_back(pks[i]);
+      } else {
+        set_last_error("Null primary key at index " + std::to_string(i));
+        return ZVEC_ERROR_INVALID_ARGUMENT;
+      }
+    }
+
+    // Call C++ fetch method
+    auto result = (*coll_ptr)->Fetch(pk_vector);
+    if (!result.has_value()) {
+      set_last_error("Failed to fetch documents: " + result.error().message());
+      return ZVEC_ERROR_INTERNAL_ERROR;
+    }
+
+    const auto &doc_map = result.value();
+    return convert_fetched_document_results(doc_map, results, doc_count);
+
+  } catch (const std::exception &e) {
+    set_last_error(std::string("Exception in zvec_collection_fetch: ") +
+                   e.what());
+    *results = nullptr;
+    *doc_count = 0;
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+}
diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h
new file mode 100644
index 000000000..0ae01935a
--- /dev/null
+++ b/src/include/zvec/c_api.h
@@ -0,0 +1,2516 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#ifndef ZVEC_C_API_H +#define ZVEC_C_API_H + +#include +#include +#include + +// ============================================================================= +// API Export Control +// ============================================================================= + +#if defined(_WIN32) || defined(__CYGWIN__) +#ifdef ZVEC_BUILD_SHARED +#define ZVEC_EXPORT __declspec(dllexport) +#elif defined(ZVEC_USE_SHARED) +#define ZVEC_EXPORT __declspec(dllimport) +#else +#define ZVEC_EXPORT +#endif +#define ZVEC_CALL __cdecl +#else +#if __GNUC__ >= 4 +#define ZVEC_EXPORT __attribute__((visibility("default"))) +#else +#define ZVEC_EXPORT +#endif +#define ZVEC_CALL +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +// ============================================================================= +// Version Information +// ============================================================================= + +/** @brief Major version number */ +#define ZVEC_VERSION_MAJOR 0 + +/** @brief Minor version number */ +#define ZVEC_VERSION_MINOR 3 + +/** @brief Patch version number */ +#define ZVEC_VERSION_PATCH 0 + +/** @brief Full version string */ +#define ZVEC_VERSION_STRING "0.3.0" + +/** + * @brief Get library version information + * + * Return format: "{base_version}[-{git_info}] (built {build_time})" + * Example: "0.3.0-g3f8a2b1 (built 2025-05-13 10:30:45)" + * + * @return const char* Version string, managed internally by the library, caller + * should not free + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_get_version(void); + +/** + * @brief Check API version compatibility + * + * Check if the current library version meets the specified minimum version + * requirements Following semantic versioning specification: MAJOR.MINOR.PATCH + * + * @param major Required major version number + * @param minor Required minor version number + * @param patch Required patch version number + * @return bool Returns true if compatible, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL 
zvec_check_version(int major, int minor, int patch); + +/** + * @brief Get major version number + * + * @return int Major version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_major(void); + +/** + * @brief Get minor version number + * + * @return int Minor version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_minor(void); + + +/** + * @brief Get patch version number + * + * @return int Patch version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_patch(void); + + +// ============================================================================= +// Error Code Definitions +// ============================================================================= + +/** + * @brief ZVec C API error code enumeration + */ +typedef enum { + ZVEC_OK = 0, /**< Success */ + ZVEC_ERROR_NOT_FOUND = 1, /**< Resource not found */ + ZVEC_ERROR_ALREADY_EXISTS = 2, /**< Resource already exists */ + ZVEC_ERROR_INVALID_ARGUMENT = 3, /**< Invalid argument */ + ZVEC_ERROR_PERMISSION_DENIED = 4, /**< Permission denied */ + ZVEC_ERROR_FAILED_PRECONDITION = 5, /**< Failed precondition */ + ZVEC_ERROR_RESOURCE_EXHAUSTED = 6, /**< Resource exhausted */ + ZVEC_ERROR_UNAVAILABLE = 7, /**< Unavailable */ + ZVEC_ERROR_INTERNAL_ERROR = 8, /**< Internal error */ + ZVEC_ERROR_NOT_SUPPORTED = 9, /**< Unsupported operation */ + ZVEC_ERROR_UNKNOWN = 10 /**< Unknown error */ +} ZVecErrorCode; + +/** + * @brief Error details structure + */ +typedef struct { + ZVecErrorCode code; /**< Error code */ + const char *message; /**< Error message */ + const char *file; /**< File where error occurred */ + int line; /**< Line number where error occurred */ + const char *function; /**< Function where error occurred */ +} ZVecErrorDetails; + +/** + * @brief Get detailed information of the last error + * @param[out] error_details Pointer to error details structure + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_get_last_error_details(ZVecErrorDetails 
*error_details); + +/** + * @brief Get last error message + * @param[out] error_msg Returned error message string (needs to be freed by + * calling zvec_free) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_get_last_error(char **error_msg); + +/** + * @brief Clear error status + */ +ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); + + +// ============================================================================= +// Basic Data Structures +// ============================================================================= + +/** + * @brief String view structure (does not own memory) + */ +typedef struct { + const char *data; /**< String data pointer */ + size_t length; /**< String length */ +} ZVecStringView; + +/** + * @brief Mutable string structure (owns memory) + */ +typedef struct { + char *data; /**< String data pointer */ + size_t length; /**< String length */ + size_t capacity; /**< Allocated capacity */ +} ZVecString; + +/** + * @brief String array structure + */ +typedef struct { + ZVecString *strings; /**< String array */ + size_t count; /**< String count */ +} ZVecStringArray; + +/** + * @brief Float array structure + */ +typedef struct { + const float *data; + size_t length; +} ZVecFloatArray; + +/** + * @brief Integer array structure + */ +typedef struct { + const int64_t *data; + size_t length; +} ZVecInt64Array; + +/** + * @brief Byte array structure + */ +typedef struct { + const uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Array length */ +} ZVecByteArray; + +/** + * @brief Mutable byte array structure + */ +typedef struct { + uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Current length */ + size_t capacity; /**< Allocated capacity */ +} ZVecMutableByteArray; + +// ============================================================================= +// String management functions +// ============================================================================= + +/** + * @brief 
Create string from C string + * @param str C string + * @return ZVecString* Pointer to the newly created string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_create(const char *str); + +/** + * @brief Create string from string view + * + * Creates a new ZVecString by copying data from a ZVecStringView. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param view Pointer to source string view (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_create_from_view(const ZVecStringView *view); + +/** + * @brief Create binary-safe string from raw data + * + * Creates a new ZVecString from raw binary data that may contain null bytes. + * Unlike zvec_string_create(), this function takes explicit length parameter + * and doesn't rely on null-termination. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param data Raw binary data pointer (must not be NULL) + * @param length Length of data in bytes + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + * @note This function is suitable for binary data containing null bytes + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_bin_create(const uint8_t *data, size_t length); + +/** + * @brief Copy string + * + * Creates a new ZVecString by copying an existing string. + * The created string owns its memory and must be freed with zvec_free_string(). 
+ * + * @param str Pointer to source string (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_copy(const ZVecString *str); + +/** + * @brief Get C string from ZVecString + * @param str ZVecString pointer + * @return const char* C string + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_string_c_str(const ZVecString *str); + +/** + * @brief Get string length + * @param str ZVecString pointer + * @return size_t String length + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_string_length(const ZVecString *str); + +/** + * @brief Compare two strings + * @param str1 First string + * @param str2 Second string + * @return int Comparison result (-1, 0, or 1) + */ +ZVEC_EXPORT int ZVEC_CALL zvec_string_compare(const ZVecString *str1, + const ZVecString *str2); + + +// ============================================================================= +// Configuration and Options Structures +// ============================================================================= + +/** + * @brief Log level enumeration + */ +typedef enum { + ZVEC_LOG_LEVEL_DEBUG = 0, + ZVEC_LOG_LEVEL_INFO = 1, + ZVEC_LOG_LEVEL_WARN = 2, + ZVEC_LOG_LEVEL_ERROR = 3, + ZVEC_LOG_LEVEL_FATAL = 4 +} ZVecLogLevel; + +/** + * @brief Log type enumeration + */ +typedef enum { ZVEC_LOG_TYPE_CONSOLE = 0, ZVEC_LOG_TYPE_FILE = 1 } ZVecLogType; + +/** + * @brief Console log configuration structure + */ +typedef struct { + ZVecLogLevel level; /**< Log level */ +} ZVecConsoleLogConfig; + +/** + * @brief File log configuration structure + */ +typedef struct { + ZVecLogLevel level; /**< Log level */ + ZVecString dir; /**< Log directory */ + ZVecString basename; /**< Log file base name */ + uint32_t file_size; /**< Log file size (MB) */ + uint32_t overdue_days; /**< Log expiration days */ +} ZVecFileLogConfig; + +/** + * @brief Log configuration union + */ +typedef struct { + 
ZVecLogType type; /**< Log type */ + union { + ZVecConsoleLogConfig console_config; /**< Console log configuration */ + ZVecFileLogConfig file_config; /**< File log configuration */ + } config; +} ZVecLogConfig; + +/** + * @brief ZVec configuration data structure (corresponds to zvec::ConfigData) + */ +typedef struct { + uint64_t memory_limit_bytes; /**< Memory limit in bytes */ + + // log + ZVecLogConfig *log_config; /**< Log configuration (optional, NULL means using + default configuration) */ + + // query + uint32_t query_thread_count; /**< Query thread count */ + float invert_to_forward_scan_ratio; /**< Inverted to forward scan ratio */ + float brute_force_by_keys_ratio; /**< Brute force by keys ratio */ + + // optimize + uint32_t optimize_thread_count; /**< Optimize thread count */ +} ZVecConfigData; + +/** + * @brief Create console log configuration + * @param level Log level + * @return ZVecConsoleLogConfig* Pointer to the newly created console log + * configuration + */ +ZVEC_EXPORT ZVecConsoleLogConfig *ZVEC_CALL +zvec_config_console_log_create(ZVecLogLevel level); + +/** + * @brief Create file log configuration + * @param level Log level + * @param dir Log directory + * @param basename Log file base name + * @param file_size Log file size (MB) + * @param overdue_days Log expiration days + * @return ZVecFileLogConfig* Pointer to the newly created file log + * configuration + */ +ZVEC_EXPORT ZVecFileLogConfig *ZVEC_CALL zvec_config_file_log_create( + ZVecLogLevel level, const char *dir, const char *basename, + uint32_t file_size, uint32_t overdue_days); + +/** + * @brief Create log configuration + * @param type Log type + * @param config_data Configuration data (specific to log type) + * @return ZVecLogConfig* Pointer to the newly created log configuration + */ +ZVEC_EXPORT ZVecLogConfig *ZVEC_CALL zvec_config_log_create(ZVecLogType type, + void *config_data); + +/** + * @brief Destroy console log configuration + * @param config Console log configuration 
pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_console_log_destroy(ZVecConsoleLogConfig *config); + +/** + * @brief Destroy file log configuration + * @param config File log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_file_log_destroy(ZVecFileLogConfig *config); + +/** + * @brief Destroy log configuration + * @param config Log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_config_log_destroy(ZVecLogConfig *config); + +/** + * @brief Create configuration data + * @return ZVecConfigData* Pointer to the newly created configuration data + */ +ZVEC_EXPORT ZVecConfigData *ZVEC_CALL zvec_config_data_create(void); + +/** + * @brief Destroy configuration data + * @param config Configuration data pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_config_data_destroy(ZVecConfigData *config); + +/** + * @brief Set memory limit in configuration data + * @param config Configuration data pointer + * @param memory_limit_bytes Memory limit in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_memory_limit( + ZVecConfigData *config, uint64_t memory_limit_bytes); + +/** + * @brief Set log configuration in configuration data + * @param config Configuration data pointer + * @param log_config Log configuration pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_log_config( + ZVecConfigData *config, ZVecLogConfig *log_config); + +/** + * @brief Set query thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Query thread count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_query_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +/** + * @brief Set optimize thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Optimize thread count + * @return ZVecErrorCode Error code + */ 
+ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +/** + * @brief Destroy log configuration + * @param config Log configuration structure pointer + */ +void zvec_config_log_destroy(ZVecLogConfig *config); + +// ============================================================================= +// Initialization and Cleanup Interface +// ============================================================================= + +/** + * @brief Initialize ZVec library + * @param config Configuration data (optional, NULL means using default + * configuration) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_initialize(const ZVecConfigData *config); + +/** + * @brief Clean up ZVec library resources + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_shutdown(void); + +/** + * @brief Check if library is initialized + * @param[out] initialized Whether initialized + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_is_initialized(bool *initialized); + +// ============================================================================= +// Data Type Enumerations +// ============================================================================= + +/** + * @brief Data type enumeration + */ +typedef enum { + ZVEC_DATA_TYPE_UNDEFINED = 0, + + ZVEC_DATA_TYPE_BINARY = 1, + ZVEC_DATA_TYPE_STRING = 2, + ZVEC_DATA_TYPE_BOOL = 3, + ZVEC_DATA_TYPE_INT32 = 4, + ZVEC_DATA_TYPE_INT64 = 5, + ZVEC_DATA_TYPE_UINT32 = 6, + ZVEC_DATA_TYPE_UINT64 = 7, + ZVEC_DATA_TYPE_FLOAT = 8, + ZVEC_DATA_TYPE_DOUBLE = 9, + + ZVEC_DATA_TYPE_VECTOR_BINARY32 = 20, + ZVEC_DATA_TYPE_VECTOR_BINARY64 = 21, + ZVEC_DATA_TYPE_VECTOR_FP16 = 22, + ZVEC_DATA_TYPE_VECTOR_FP32 = 23, + ZVEC_DATA_TYPE_VECTOR_FP64 = 24, + ZVEC_DATA_TYPE_VECTOR_INT4 = 25, + ZVEC_DATA_TYPE_VECTOR_INT8 = 26, + ZVEC_DATA_TYPE_VECTOR_INT16 = 27, + + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16 = 
30, + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 = 31, + + ZVEC_DATA_TYPE_ARRAY_BINARY = 40, + ZVEC_DATA_TYPE_ARRAY_STRING = 41, + ZVEC_DATA_TYPE_ARRAY_BOOL = 42, + ZVEC_DATA_TYPE_ARRAY_INT32 = 43, + ZVEC_DATA_TYPE_ARRAY_INT64 = 44, + ZVEC_DATA_TYPE_ARRAY_UINT32 = 45, + ZVEC_DATA_TYPE_ARRAY_UINT64 = 46, + ZVEC_DATA_TYPE_ARRAY_FLOAT = 47, + ZVEC_DATA_TYPE_ARRAY_DOUBLE = 48 +} ZVecDataType; + +/** + * @brief Index type enumeration + */ +typedef enum { + ZVEC_INDEX_TYPE_UNDEFINED = 0, + ZVEC_INDEX_TYPE_HNSW = 1, + ZVEC_INDEX_TYPE_IVF = 3, + ZVEC_INDEX_TYPE_FLAT = 4, + ZVEC_INDEX_TYPE_INVERT = 10 +} ZVecIndexType; + +/** + * @brief Distance metric type enumeration + */ +typedef enum { + ZVEC_METRIC_TYPE_UNDEFINED = 0, + ZVEC_METRIC_TYPE_L2 = 1, + ZVEC_METRIC_TYPE_IP = 2, + ZVEC_METRIC_TYPE_COSINE = 3, + ZVEC_METRIC_TYPE_MIPSL2 = 4 +} ZVecMetricType; + +/** + * @brief Quantization type enumeration + */ +typedef enum { + ZVEC_QUANTIZE_TYPE_UNDEFINED = 0, + ZVEC_QUANTIZE_TYPE_FP16 = 1, + ZVEC_QUANTIZE_TYPE_INT8 = 2, + ZVEC_QUANTIZE_TYPE_INT4 = 3 +} ZVecQuantizeType; + +// ============================================================================= +// Forward Declarations +// ============================================================================= + +typedef struct ZVecCollection ZVecCollection; + +// ============================================================================= +// Index Parameters Structures +// ============================================================================= + +/** + * @brief Base index parameters structure + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ +} ZVecBaseIndexParams; + +/** + * @brief Scalar index parameters structure + */ +typedef struct { + ZVecBaseIndexParams base; /**< Inherit base parameters */ + bool enable_range_optimization; /**< Whether to enable range optimization */ + bool enable_extended_wildcard; /**< Whether to enable extended wildcard */ +} ZVecInvertIndexParams; + +/** + * @brief Vector index 
base parameters structure + */ +typedef struct { + ZVecBaseIndexParams base; /**< Inherit base parameters */ + ZVecMetricType metric_type; /**< Distance metric type */ + ZVecQuantizeType quantize_type; /**< Quantization type */ +} ZVecVectorIndexParams; + +/** + * @brief HNSW index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + int m; /**< Graph connectivity parameter */ + int ef_construction; /**< Exploration factor during construction */ + int ef_search; /**< Exploration factor during search */ +} ZVecHnswIndexParams; + +/** + * @brief Flat index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + // Flat index has no additional parameters +} ZVecFlatIndexParams; + +/** + * @brief IVF index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + int n_list; /**< Number of cluster centers */ + int n_iters; /**< Number of iterations */ + bool use_soar; /**< Whether to use SOAR algorithm */ + int n_probe; /**< Number of clusters to probe during search */ +} ZVecIVFIndexParams; + +/** + * @brief Generic index parameters union + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ + union { + ZVecInvertIndexParams invert_params; /**< Scalar index parameters */ + ZVecHnswIndexParams hnsw_params; /**< HNSW index parameters */ + ZVecFlatIndexParams flat_params; /**< Flat index parameters */ + ZVecIVFIndexParams ivf_params; /**< IVF index parameters */ + } params; +} ZVecIndexParams; + +// ============================================================================= +// Field Schema Structures +// ============================================================================= + +/** + * @brief Field schema structure + */ +typedef struct { + ZVecString *name; /**< Field name */ + ZVecDataType data_type; /**< Data type */ + bool nullable; /**< Whether nullable */ + 
uint32_t dimension; /**< Vector dimension (only used for vector fields) */ + ZVecIndexParams *index_params; /**< Index parameters, NULL means no index */ +} ZVecFieldSchema; + + +// ============================================================================= +// Index Parameters Creation and Destruction Interface +// ============================================================================= + +/** + * @brief Initialize base index parameters + * @param params Base index parameters structure pointer + * @param index_type Index type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_base_init( + ZVecBaseIndexParams *params, ZVecIndexType index_type); + +/** + * @brief Initialize scalar index parameters + * @param params Scalar index parameters structure pointer + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable wildcard expansion + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_invert_init( + ZVecInvertIndexParams *params, bool enable_range_opt, bool enable_wildcard); + +/** + * @brief Initialize vector index parameters + * @param params Vector index parameters structure pointer + * @param index_type Index type + * @param metric_type Metric type + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_vector_init( + ZVecVectorIndexParams *params, ZVecIndexType index_type, + ZVecMetricType metric_type, ZVecQuantizeType quantize_type); + +/** + * @brief Initialize HNSW index parameters + * @param params HNSW index parameters structure pointer + * @param metric_type Metric type + * @param m Connectivity parameter + * @param ef_construction Construction exploration factor + * @param ef_search Search exploration factor + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_hnsw_init(ZVecHnswIndexParams *params, + ZVecMetricType metric_type, int m, + int ef_construction, int ef_search, + ZVecQuantizeType quantize_type); + +/** + * 
@brief Initialize Flat index parameters + * @param params Flat index parameters structure pointer + * @param metric_type Metric type + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_flat_init(ZVecFlatIndexParams *params, + ZVecMetricType metric_type, + ZVecQuantizeType quantize_type); + +/** + * @brief Initialize IVF index parameters + * @param params IVF index parameters structure pointer + * @param metric_type Metric type + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR algorithm + * @param n_probe Search probe count + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_ivf_init(ZVecIVFIndexParams *params, + ZVecMetricType metric_type, int n_list, + int n_iters, bool use_soar, int n_probe, + ZVecQuantizeType quantize_type); + +/** + * @brief Initialize generic index parameters + * @param params Generic index parameters structure pointer + * @param index_type Index type + * @param metric_type Metric type (only valid for vector indexes) + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init_default(ZVecIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type); + +/** + * @brief Destroy index parameters (free internal dynamically allocated memory) + * @param params Index parameters structure pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_destroy(ZVecIndexParams *params); + + +/** + * @brief Create inverted index parameters + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable extended wildcard + * @return ZVecInvertIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecInvertIndexParams *ZVEC_CALL +zvec_index_params_invert_create(bool enable_range_opt, bool enable_wildcard); + +/** + * @brief Create vector index base parameters + * @param index_type Index type + * @param metric_type Metric type 
+ * @param quantize_type Quantization type + * @return ZVecVectorIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecVectorIndexParams *ZVEC_CALL zvec_index_params_vector_create( + ZVecIndexType index_type, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type); + +/** + * @brief Create HNSW index parameters + * @param metric_type Metric type + * @param quantize_type Quantization type + * @param m Graph degree parameter + * @param ef_construction Exploration factor during construction + * @param ef_search Exploration factor during search + + * @return ZVecHnswIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecHnswIndexParams *ZVEC_CALL zvec_index_params_hnsw_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, + int ef_construction, int ef_search); + +/** + * @brief Create Flat index parameters + * @param metric_type Metric type + * @param quantize_type Quantization type + * @return ZVecFlatIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecFlatIndexParams *ZVEC_CALL zvec_index_params_flat_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type); + +/** + * @brief Create IVF index parameters + * @param metric_type Metric type + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR algorithm + * @param n_probe Number of clusters to probe during search + * @param quantize_type Quantization type + * @return ZVecIVFIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecIVFIndexParams *ZVEC_CALL zvec_index_params_ivf_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int n_list, + int n_iters, bool use_soar, int n_probe); + + +/** + * @brief Destroy inverted index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_invert_destroy(ZVecInvertIndexParams *params); + +/** + * 
@brief Destroy vector index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_vector_destroy(ZVecVectorIndexParams *params); + +/** + * @brief Destroy HNSW index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params); + +/** + * @brief Destroy Flat index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_flat_destroy(ZVecFlatIndexParams *params); + +/** + * @brief Destroy IVF index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params); + + +// ============================================================================= +// Query Parameters Structures +// ============================================================================= + +/** + * @brief Base query parameters structure (corresponds to zvec::QueryParams) + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ + float radius; /**< Search radius */ + bool is_linear; /**< Whether linear search */ + bool is_using_refiner; /**< Whether using refiner */ +} ZVecQueryParams; + +/** + * @brief HNSW query parameters structure (corresponds to zvec::HnswQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + int ef; /**< Exploration factor during search */ +} ZVecHnswQueryParams; + +/** + * @brief IVF query parameters structure (corresponds to zvec::IVFQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + int nprobe; /**< Number of clusters to probe during search */ + float scale_factor; /**< Scale factor */ +} ZVecIVFQueryParams; + +/** + * @brief Flat query parameters structure (corresponds to zvec::FlatQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + float scale_factor; 
/**< Scale factor */ +} ZVecFlatQueryParams; + +/** + * @brief Query parameters union (supports query parameters for different index + * types) + */ +typedef struct { + ZVecIndexType index_type; /**< Index type, used to distinguish the parameter + type stored in the union */ + union { + ZVecQueryParams base_params; /**< Base query parameters */ + ZVecHnswQueryParams hnsw_params; /**< HNSW query parameters */ + ZVecIVFQueryParams ivf_params; /**< IVF query parameters */ + ZVecFlatQueryParams flat_params; /**< Flat query parameters */ + } params; +} ZVecQueryParamsUnion; + +// ============================================================================= +// Query Structures (Updated Version, Including QueryParams) +// ============================================================================= + +/** + * @brief Vector query structure (aligned with zvec::VectorQuery, includes + * QueryParams) + */ +typedef struct { + int topk; /**< Number of results to return */ + ZVecString field_name; /**< Query field name */ + ZVecByteArray query_vector; /**< Query vector (binary data) */ + ZVecByteArray + query_sparse_indices; /**< Sparse vector indices (binary data) */ + ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ + ZVecString filter; /**< Filter expression */ + bool include_vector; /**< Whether to include vector data */ + bool include_doc_id; /**< Whether to include document ID */ + ZVecStringArray *output_fields; /**< Output field list (NULL means all) */ + ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL + means using default parameters) */ +} ZVecVectorQuery; + +/** + * @brief Grouped vector query structure (aligned with zvec::GroupByVectorQuery, + * includes QueryParams) + */ +typedef struct { + ZVecString field_name; /**< Query field name */ + ZVecByteArray query_vector; /**< Query vector (binary data) */ + ZVecByteArray + query_sparse_indices; /**< Sparse vector indices (binary data) */ + ZVecByteArray 
query_sparse_values; /**< Sparse vector values (binary data) */ + ZVecString filter; /**< Filter expression */ + bool include_vector; /**< Whether to include vector data */ + ZVecStringArray *output_fields; /**< Output field list */ + ZVecString group_by_field_name; /**< Group by field name */ + uint32_t group_count; /**< Number of groups */ + uint32_t group_topk; /**< Number of results to return per group */ + ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL + means using default parameters) */ +} ZVecGroupByVectorQuery; + + +// ============================================================================= +// Query Parameters Management Functions +// ============================================================================= + +/** + * @brief Create base query parameters + * @param index_type Index type + * @return ZVecQueryParams* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecQueryParams *ZVEC_CALL +zvec_query_params_create(ZVecIndexType index_type); + +/** + * @brief Create HNSW query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_HNSW) + * @param ef Exploration factor during search + * @param radius Search radius + * @param is_linear Whether linear search + * @param is_using_refiner Whether using refiner + * @return ZVecHnswQueryParams* Pointer to the newly created HNSW query + * parameters + */ +ZVEC_EXPORT ZVecHnswQueryParams *ZVEC_CALL +zvec_query_params_hnsw_create(ZVecIndexType index_type, int ef, float radius, + bool is_linear, bool is_using_refiner); + +/** + * @brief Create IVF query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_IVF) + * @param nprobe Number of clusters to probe during search + * @param is_using_refiner Whether using refiner + * @param scale_factor Scale factor + * @return ZVecIVFQueryParams* Pointer to the newly created IVF query parameters + */ +ZVEC_EXPORT ZVecIVFQueryParams *ZVEC_CALL +zvec_query_params_ivf_create(ZVecIndexType 
index_type, int nprobe, + bool is_using_refiner, float scale_factor); + +/** + * @brief Create Flat query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_FLAT) + * @param is_using_refiner Whether using refiner + * @param scale_factor Scale factor + * @return ZVecFlatQueryParams* Pointer to the newly created Flat query + * parameters + */ +ZVEC_EXPORT ZVecFlatQueryParams *ZVEC_CALL zvec_query_params_flat_create( + ZVecIndexType index_type, bool is_using_refiner, float scale_factor); + +/** + * @brief Create query parameters union + * @param index_type Index type + * @return ZVecQueryParamsUnion* Pointer to the newly created query parameters + * union + */ +ZVEC_EXPORT ZVecQueryParamsUnion *ZVEC_CALL +zvec_query_params_union_create(ZVecIndexType index_type); + + +/** + * @brief Destroy base query parameters + * @param params Base query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_destroy(ZVecQueryParams *params); + +/** + * @brief Destroy HNSW query parameters + * @param params HNSW query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params); + +/** + * @brief Destroy IVF query parameters + * @param params IVF query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params); + +/** + * @brief Destroy Flat query parameters + * @param params Flat query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_flat_destroy(ZVecFlatQueryParams *params); + +/** + * @brief Destroy query parameters union + * @param params Query parameters union pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_union_destroy(ZVecQueryParamsUnion *params); + +/** + * @brief Set query parameters index type + * @param params Query parameters pointer + * @param index_type Index type + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_index_type( + ZVecQueryParams
*params, ZVecIndexType index_type); + +/** + * @brief Set search radius for query parameters + * @param params Query parameters pointer + * @param radius Search radius + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_radius(ZVecQueryParams *params, float radius); + +/** + * @brief Set whether to use linear search for query parameters + * @param params Query parameters pointer + * @param is_linear Whether to use linear search + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_is_linear(ZVecQueryParams *params, bool is_linear); + +/** + * @brief Set whether to use refiner for query parameters + * @param params Query parameters pointer + * @param is_using_refiner Whether to use refiner + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_is_using_refiner( + ZVecQueryParams *params, bool is_using_refiner); + +/** + * @brief Set exploration factor for HNSW query parameters + * @param params HNSW query parameters pointer + * @param ef Exploration factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, int ef); + +/** + * @brief Set number of probe clusters for IVF query parameters + * @param params IVF query parameters pointer + * @param nprobe Number of probe clusters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, int nprobe); + +/** + * @brief Set scale factor for IVF/Flat query parameters + * @param params IVF or Flat query parameters pointer + * @param scale_factor Scale factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, float scale_factor); + +/** + * @brief Collection options structure + */ +typedef struct { + bool enable_mmap; /**< Whether to enable memory
mapping */ + size_t max_buffer_size; /**< Maximum buffer size */ + bool read_only; /**< Whether read-only mode */ + uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ +} ZVecCollectionOptions; + + +/** + * @brief Collection statistics structure + */ +typedef struct { + uint64_t doc_count; /**< Total document count */ + ZVecString **index_names; /**< Index name array */ + float *index_completeness; /**< Index completeness array */ + size_t index_count; /**< Index name count */ +} ZVecCollectionStats; + + +/** + * @brief Create field schema + * @param name Field name + * @param data_type Data type + * @param nullable Whether nullable + * @param dimension Vector dimension + * @return ZVecFieldSchema* Pointer to the newly created field schema + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL +zvec_field_schema_create(const char *name, ZVecDataType data_type, + bool nullable, uint32_t dimension); + +/** + * @brief Destroy field schema + * @param schema Field schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_destroy(ZVecFieldSchema *schema); + +/** + * @brief Set index parameters for field + * @param schema Field schema pointer + * @param index_params Index parameters pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params); + + +/** + * @brief Set inverted index parameters for field schema + * @param field_schema Field schema pointer + * @param invert_params Inverted index parameters pointer + */ +void zvec_field_schema_set_invert_index( + ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params); + +/** + * @brief Set HNSW index parameters for field schema + * @param field_schema Field schema pointer + * @param hnsw_params HNSW index parameters pointer + */ +void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, + const ZVecHnswIndexParams *hnsw_params); + +/** + * @brief 
Set Flat index parameters for field schema + * @param field_schema Field schema pointer + * @param flat_params Flat index parameters pointer + */ +void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, + const ZVecFlatIndexParams *flat_params); + +/** + * @brief Set IVF index parameters for field schema + * @param field_schema Field schema pointer + * @param ivf_params IVF index parameters pointer + */ +void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, + const ZVecIVFIndexParams *ivf_params); + + +// ============================================================================= +// Collection Schema Structures +// ============================================================================= + +/** + * @brief Collection schema structure + */ +typedef struct { + ZVecString *name; /**< Collection name */ + ZVecFieldSchema **fields; /**< Field array */ + size_t field_count; /**< Field count */ + size_t field_capacity; /**< Field array capacity */ + uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ +} ZVecCollectionSchema; + +/** + * @brief Create collection schema + * @param name Collection name + * @return ZVecCollectionSchema* Pointer to the newly created collection schema + */ +ZVEC_EXPORT ZVecCollectionSchema *ZVEC_CALL +zvec_collection_schema_create(const char *name); + +/** + * @brief Destroy collection schema + * @param schema Collection schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_schema_destroy(ZVecCollectionSchema *schema); + +/** + * @brief Add field to collection schema + * @param schema Collection schema pointer + * @param field Field schema pointer (function takes ownership) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_field( + ZVecCollectionSchema *schema, ZVecFieldSchema *field); + +/** + * @brief Add multiple fields to collection schema at once + * + * @param schema Collection schema pointer + * @param fields 
Array of fields to add + * @param field_count Number of fields to add + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_fields( + ZVecCollectionSchema *schema, const ZVecFieldSchema *fields, + size_t field_count); + +/** + * @brief Remove field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_field( + ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Remove multiple fields from collection schema at once + * + * @param schema Collection schema pointer + * @param field_names Array of field names to remove + * @param field_count Number of fields to remove + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count); + +/** + * @brief Get field count + * + * @param schema Collection schema pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL +zvec_collection_schema_get_field_count(const ZVecCollectionSchema *schema); + +/** + * @brief Find field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, returns NULL if not found + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Validate collection schema + * @param schema Collection schema pointer + * @param[out] error_msg Error message (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg); + + +/** + * @brief Get field by index + * @param schema Collection schema pointer + * @param index Field index + 
* @return ZVecFieldSchema* Field schema pointer + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index); + +/** + * @brief Set maximum document count per segment + * @param schema Collection schema pointer + * @param max_doc_count Maximum document count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count); + +/** + * @brief Get maximum document count per segment of collection schema + * + * @param schema Collection schema pointer + * @return uint64_t Maximum document count per segment + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema); + + +// ============================================================================= +// Collection Management Functions +// ============================================================================= + +/** + * @brief Create and open collection + * @param path Collection path + * @param schema Collection schema pointer + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection); + + +/** + * @brief Open existing collection + * @param path Collection path + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_open(const char *path, const ZVecCollectionOptions *options, + ZVecCollection **collection); + + +/** + * @brief Close collection + * @param collection Collection handle + * 
@return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_close(ZVecCollection *collection); + + +/** + * @brief Destroy collection + * + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_destroy(ZVecCollection *collection); + +/** + * @brief Flush collection data to disk + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_flush(ZVecCollection *collection); + +/** + * @brief Get collection path + * @param collection Collection handle + * @param[out] path Returned path string (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_get_path(const ZVecCollection *collection, ZVecString **path); + + +/** + * @brief Get collection name + * @param collection Collection handle + * @param[out] name Returned collection name (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_get_name(const ZVecCollection *collection, ZVecString **name); + +/** + * @brief Get collection schema + * @param collection Collection handle + * @param[out] schema + * Returned collection schema pointer (needs to be freed by calling + * zvec_collection_schema_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_schema( + const ZVecCollection *collection, ZVecCollectionSchema **schema); + + +/** + * @brief Initialize default collection options + * @param options Collection options structure pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_options_init_default(ZVecCollectionOptions *options); + +/** + * @brief Get collection options + * @param collection Collection handle + * @param[out] options + * Returned collection options pointer (needs to be freed by 
calling + * zvec_collection_options_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_options( + const ZVecCollection *collection, ZVecCollectionOptions **options); + +/** + * @brief Get collection statistics + * @param collection Collection handle + * @param[out] stats + * Returned statistics pointer (needs to be freed by calling + * zvec_collection_stats_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_stats( + const ZVecCollection *collection, ZVecCollectionStats **stats); + +/** + * @brief Destroy collection statistics + * @param stats Statistics pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_stats_destroy(ZVecCollectionStats *stats); + + +/** + * @brief Free field schema array memory + * + * @param array Field schema array pointer + * @param count Array element count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_field_schema_array(ZVecFieldSchema **array, + size_t count); + +/** + * @brief Check if collection has specified field + * @param collection Collection handle + * @param field_name Field name + * @param[out] exists Whether exists + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_has_field( + const ZVecCollection *collection, const char *field_name, bool *exists); + +/** + * @brief Get field information + * @param collection Collection handle + * @param field_name Field name + * @param[out] field_schema + * Returned field schema pointer (needs to be freed by calling + * zvec_field_schema_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_field_info( + const ZVecCollection *collection, const char *field_name, + ZVecFieldSchema **field_schema); + +/** + * @brief Free field schema memory + * + * @param field_schema Field schema pointer to be freed + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_free_field_schema(ZVecFieldSchema 
*field_schema); + + +// ============================================================================= +// Index Management Interface +// ============================================================================= + +/** + * @brief Create index + * + * @param collection Collection handle + * @param column_name Column name + * @param index_params Index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index( + ZVecCollection *collection, const char *column_name, + const ZVecIndexParams *index_params); + +/** + * @brief Create index for collection field (using specific type parameters) + * @param collection Collection handle + * @param field_name Field name + * @param index_params Index parameters (select appropriate structure based on + * index type) + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index_with_params( + ZVecCollection *collection, const ZVecString *field_name, + const void + *index_params); // Determine specific type based on index_type field + +/** + * @brief Create HNSW index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param hnsw_params HNSW index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecHnswIndexParams *hnsw_params); + +/** + * @brief Create Flat index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param flat_params Flat index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecFlatIndexParams *flat_params); + +/** + * @brief Create IVF index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param ivf_params IVF index 
parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecIVFIndexParams *ivf_params); + +/** + * @brief Create scalar index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param invert_params Scalar index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_invert_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecInvertIndexParams *invert_params); + +/** + * @brief Drop index + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_index(ZVecCollection *collection, const char *field_name); + +/** + * @brief Optimize collection (rebuild indexes, merge segments, etc.) + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_optimize(ZVecCollection *collection); + +/** + * @brief Get index statistics + * @param collection Collection handle + * @param field_name Field name + * @param[out] completeness Index completeness (0.0-1.0) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_get_index_stats(const ZVecCollection *collection, + const char *field_name, float *completeness); + + +/* + * NOTE: Compact collection (reclaim space) is documented here but no + * declaration follows; this comment does not describe the next function. + * Intended shape: takes a collection handle, returns ZVecErrorCode. */ + +/** + * @brief Get detailed information of the last error + * @param[out] error_details Pointer to error details structure + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_get_last_error_details(ZVecErrorDetails *error_details); + +/** + * @brief Clear error status + */ +ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); + + +//
============================================================================= +// Field Management Interface (DDL) +// ============================================================================= + +/** + * @brief Add field + * @param collection Collection handle + * @param field_schema Field schema pointer + * @param default_expression Default value expression (can be NULL) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_add_field( + ZVecCollection *collection, const ZVecFieldSchema *field_schema, + const char *default_expression); + +/** + * @brief Drop field + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_field(ZVecCollection *collection, const char *field_name); + +/** + * @brief Alter field + * @param collection Collection handle + * @param old_name Original field name + * @param new_name New field name (can be NULL to indicate no renaming) + * @param new_schema New field schema (can be NULL to indicate no schema + * modification) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_alter_field( + ZVecCollection *collection, const char *old_name, const char *new_name, + const ZVecFieldSchema *new_schema); + + +/** + * @brief Document structure (opaque pointer mode) + * Internal implementation details are not visible to the outside, and + * operations are performed through API functions + */ +typedef struct ZVecDoc ZVecDoc; + +// ============================================================================= +// Data Manipulation Interface (DML) +// ============================================================================= + +/** + * @brief Insert documents into collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully inserted documents 
+ * @param[out] error_count Number of failed insertions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Update documents in collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully updated documents + * @param[out] error_count Number of failed updates + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Insert or update documents in collection (upsert operation) + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successful operations + * @param[out] error_count Number of failed operations + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Delete documents from collection + * @param collection Collection handle + * @param pks Primary key array + * @param pk_count Primary key count + * @param[out] success_count Number of successfully deleted documents + * @param[out] error_count Number of failed deletions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Delete documents by filter condition + * @param collection Collection handle + * @param filter Filter expression + * @note The number of deleted documents is not reported (no output parameter)
* @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_by_filter( + ZVecCollection *collection, const char *filter); + +// ============================================================================= +// Data Query Interface (DQL) +// ============================================================================= + +/** + * @brief Vector similarity search + * @param collection Collection handle + * @param query Query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query( + const ZVecCollection *collection, const ZVecVectorQuery *query, + ZVecDoc ***results, size_t *result_count); + +/** + * @brief Grouped vector similarity search + * @param collection Collection handle + * @param query Grouped query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] group_by_values Returned group by field values array (needs to be + * freed by calling zvec_free_string_array) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_group( + const ZVecCollection *collection, const ZVecGroupByVectorQuery *query, + ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count); + +/** + * @brief Get documents by primary keys + * @param collection Collection handle + * @param primary_keys Primary key array + * @param count Number of primary keys + * @param[out] documents Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] found_count Number of found documents + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_by_primary_keys( + ZVecCollection 
*collection, const char *const *primary_keys, size_t count, + ZVecDoc ***documents, size_t *found_count); + +/** + * @brief Query documents by filter condition + * @param collection Collection handle + * @param filter_expression Filter expression + * @param limit Result limit + * @param offset Offset + * @param[out] documents Returned document array + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_filter( + const ZVecCollection *collection, const char *filter_expression, + size_t limit, size_t offset, ZVecDoc ***documents, size_t *result_count); + +// ============================================================================= +// Document Related Structures +// ============================================================================= + +/** + * @brief Document field value union + */ +typedef union { + bool bool_value; + int32_t int32_value; + int64_t int64_value; + uint32_t uint32_value; + uint64_t uint64_value; + float float_value; + double double_value; + ZVecString string_value; + ZVecFloatArray vector_value; + ZVecByteArray binary_value; /**< Binary data value */ +} ZVecFieldValue; + +/** + * @brief Document field structure + */ +typedef struct { + ZVecString name; ///< Field name + ZVecDataType data_type; ///< Data type + ZVecFieldValue value; ///< Field value +} ZVecDocField; + +/** + * @brief Document operator enumeration + */ +typedef enum { + ZVEC_DOC_OP_INSERT = 0, ///< Insert operation + ZVEC_DOC_OP_UPDATE = 1, ///< Update operation + ZVEC_DOC_OP_UPSERT = 2, ///< Insert or update operation + ZVEC_DOC_OP_DELETE = 3 ///< Delete operation +} ZVecDocOperator; + + +// ============================================================================= +// Data Manipulation Interface (DML) +// ============================================================================= + +/** + * @brief Create a new document object + * + * @return ZVecDoc* Pointer to 
the newly created document object, returns NULL + * on failure; release it with zvec_doc_destroy() + */ +ZVEC_EXPORT ZVecDoc *ZVEC_CALL zvec_doc_create(void); + +/** + * @brief Destroy the document object and release all resources + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_destroy(ZVecDoc *doc); + +/** + * @brief Clear the document object + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_clear(ZVecDoc *doc); + +/** + * @brief Add field to document by value + * + * @param doc Document object pointer + * @param field_name Field name + * @param data_type Data type + * @param value Value pointer + * @param value_size Value size (NOTE(review): presumably in bytes - confirm) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_add_field_by_value( + ZVecDoc *doc, const char *field_name, ZVecDataType data_type, + const void *value, size_t value_size); + +/** + * @brief Add field to document by structure + * + * @param doc Document object pointer + * @param field Field structure pointer (name, data type and value) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_add_field_by_struct(ZVecDoc *doc, const ZVecDocField *field); + +/** + * @brief Remove field from document + * + * @param doc Document structure pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_remove_field(ZVecDoc *doc, const char *field_name); + + +/** + * @brief Batch release document array + * + * @param documents Document pointer array + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **documents, size_t count); + +/** + * @brief Set document primary key + * + * @param doc Pointer to the document structure + * @param pk Primary key string + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_pk(ZVecDoc *doc, const char *pk); + +/** + * @brief Set document ID + * + * @param doc Document structure pointer + * @param doc_id Document ID + */
+ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id); + +/** + * @brief Set document score + * + * @param doc Document structure pointer + * @param score Score value + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_score(ZVecDoc *doc, float score); + +/** + * @brief Set document operator + * + * @param doc Document structure pointer + * @param op Operator (one of the ZVEC_DOC_OP_* values) + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_operator(ZVecDoc *doc, + ZVecDocOperator op); + +/** + * @brief Get document ID + * + * @param doc Document structure pointer + * @return uint64_t Document ID + */ +ZVEC_EXPORT uint64_t ZVEC_CALL zvec_doc_get_doc_id(const ZVecDoc *doc); + +/** + * @brief Get document score + * + * @param doc Document structure pointer + * @return float Score value + */ +ZVEC_EXPORT float ZVEC_CALL zvec_doc_get_score(const ZVecDoc *doc); + +/** + * @brief Get document operator + * + * @param doc Document structure pointer + * @return ZVecDocOperator Operator + */ +ZVEC_EXPORT ZVecDocOperator ZVEC_CALL zvec_doc_get_operator(const ZVecDoc *doc); + +/** + * @brief Get document field count + * + * @param doc Document structure pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_get_field_count(const ZVecDoc *doc); + + +/** + * @brief Get document primary key pointer (no copy) + * + * @param doc Document object pointer + * @return const char* Primary key string pointer, returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_pointer(const ZVecDoc *doc); + +/** + * @brief Get document primary key copy (needs manual release) + * + * @param doc Document object pointer + * @return const char* Primary key string copy, needs to call free() to release, + * returns NULL if not set + * + * NOTE(review): the return type is const-qualified even though ownership + * passes to the caller, so the caller has to cast away const to release it. + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_copy(const ZVecDoc *doc); + +/** + * @brief Get field value (basic type returned directly) + * + * Supports basic numeric data types: BOOL, INT32, INT64, UINT32, UINT64, + * FLOAT, DOUBLE.
The value is copied directly into the provided buffer. + * For STRING, BINARY, and VECTOR types, use zvec_doc_get_field_value_copy + * or zvec_doc_get_field_value_pointer instead. + * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type (must be a basic numeric type) + * @param value_buffer Output buffer to receive the value + * @param buffer_size Size of the output buffer (NOTE(review): presumably in + * bytes and at least the size of the requested type - confirm) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_basic( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void *value_buffer, size_t buffer_size); + +/** + * @brief Get field value copy (allocate new memory) + * + * Supports all data types including: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING, BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Sparse vector types: SPARSE_VECTOR_FP32, SPARSE_VECTOR_FP16 + * - Array types: ARRAY_STRING, ARRAY_BINARY, ARRAY_BOOL, ARRAY_INT32, + * ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned value pointer must be manually freed using appropriate + * deallocation functions (free() for basic types and strings, + * zvec_free_uint8_array() for binary data).
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (needs manual release) + * @param[out] value_size Returned value size (NOTE(review): presumably in + * bytes - confirm) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_copy( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void **value, size_t *value_size); + +/** + * @brief Get field value pointer (data remains in document) + * + * Supports data types where direct pointer access is safe: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING (returns null-terminated C string), BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Array types: ARRAY_INT32, ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, + * ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned pointer points to data within the document object and + * does not require manual memory management; do not free it. The pointer + * remains valid + * as long as the document exists.
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (points to document-internal data) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_pointer( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + const void **value, size_t *value_size); + +/** + * @brief Check if document is empty + * + * @param doc Document object pointer + * @return bool Returns true if document is empty, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_empty(const ZVecDoc *doc); + +/** + * @brief Check if document contains specified field + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field exists, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field has value + * + * NOTE(review): presumably true only when the field exists and is not null, + * i.e. the complement of zvec_doc_is_field_null() for existing fields - + * confirm against the implementation. + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field has value, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field_value(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field is null + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field is null, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_field_null(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Get all field names of document + * + * @param doc Document object pointer + * @param[out] field_names + * Returned field name array (needs to call zvec_free_str_array to release) + * @param[out] count Returned field count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_names( + const ZVecDoc *doc, char
***field_names, size_t *count); + +/** + * @brief Release string array memory + * + * @param array String array pointer + * @param count Array element count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_str_array(char **array, size_t count); + +/** + * @brief Serialize document + * + * @param doc Document object pointer + * @param[out] data Returned serialized data (needs to call + * zvec_free_uint8_array to release) + * @param[out] size Returned data size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_serialize(const ZVecDoc *doc, + uint8_t **data, + size_t *size); + +/** + * @brief Deserialize document + * + * @param data Serialized data + * @param size Data size + * @param[out] doc Returned document object pointer (needs to call + * zvec_doc_destroy to release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_deserialize(const uint8_t *data, + size_t size, + ZVecDoc **doc); + +/** + * @brief Merge two documents + * + * NOTE(review): how fields present in both documents are resolved is not + * stated here - confirm against the implementation. + * + * @param doc Target document object pointer + * @param other Source document object pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other); + +/** + * @brief Get document memory usage + * + * @param doc Document object pointer + * @return size_t Memory usage (bytes) + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_memory_usage(const ZVecDoc *doc); + +/** + * @brief Validate document against Schema + * + * @param doc Document object pointer + * @param schema Schema object pointer + * @param is_update Whether it's an update operation + * @param[out] error_msg Error message (needs manual release; + * NOTE(review): presumably with zvec_free_str() - confirm) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_validate(const ZVecDoc *doc, const ZVecCollectionSchema *schema, + bool is_update, char **error_msg); + +/** + * @brief Get detailed string representation of document + * + * @param doc Document object pointer + * @param[out] detail_str Returned detailed string
(needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str); + +/** + * @brief Free docs array memory + * @param docs Document array pointer + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **docs, size_t count); + + +// ============================================================================= +// Query Parameter Constructor Functions +// ============================================================================= + +/** + * @brief Create vector query parameters + * @param field_name Query field name + * @param query_data Query vector data + * @param query_length Query vector length + * @param top_k Number of results to return + * @return ZVecVectorQuery* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecVectorQuery *ZVEC_CALL +zvec_vector_query_create(const char *field_name, const float *query_data, + size_t query_length, int top_k); + +/** + * @brief Destroy vector query parameters + * @param query Query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_vector_query_destroy(ZVecVectorQuery *query); + +/** + * @brief Set query filter condition + * @param query Query parameters pointer + * @param filter_expression Filter expression + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_filter( + ZVecVectorQuery *query, const char *filter_expression); + +/** + * @brief Set output fields + * @param query Query parameters pointer + * @param field_names Field name array + * @param count Field count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_output_fields( + ZVecVectorQuery *query, const char *const *field_names, size_t count); + +/** + * @brief Set timeout + * @param query Query parameters pointer + * @param timeout_ms Timeout in milliseconds + */ +ZVEC_EXPORT void ZVEC_CALL 
zvec_vector_query_set_timeout(ZVecVectorQuery *query, + int timeout_ms); + +/** + * @brief Create grouped vector query parameters + * @param field_name Query field name + * @param query_data Query vector data + * @param query_length Query vector length + * @param group_by_field Group by field name + * @param group_count Number of groups + * @param group_top_k Number of results to return per group + * @return ZVecGroupByVectorQuery* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecGroupByVectorQuery *ZVEC_CALL zvec_grouped_vector_query_create( + const char *field_name, const float *query_data, size_t query_length, + const char *group_by_field, uint32_t group_count, uint32_t group_top_k); + +/** + * @brief Destroy grouped vector query parameters + * @param query Query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_grouped_vector_query_destroy(ZVecGroupByVectorQuery *query); + + +// ============================================================================= +// Utility Functions +// ============================================================================= + +/** + * @brief Convert error code to description string + * @param error_code Error code + * @return const char* Error description string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_error_code_to_string(ZVecErrorCode error_code); + +/** + * @brief Convert data type to string + * @param data_type Data type + * @return const char* Data type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_data_type_to_string(ZVecDataType data_type); + +/** + * @brief Convert index type to string + * @param index_type Index type + * @return const char* Index type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_index_type_to_string(ZVecIndexType index_type); + +/** + * @brief Convert metric type to string + * @param metric_type Metric type + * @return const char* Metric type string + */ +const char *zvec_metric_type_to_string(ZVecMetricType metric_type); + +/** + * @brief Get system 
information + * @param[out] info_json System information JSON string (needs to be freed by + * calling zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVecErrorCode zvec_get_system_info(ZVecString **info_json); + +// ============================================================================= +// Memory Management Interface +// ============================================================================= + +/** + * @brief Allocate memory + * @param size Number of bytes to allocate + * @return void* Allocated memory pointer, returns NULL on failure + */ +ZVEC_EXPORT void *ZVEC_CALL zvec_malloc(size_t size); + +/** + * @brief Reallocate memory + * @param ptr Original memory pointer + * @param size New number of bytes + * @return void* Reallocation memory pointer, returns NULL on failure + */ +ZVEC_EXPORT void *ZVEC_CALL zvec_realloc(void *ptr, size_t size); + +/** + * @brief Free memory + * @param ptr Memory pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free(void *ptr); + +/** + * @brief Free string memory + * @param str String pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_string(ZVecString *str); + +/** + * @brief Free string array memory + * @param array String array pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_string_array(ZVecStringArray *array); + +/** + * @brief Free byte array memory + * @param array Byte array pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_byte_array(ZVecMutableByteArray *array); + +/** + * @brief Free string memory + * @param str String pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_str(char *str); + +/** + * @brief Release uint8_t array memory + * + * @param array uint8_t array pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); + + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * @brief 
Simplified HNSW index parameters initialization macro + * @param metric Distance metric type + * @param m_ Connectivity parameter + * @param ef_construction_ Exploration factor during construction + * @param ef_search_ Exploration factor during search + * @param quant Quantization type + * + * The macro parameters carry a trailing underscore so that they do not have + * the same spelling as the struct members they initialize: the preprocessor + * substitutes a parameter wherever its name appears in the body, including + * after the '.' of a designated initializer. + * + * Usage example: + * ZVecHnswIndexParams params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, + * 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + */ +#define ZVEC_HNSW_PARAMS(metric, m_, ef_construction_, ef_search_, quant) \ + (ZVecHnswIndexParams) { \ + .base.base.index_type = ZVEC_INDEX_TYPE_HNSW, .base.metric_type = metric, \ + .base.quantize_type = quant, .m = m_, .ef_construction = ef_construction_, \ + .ef_search = ef_search_ \ + } + +/** + * @brief Simplified inverted index parameters initialization macro + * @param range_opt Whether to enable range optimization + * @param wildcard Whether to enable wildcard expansion + * + * Usage example: + * ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); + */ +#define ZVEC_INVERT_PARAMS(range_opt, wildcard) \ + (ZVecInvertIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_INVERT, \ + .enable_range_optimization = range_opt, \ + .enable_extended_wildcard = wildcard \ + } + +/** + * @brief Simplified Flat index parameters initialization macro + * @param metric Distance metric type + * @param quant Quantization type + * + * NOTE(review): ZVEC_HNSW_PARAMS sets .base.base.index_type while this macro + * sets .base.index_type - confirm which nesting matches the struct + * definitions. + */ +#define ZVEC_FLAT_PARAMS(metric, quant) \ + (ZVecFlatIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_FLAT, .base.metric_type = metric, \ + .base.quantize_type = quant \ + } + +/** + * @brief Simplified IVF index parameters initialization macro + * @param metric Distance metric type + * @param nlist Number of cluster centers + * @param niters Number of iterations + * @param soar Whether to use SOAR algorithm + * @param nprobe Number of clusters to probe during search + * @param quant Quantization type + */ +#define ZVEC_IVF_PARAMS(metric, nlist, niters, soar, nprobe, quant) \ + (ZVecIVFIndexParams) { \ +
.base.index_type = ZVEC_INDEX_TYPE_IVF, .base.metric_type = metric, \ + .base.quantize_type = quant, .n_list = nlist, .n_iters = niters, \ + .use_soar = soar, .n_probe = nprobe \ + } + +/** + * @brief Simplified string view initialization macro + * @param str String content + * + * Usage example: + * ZVecStringView name = ZVEC_STRING_VIEW("my_collection"); + */ +#define ZVEC_STRING_VIEW(str) \ + (ZVecStringView) { \ + .data = str, .length = strlen(str) \ + } + +// Has been replaced by the new ZVEC_STRING_VIEW macro + +/** + * @brief Simplified float array initialization macro + * @param data_ptr Float array pointer + * @param len Array length + * + * Usage example: + * float vectors[] = {0.1f, 0.2f, 0.3f}; + * ZVecFloatArray vec_array = ZVEC_FLOAT_ARRAY(vectors, 3); + */ +#define ZVEC_FLOAT_ARRAY(data_ptr, len) \ + (ZVecFloatArray) { \ + .data = data_ptr, .length = len \ + } + +/** + * @brief Simplified integer array initialization macro + * @param data_ptr Integer array pointer + * @param len Array length + */ +#define ZVEC_INT64_ARRAY(data_ptr, len) \ + (ZVecInt64Array) { \ + .data = data_ptr, .length = len \ + } + + +/** + * @brief Simplified inverted index parameters initialization macro + * @param range_opt Whether to enable range optimization + * @param wildcard Whether to enable wildcard expansion + * + * Usage example: + * ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); + */ +#define ZVEC_INVERT_PARAMS(range_opt, wildcard) \ + (ZVecInvertIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_INVERT, \ + .enable_range_optimization = range_opt, \ + .enable_extended_wildcard = wildcard \ + } + + +/** + * @brief Simplified collection options initialization macro (using default + * values) + * + * Usage example: + * ZVecCollectionOptions opts = ZVEC_DEFAULT_OPTIONS(); + */ +#define ZVEC_DEFAULT_OPTIONS() \ + (ZVecCollectionOptions){.enable_mmap = true, \ + .max_buffer_size = 1048576, \ + .read_only = false, \ + .max_doc_count_per_segment = 
1000000} + +/** + * @brief Simplified vector query initialization macro + * @param field_name_str Query field name + * @param query_vec Query vector array + * @param top_k Number of results to return + * @param filter_str Filter condition string + * + * NOTE(review): this expansion relies on a ZVEC_STRING macro; a nearby comment + * in this header says it was replaced by ZVEC_STRING_VIEW - confirm that + * ZVEC_STRING is still defined. + * + * Usage example: + * ZVecVectorQuery query = ZVEC_VECTOR_QUERY("embedding", query_vectors, 10, + * ""); + */ +#define ZVEC_VECTOR_QUERY(field_name_str, query_vec, top_k, filter_str) \ + (ZVecVectorQuery){.field_name = ZVEC_STRING(field_name_str), \ + .query_vector = query_vec, \ + .topk = top_k, \ + .filter = ZVEC_STRING(filter_str), \ + .include_vector = 1, \ + .include_doc_id = 1} + +/** + * @brief Simplified document field initialization macro + * @param name_str Field name + * @param type Data type + * @param value_union Field value union + * + * NOTE(review): this expansion relies on a ZVEC_STRING macro - confirm that it + * is still defined. + * + * Usage example: + * ZVecDocField field = ZVEC_DOC_FIELD("id", ZVEC_DATA_TYPE_STRING, + * {.string_value = ZVEC_STRING("doc1")}); + */ +#define ZVEC_DOC_FIELD(name_str, type, value_union) \ + (ZVecDocField) { \ + .name = ZVEC_STRING(name_str), .data_type = type, .value = value_union \ + } + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // ZVEC_C_API_H diff --git a/src/include/zvec/db/doc.h b/src/include/zvec/db/doc.h index 5f927fa1f..e14886ca2 100644 --- a/src/include/zvec/db/doc.h +++ b/src/include/zvec/db/doc.h @@ -68,6 +68,10 @@ class Doc { return pk_; } + const std::string &pk_ref() const { + return pk_; + } + void set_score(float score) { score_ = score; } @@ -103,6 +107,10 @@ class Doc { return op_; } + Operator get_operator() const { + return op_; + } + // Set field value template <typename T> bool set(const std::string &field_name, T value) { @@ -232,6 +240,26 @@ class Doc { return std::nullopt; } + // Get field value as const reference, throws exception if field doesn't exist + // or type mismatches + template <typename T> + const T &get_ref(const std::string &field_name) const { + auto it = fields_.find(field_name); + if (it == fields_.end()) { + throw std::runtime_error("Field '"
+ field_name + "' not found"); + } + + if (std::holds_alternative(it->second)) { + throw std::runtime_error("Field '" + field_name + "' is null"); + } + + try { + return std::get(it->second); + } catch (const std::bad_variant_access &) { + throw std::runtime_error("Field '" + field_name + "' type mismatch"); + } + } + void remove(const std::string &field_name) { fields_.erase(field_name); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 03250f1c8..e1ffc3262 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,3 +4,4 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) cc_directories(ailego) cc_directories(db) cc_directories(core) +cc_directories(c_api) \ No newline at end of file diff --git a/tests/c_api/CMakeLists.txt b/tests/c_api/CMakeLists.txt new file mode 100644 index 000000000..ad2f62e16 --- /dev/null +++ b/tests/c_api/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) + +file(GLOB_RECURSE ALL_TEST_SRCS *_test.c) + +foreach(CC_SRCS ${ALL_TEST_SRCS}) + get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) + cc_gtest( + NAME ${CC_TARGET} + STRICT + LIBS zvec_c_api + SRCS ${CC_SRCS} utils.c + INCS . .. 
../../src + ) +endforeach() diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c new file mode 100644 index 000000000..5abcb5332 --- /dev/null +++ b/tests/c_api/c_api_test.c @@ -0,0 +1,2350 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#ifdef _POSIX_C_SOURCE +#include +#endif +#include +#include "utils.h" + +// ============================================================================= +// Test helper macro definitions +// ============================================================================= + +static int test_count = 0; // Number of test functions started +static int passed_count = 0; // Number of test functions with no failed assertion +static int current_test_passed = 1; // Track if current test function passes + +// TEST_START: print the test name, count the test and reset the per-test flag. +#define TEST_START() \ + do { \ + printf("Running test: %s\n", __func__); \ + test_count++; \ + current_test_passed = 1; \ + } while (0) + +// TEST_ASSERT: print PASS/FAIL for the condition. A failure only clears +// current_test_passed; it does not abort the test function. +#define TEST_ASSERT(condition) \ + do { \ + if (condition) { \ + printf(" ✓ PASS\n"); \ + } else { \ + printf(" ✗ FAIL at line %d\n", __LINE__); \ + current_test_passed = 0; \ + } \ + } while (0) + +// TEST_END: count the test as passed if no assertion failed since TEST_START. +#define TEST_END() \ + do { \ + if (current_test_passed) { \ + passed_count++; \ + } \ + } while (0) + +// ============================================================================= +// Helper functions tests +// ============================================================================= + +void test_version_functions(void) { +
TEST_START(); + + // Test version retrieval functions + const char *version = zvec_get_version(); + TEST_ASSERT(version != NULL); + + // Test version component retrieval + int major = zvec_get_version_major(); + int minor = zvec_get_version_minor(); + int patch = zvec_get_version_patch(); + + TEST_ASSERT(major >= 0); + TEST_ASSERT(minor >= 0); + TEST_ASSERT(patch >= 0); + + TEST_ASSERT(zvec_check_version(major, minor, patch)); + + // Test version checking functions + bool compatible = zvec_check_version(0, 3, 0); + TEST_ASSERT(compatible == true); + + bool not_compatible = zvec_check_version(99, 99, 99); + TEST_ASSERT(not_compatible == false); + + TEST_END(); +} + +void test_error_handling_functions(void) { + TEST_START(); + + char *error_msg = NULL; + ZVecErrorCode err = zvec_get_last_error(&error_msg); + TEST_ASSERT(err == ZVEC_OK); + + if (error_msg) { + zvec_free_str(error_msg); + } + + // Test error clearing + zvec_clear_error(); + + // Test error details retrieval + ZVecErrorDetails error_details = {0}; + err = zvec_get_last_error_details(&error_details); + TEST_ASSERT(err == ZVEC_OK); + + TEST_END(); +} + +void test_zvec_config() { + TEST_START(); + + // Test 1: Console log config creation and destruction + ZVecConsoleLogConfig *console_config = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(console_config != NULL); + if (console_config) { + TEST_ASSERT(console_config->level == ZVEC_LOG_LEVEL_INFO); + zvec_config_console_log_destroy(console_config); + } + + // Test 2: File log config creation and destruction + ZVecFileLogConfig *file_config = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_WARN, "./logs", "test_log", 100, 7); + TEST_ASSERT(file_config != NULL); + if (file_config) { + TEST_ASSERT(file_config->level == ZVEC_LOG_LEVEL_WARN); + TEST_ASSERT(strcmp(file_config->dir.data, "./logs") == 0); + TEST_ASSERT(strcmp(file_config->basename.data, "test_log") == 0); + TEST_ASSERT(file_config->file_size == 100); + 
TEST_ASSERT(file_config->overdue_days == 7); + zvec_config_file_log_destroy(file_config); + } + + // Test 3: File log config edge cases + ZVecFileLogConfig *empty_file_config = + zvec_config_file_log_create(ZVEC_LOG_LEVEL_INFO, "", "", 0, 0); + TEST_ASSERT(empty_file_config != NULL); + if (empty_file_config) { + TEST_ASSERT(empty_file_config->level == ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(strcmp(empty_file_config->dir.data, "") == 0); + TEST_ASSERT(strcmp(empty_file_config->basename.data, "") == 0); + TEST_ASSERT(empty_file_config->file_size == 0); + TEST_ASSERT(empty_file_config->overdue_days == 0); + zvec_config_file_log_destroy(empty_file_config); + } + + // Test 4: Log config creation with console type + // The console settings are read back through .config.console_config, i.e. + // they are embedded by value in ZVecLogConfig, so temp_console stays owned + // by the test and is released separately below. + ZVecConsoleLogConfig *temp_console = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_ERROR); + ZVecLogConfig *log_config_console = + zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, temp_console); + TEST_ASSERT(log_config_console != NULL); + if (log_config_console) { + TEST_ASSERT(log_config_console->type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(log_config_console->config.console_config.level == + ZVEC_LOG_LEVEL_ERROR); + zvec_config_log_destroy(log_config_console); + } + if (temp_console) { + zvec_config_console_log_destroy(temp_console); + } + + // Test 5: Log config creation with file type + // Same ownership pattern as Test 4: temp_file is released by the test. + ZVecFileLogConfig *temp_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + ZVecLogConfig *log_config_file = + zvec_config_log_create(ZVEC_LOG_TYPE_FILE, temp_file); + TEST_ASSERT(log_config_file != NULL); + if (log_config_file) { + TEST_ASSERT(log_config_file->type == ZVEC_LOG_TYPE_FILE); + TEST_ASSERT(log_config_file->config.file_config.level == + ZVEC_LOG_LEVEL_DEBUG); + TEST_ASSERT( + strcmp(log_config_file->config.file_config.dir.data, "./logs") == 0); + TEST_ASSERT( + strcmp(log_config_file->config.file_config.basename.data, "app") == 0); + zvec_config_log_destroy(log_config_file); + } + if (temp_file) { +
zvec_config_file_log_destroy(temp_file); + } + + // Test 6: Log config with NULL config data (should use defaults) + ZVecLogConfig *log_config_default = + zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, NULL); + TEST_ASSERT(log_config_default != NULL); + if (log_config_default) { + TEST_ASSERT(log_config_default->type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(log_config_default->config.console_config.level == + ZVEC_LOG_LEVEL_WARN); + zvec_config_log_destroy(log_config_default); + } + + // Test 7: Config data creation and basic operations + ZVecConfigData *config_data = zvec_config_data_create(); + TEST_ASSERT(config_data != NULL); + if (config_data) { + // Test initial values + TEST_ASSERT(config_data->log_config != NULL); + TEST_ASSERT(config_data->log_config->type == ZVEC_LOG_TYPE_CONSOLE); + + // Test memory limit setting + ZVecErrorCode err = + zvec_config_data_set_memory_limit(config_data, 1024 * 1024 * 1024); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->memory_limit_bytes == 1024 * 1024 * 1024); + + // Test thread count settings + err = zvec_config_data_set_query_thread_count(config_data, 8); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->query_thread_count == 8); + + err = zvec_config_data_set_optimize_thread_count(config_data, 4); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->optimize_thread_count == 4); + + // Test log config replacement + ZVecConsoleLogConfig *new_console = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_DEBUG); + ZVecLogConfig *new_log_config = + zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, new_console); + if (new_log_config) { + err = zvec_config_data_set_log_config(config_data, new_log_config); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->log_config == new_log_config); + } + + zvec_config_data_destroy(config_data); + if (new_console) zvec_config_console_log_destroy(new_console); + if (new_log_config) zvec_config_log_destroy(new_log_config); + } + + // Test 8: Edge cases and error 
conditions + // Test NULL pointer handling + ZVecErrorCode err = zvec_config_data_set_memory_limit(NULL, 1024); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_log_config(NULL, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_query_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_optimize_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test boundary values + ZVecConfigData *boundary_config = zvec_config_data_create(); + if (boundary_config) { + // Test zero values + err = zvec_config_data_set_memory_limit(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->memory_limit_bytes == 0); + + // Test maximum values + err = zvec_config_data_set_memory_limit(boundary_config, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->memory_limit_bytes == UINT64_MAX); + + // Test zero thread counts + err = zvec_config_data_set_query_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->query_thread_count == 0); + + err = zvec_config_data_set_optimize_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->optimize_thread_count == 0); + + zvec_config_data_destroy(boundary_config); + } + + // Test 9: Memory leak prevention - double destroy safety + ZVecConfigData *double_destroy_test = zvec_config_data_create(); + if (double_destroy_test) { + zvec_config_data_destroy(double_destroy_test); + } + + TEST_END(); +} + +void test_zvec_initialize() { + TEST_START(); + + ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + if (config) { + TEST_ASSERT(config->log_config != NULL); + TEST_ASSERT(config->log_config->type == ZVEC_LOG_TYPE_CONSOLE); + } + ZVecErrorCode err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + bool is_initialized = false; + 
zvec_is_initialized(&is_initialized); + TEST_ASSERT(is_initialized); + + TEST_END(); +} + +// ============================================================================= +// Schema-related tests +// ============================================================================= + +void test_schema_basic_operations(void) { + TEST_START(); + + // Test 1: Basic Schema creation and destruction + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + TEST_ASSERT(schema != NULL); + TEST_ASSERT(schema->name != NULL); + TEST_ASSERT(strcmp(schema->name->data, "demo") == 0); + TEST_ASSERT(schema->field_count == 0); + TEST_ASSERT(schema->fields == NULL); + TEST_ASSERT(schema->max_doc_count_per_segment > 0); + + // Test 2: Schema field count operations + size_t initial_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 0); + + // Test 3: Adding fields to schema + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecErrorCode err = zvec_collection_schema_add_field(schema, id_field); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_add == 1); + + // Test 4: Finding fields in schema + const ZVecFieldSchema *found_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(found_field != NULL); + TEST_ASSERT(strcmp(found_field->name->data, "id") == 0); + TEST_ASSERT(found_field->data_type == ZVEC_DATA_TYPE_INT64); + + // Test 5: Getting field by index + ZVecFieldSchema *indexed_field = zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(indexed_field != NULL); + TEST_ASSERT(strcmp(indexed_field->name->data, "id") == 0); + + // Test 6: Adding multiple fields + ZVecFieldSchema fields_to_add[2]; + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *age_field = + zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, 
true, 0); + + fields_to_add[0] = *name_field; + fields_to_add[1] = *age_field; + + err = zvec_collection_schema_add_fields(schema, fields_to_add, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_multi_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_multi_add == 3); + + // Test 7: Finding newly added fields + const ZVecFieldSchema *name_found = + zvec_collection_schema_find_field(schema, "name"); + TEST_ASSERT(name_found != NULL); + TEST_ASSERT(strcmp(name_found->name->data, "name") == 0); + + const ZVecFieldSchema *age_found = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(age_found != NULL); + TEST_ASSERT(strcmp(age_found->name->data, "age") == 0); + + // Test 8: Setting and getting max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 10000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 10000); + + // Test 9: Schema validation + ZVecString *validation_error = NULL; + err = zvec_collection_schema_validate(schema, &validation_error); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(validation_error == NULL); + + // Test 10: Removing single field + err = zvec_collection_schema_remove_field(schema, "age"); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_remove = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_remove == 2); + + const ZVecFieldSchema *removed_field = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(removed_field == NULL); + + // Test 11: Removing multiple fields + const char *fields_to_remove[] = {"name", "id"}; + err = zvec_collection_schema_remove_fields(schema, fields_to_remove, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t final_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(final_count == 0); + + // Test 12: Schema cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + 
+void test_schema_edge_cases(void) { + TEST_START(); + + // Test 1: NULL parameter handling for schema creation + ZVecCollectionSchema *null_schema = zvec_collection_schema_create(NULL); + TEST_ASSERT(null_schema == NULL); + + // Test 2: Empty string schema name + ZVecCollectionSchema *empty_schema = zvec_collection_schema_create(""); + TEST_ASSERT(empty_schema != NULL); + TEST_ASSERT(empty_schema->name != NULL); + TEST_ASSERT(strcmp(empty_schema->name->data, "") == 0); + zvec_collection_schema_destroy(empty_schema); + + // Test 3: Very long schema name + char long_name[1024]; + memset(long_name, 'a', 1023); + long_name[1023] = '\0'; + ZVecCollectionSchema *long_schema = zvec_collection_schema_create(long_name); + TEST_ASSERT(long_schema != NULL); + TEST_ASSERT(long_schema->name != NULL); + TEST_ASSERT(strlen(long_schema->name->data) == 1023); + zvec_collection_schema_destroy(long_schema); + + // Test 4: NULL schema parameter handling for all functions + ZVecErrorCode err; + size_t count = zvec_collection_schema_get_field_count(NULL); + TEST_ASSERT(count == 0); + + const ZVecFieldSchema *null_field = + zvec_collection_schema_find_field(NULL, "test"); + TEST_ASSERT(null_field == NULL); + + ZVecFieldSchema *null_indexed_field = + zvec_collection_schema_get_field(NULL, 0); + TEST_ASSERT(null_indexed_field == NULL); + + uint64_t null_max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(NULL); + TEST_ASSERT(null_max_doc_count == 0); + + err = zvec_collection_schema_set_max_doc_count_per_segment(NULL, 1000); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + ZVecString *null_validation_error = NULL; + err = zvec_collection_schema_validate(NULL, &null_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(null_validation_error == NULL); + + err = zvec_collection_schema_add_field(NULL, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_add_fields(NULL, NULL, 0); + TEST_ASSERT(err == 
ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_remove_field(NULL, "test"); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + const char *null_field_names[] = {NULL}; + err = zvec_collection_schema_remove_fields(NULL, null_field_names, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 5: Working with valid schema for edge cases + ZVecCollectionSchema *schema = zvec_collection_schema_create("edge_test"); + TEST_ASSERT(schema != NULL); + + // Test 6: Adding NULL field to schema + err = zvec_collection_schema_add_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 7: Adding fields with NULL array + err = zvec_collection_schema_add_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 8: Adding zero fields + err = zvec_collection_schema_add_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 9: Finding field with NULL name + const ZVecFieldSchema *null_name_field = + zvec_collection_schema_find_field(schema, NULL); + TEST_ASSERT(null_name_field == NULL); + + // Test 10: Finding non-existent field + const ZVecFieldSchema *nonexistent_field = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent_field == NULL); + + // Test 11: Getting field with invalid index + ZVecFieldSchema *invalid_index_field = + zvec_collection_schema_get_field(schema, 1000); + TEST_ASSERT(invalid_index_field == NULL); + + // Test 12: Getting field from empty schema with index 0 + ZVecFieldSchema *zero_index_field = + zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(zero_index_field == NULL); + + // Test 13: Removing field with NULL name + err = zvec_collection_schema_remove_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 14: Removing non-existent field + err = zvec_collection_schema_remove_field(schema, "nonexistent"); + TEST_ASSERT(err == ZVEC_ERROR_NOT_FOUND); + + // Test 15: Removing fields with NULL 
array + err = zvec_collection_schema_remove_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 16: Removing zero fields + err = zvec_collection_schema_remove_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 17: Setting extremely large max doc count + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + uint64_t retrieved_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(retrieved_max_count == UINT64_MAX); + + // Test 18: Setting zero max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + uint64_t zero_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(zero_max_count == 0); + + // Test 19: Schema validation with empty schema + ZVecString *empty_validation_error = NULL; + err = zvec_collection_schema_validate(schema, &empty_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 20: Add duplicate field names + ZVecFieldSchema *first_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *second_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_STRING, false, 0); + + err = zvec_collection_schema_add_field(schema, first_id); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_collection_schema_add_field(schema, second_id); + TEST_ASSERT(err == ZVEC_ERROR_ALREADY_EXISTS); + zvec_field_schema_destroy(second_id); + + // Verify fields + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count == 1); + + // Test 21: Cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_schema_field_operations(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test field count + size_t initial_count = 
zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 5); + + // Test finding non-existent field + const ZVecFieldSchema *nonexistent = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent == NULL); + + // Test finding existing field + const ZVecFieldSchema *id_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(id_field != NULL); + if (id_field) { + TEST_ASSERT(strcmp(id_field->name->data, "id") == 0); + TEST_ASSERT(id_field->data_type == ZVEC_DATA_TYPE_INT64); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_normal_schema_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(false, "test_normal", NULL, NULL, 1000); + TEST_ASSERT(schema != NULL); + + if (schema) { + TEST_ASSERT(strcmp(schema->name->data, "test_normal") == 0); + + // Verify field count + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count > 0); + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_schema_with_indexes(void) { + TEST_START(); + + // Test Schema with scalar index + ZVecCollectionSchema *scalar_index_schema = + zvec_test_create_schema_with_scalar_index(true, true, + "scalar_index_test"); + TEST_ASSERT(scalar_index_schema != NULL); + if (scalar_index_schema) { + zvec_collection_schema_destroy(scalar_index_schema); + } + + // Test Schema with vector index + ZVecCollectionSchema *vector_index_schema = + zvec_test_create_schema_with_vector_index(false, "vector_index_test", + NULL); + TEST_ASSERT(vector_index_schema != NULL); + if (vector_index_schema) { + zvec_collection_schema_destroy(vector_index_schema); + } + + TEST_END(); +} + +void test_schema_max_doc_count(void) { + TEST_START(); + + // Test 1: Setting max doc count to a valid value + ZVecCollectionSchema *schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + 
ZVecErrorCode err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, 1000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 1000); + + zvec_collection_schema_destroy(schema); + + // Test 2: Setting max doc count to zero + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 0); + + zvec_collection_schema_destroy(schema); + + // Test 3: Setting max doc count to maximum value + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == UINT64_MAX); + + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +// ============================================================================= +// Collection-related tests +// ============================================================================= + +void test_collection_basic_operations(void) { + TEST_START(); + + // Create temporary directory + char temp_dir[] = "/tmp/zvec_test_collection_basic_operations"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test collection operations + ZVecDoc *doc1 = zvec_test_create_doc(1, schema, NULL); + ZVecDoc *doc2 = zvec_test_create_doc(2, schema, NULL); + ZVecDoc 
*doc3 = zvec_test_create_doc(3, schema, NULL); + + TEST_ASSERT(doc1 != NULL); + TEST_ASSERT(doc2 != NULL); + TEST_ASSERT(doc3 != NULL); + + if (doc1 && doc2 && doc3) { + ZVecDoc *docs[] = {doc1, doc2, doc3}; + size_t success_count, error_count; + + // Test insert operation + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 3); + TEST_ASSERT(error_count == 0); + + // Test update operation + zvec_doc_set_score(doc1, 0.95f); + ZVecDoc *update_docs[] = {doc1}; + err = zvec_collection_update(collection, (const ZVecDoc **)update_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test upsert operation + zvec_doc_set_pk(doc3, "pk_3_modified"); + ZVecDoc *upsert_docs[] = {doc3}; + err = zvec_collection_upsert(collection, (const ZVecDoc **)upsert_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test delete operation by primary keys + const char *pks[] = {"pk_1", "pk_2"}; + err = zvec_collection_delete(collection, pks, 2, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 2); + TEST_ASSERT(error_count == 0); + + // Test delete by filter + err = zvec_collection_delete_by_filter(collection, "id > 0"); + TEST_ASSERT(err == ZVEC_OK); + + // Clean up documents + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + zvec_doc_destroy(doc3); + } + + // Test collection flush + err = zvec_collection_flush(collection); + TEST_ASSERT(err == ZVEC_OK); + + // Test collection optimization + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf 
%s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_edge_cases(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_edge_cases"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + + // Test empty name collection + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test long name collection + char long_name[256]; + memset(long_name, 'a', 255); + long_name[255] = '\0'; + + char long_path[512]; + snprintf(long_path, sizeof(long_path), "%s/%s", temp_dir, + "very_long_collection_name_that_tests_path_limits"); + + err = zvec_collection_create_and_open(long_path, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test NULL name集合 + err = zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err != ZVEC_OK); + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_delete_by_filter(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_delete_by_filter"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test normal deletion filtering + err = zvec_collection_delete_by_filter(collection, "id > 1"); + TEST_ASSERT(err == ZVEC_OK); + + // Test NULL filter + err = zvec_collection_delete_by_filter(collection, NULL); + 
TEST_ASSERT(err != ZVEC_OK); + + // Test empty string filter + err = zvec_collection_delete_by_filter(collection, ""); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_stats(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + err = zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + // Basic validation of statistics + TEST_ASSERT(stats->doc_count == + 0); // New collection should have no documents + zvec_collection_stats_destroy(stats); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Field-related tests +// ============================================================================= + +void test_field_schema_functions(void) { + TEST_START(); + + // Test scalar field creation + ZVecFieldSchema scalar_field = {0}; + ZVecString name1 = {0}; + name1.data = "test_field"; + name1.length = 10; + scalar_field.name = &name1; + scalar_field.data_type = ZVEC_DATA_TYPE_STRING; + scalar_field.nullable = true; + scalar_field.dimension = 0; + + TEST_ASSERT(strcmp(scalar_field.name->data, "test_field") == 0); + TEST_ASSERT(scalar_field.data_type == 
ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(scalar_field.nullable == true); + + // Test vector field creation + ZVecFieldSchema vector_field = {0}; + ZVecString name2 = {0}; + name2.data = "vec_field"; + name2.length = 9; + vector_field.name = &name2; + vector_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + vector_field.nullable = false; + vector_field.dimension = 128; + + TEST_ASSERT(strcmp(vector_field.name->data, "vec_field") == 0); + TEST_ASSERT(vector_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(vector_field.dimension == 128); + + // Test sparse vector field creation + ZVecFieldSchema sparse_field = {0}; + ZVecString name3 = {0}; + name3.data = "sparse_field"; + name3.length = 12; + sparse_field.name = &name3; + sparse_field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; + sparse_field.nullable = false; + sparse_field.dimension = 0; + + TEST_ASSERT(strcmp(sparse_field.name->data, "sparse_field") == 0); + TEST_ASSERT(sparse_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32); + + TEST_END(); +} + +void test_field_helper_functions(void) { + TEST_START(); + + // Test scalar field helper functions + ZVecInvertIndexParams *invert_params = + zvec_test_create_default_invert_params(true); + ZVecFieldSchema *scalar_field = zvec_test_create_scalar_field( + "test_scalar", ZVEC_DATA_TYPE_INT32, true, invert_params); + TEST_ASSERT(scalar_field != NULL); + if (scalar_field) { + TEST_ASSERT(strcmp(scalar_field->name->data, "test_scalar") == 0); + TEST_ASSERT(scalar_field->data_type == ZVEC_DATA_TYPE_INT32); + free(scalar_field); + } + if (invert_params) { + free(invert_params); + } + + // Test vector field helper functions + ZVecHnswIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + ZVecFieldSchema *vector_field = zvec_test_create_vector_field( + "test_vector", ZVEC_DATA_TYPE_VECTOR_FP32, 128, false, hnsw_params); + TEST_ASSERT(vector_field != NULL); + if (vector_field) { + TEST_ASSERT(strcmp(vector_field->name->data, "test_vector") == 0); + 
TEST_ASSERT(vector_field->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(vector_field->dimension == 128); + free(vector_field); + } + if (hnsw_params) { + free(hnsw_params); + } + + TEST_END(); +} + +// ============================================================================= +// Document-related tests +// ============================================================================= + +void test_doc_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test complete document creation + ZVecDoc *doc = zvec_test_create_doc(1, schema, NULL); + TEST_ASSERT(doc != NULL); + if (doc) { + zvec_doc_destroy(doc); + } + + // Test null value document creation + ZVecDoc *null_doc = zvec_test_create_doc_null(2, schema, NULL); + TEST_ASSERT(null_doc != NULL); + if (null_doc) { + zvec_doc_destroy(null_doc); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_doc_primary_key(void) { + TEST_START(); + + // Test primary key generation + char *pk = zvec_test_make_pk(12345); + TEST_ASSERT(pk != NULL); + if (pk) { + TEST_ASSERT(strcmp(pk, "pk_12345") == 0); + free(pk); + } + + TEST_END(); +} + +void test_doc_functions(void) { + TEST_START(); + + // Create test document using utility function + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + // Test primary key operations + zvec_doc_set_pk(doc, "test_doc_complete"); + const char *pk = zvec_doc_get_pk_pointer(doc); + TEST_ASSERT(pk != NULL); + TEST_ASSERT(strcmp(pk, "test_doc_complete") == 0); + + // Test document ID and score operations + zvec_doc_set_doc_id(doc, 99999); + uint64_t doc_id = zvec_doc_get_doc_id(doc); + TEST_ASSERT(doc_id == 99999); + + zvec_doc_set_score(doc, 0.95f); + float score = zvec_doc_get_score(doc); + TEST_ASSERT(score == 0.95f); + + // Test operator 
operations + zvec_doc_set_operator(doc, ZVEC_DOC_OP_INSERT); + ZVecDocOperator op = zvec_doc_get_operator(doc); + TEST_ASSERT(op == ZVEC_DOC_OP_INSERT); + + ZVecErrorCode err; + + // ==================== COMPREHENSIVE DATA TYPE TESTING ==================== + + printf( + "=== Testing zvec_doc_get_field_value_basic with all supported types " + "===\n"); + + // Test all basic numeric types that zvec_doc_get_field_value_basic supports + // BOOL type + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); + + bool bool_result; + err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_result, sizeof(bool_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_result == true); + + // INT32 type + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; // Min int32 + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + int32_t int32_result; + err = zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_result == -2147483648); + + // INT64 type + ZVecDocField int64_field; + int64_field.name.data = "int64_field"; + int64_field.name.length = strlen("int64_field"); + int64_field.data_type = ZVEC_DATA_TYPE_INT64; + int64_field.value.int64_value = 9223372036854775807LL; // Max int64 + err = zvec_doc_add_field_by_struct(doc, &int64_field); + TEST_ASSERT(err == ZVEC_OK); + + int64_t int64_result; + err = zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_result, sizeof(int64_result)); + 
TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_result == 9223372036854775807LL); + + // UINT32 type + ZVecDocField uint32_field; + uint32_field.name.data = "uint32_field"; + uint32_field.name.length = strlen("uint32_field"); + uint32_field.data_type = ZVEC_DATA_TYPE_UINT32; + uint32_field.value.uint32_value = 4294967295U; // Max uint32 + err = zvec_doc_add_field_by_struct(doc, &uint32_field); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t uint32_result; + err = + zvec_doc_get_field_value_basic(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32, + &uint32_result, sizeof(uint32_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_result == 4294967295U); + + // UINT64 type + ZVecDocField uint64_field; + uint64_field.name.data = "uint64_field"; + uint64_field.name.length = strlen("uint64_field"); + uint64_field.data_type = ZVEC_DATA_TYPE_UINT64; + uint64_field.value.uint64_value = 18446744073709551615ULL; // Max uint64 + err = zvec_doc_add_field_by_struct(doc, &uint64_field); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t uint64_result; + err = + zvec_doc_get_field_value_basic(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64, + &uint64_result, sizeof(uint64_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_result == 18446744073709551615ULL); + + // FLOAT type + ZVecDocField float_field; + float_field.name.data = "float_field"; + float_field.name.length = strlen("float_field"); + float_field.data_type = ZVEC_DATA_TYPE_FLOAT; + float_field.value.float_value = 3.14159265359f; + err = zvec_doc_add_field_by_struct(doc, &float_field); + TEST_ASSERT(err == ZVEC_OK); + + float float_result; + err = zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_result, sizeof(float_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fabsf(float_result - 3.14159265359f) < 1e-6f); + + // DOUBLE type + ZVecDocField double_field; + double_field.name.data = "double_field"; + double_field.name.length = strlen("double_field"); + double_field.data_type = 
ZVEC_DATA_TYPE_DOUBLE; + double_field.value.double_value = 2.71828182845904523536; + err = zvec_doc_add_field_by_struct(doc, &double_field); + TEST_ASSERT(err == ZVEC_OK); + + double double_result; + err = + zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_result, sizeof(double_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fabs(double_result - 2.71828182845904523536) < 1e-15); + + printf( + "=== Testing zvec_doc_get_field_value_copy with all supported types " + "===\n"); + + // Test STRING type with zvec_doc_get_field_value_copy + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello, 世界!"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + void *string_result; + size_t string_size; + err = zvec_doc_get_field_value_copy( + doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_result, &string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_result != NULL); + TEST_ASSERT(string_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_result, "Hello, 世界!", string_size) == 0); + free(string_result); + + // Test BINARY type with zvec_doc_get_field_value_copy + ZVecDocField binary_field; + binary_field.name.data = "binary_field"; + binary_field.name.length = strlen("binary_field"); + binary_field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}; + binary_field.value.string_value = + *zvec_bin_create(binary_data, sizeof(binary_data)); + err = zvec_doc_add_field_by_struct(doc, &binary_field); + TEST_ASSERT(err == ZVEC_OK); + + void *binary_result; + size_t binary_size; + err = zvec_doc_get_field_value_copy( + doc, "binary_field", ZVEC_DATA_TYPE_BINARY, &binary_result, &binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(binary_result != 
NULL); + TEST_ASSERT(binary_size == 6); + TEST_ASSERT(memcmp(binary_result, "\x00\x01\x02\xFF\xFE\xFD", binary_size) == + 0); + free(binary_result); + + // Test VECTOR_FP32 type with zvec_doc_get_field_value_copy + float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; + ZVecDocField fp32_vec_field; + fp32_vec_field.name.data = "fp32_vec_field"; + fp32_vec_field.name.length = strlen("fp32_vec_field"); + fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + fp32_vec_field.value.vector_value.data = test_vector; + fp32_vec_field.value.vector_value.length = 5; + err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field); + TEST_ASSERT(err == ZVEC_OK); + + void *fp32_vec_result; + size_t fp32_vec_size; + err = zvec_doc_get_field_value_copy(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_result, &fp32_vec_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_result != NULL); + TEST_ASSERT(fp32_vec_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_result, test_vector, fp32_vec_size) == 0); + free(fp32_vec_result); + + + printf( + "=== Testing zvec_doc_get_field_value_pointer with all supported types " + "===\n"); + + // Test pointer access to basic types + const void *bool_ptr; + size_t bool_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_ptr, &bool_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_ptr != NULL); + TEST_ASSERT(bool_ptr_size == sizeof(bool)); + TEST_ASSERT(*(const bool *)bool_ptr == true); + + const void *int32_ptr; + size_t int32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "int32_field", ZVEC_DATA_TYPE_INT32, &int32_ptr, &int32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_ptr != NULL); + TEST_ASSERT(int32_ptr_size == sizeof(int32_t)); + TEST_ASSERT(*(const int32_t *)int32_ptr == -2147483648); + + // Test pointer access to STRING (should return null-terminated C string) + const void *string_ptr; + size_t string_ptr_size; + err = 
zvec_doc_get_field_value_pointer(doc, "string_field", + ZVEC_DATA_TYPE_STRING, &string_ptr, + &string_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_ptr != NULL); + TEST_ASSERT(string_ptr_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_ptr, "Hello, 世界!", string_ptr_size) == 0); + + // Test pointer access to BINARY + const void *binary_ptr; + size_t binary_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "binary_field", + ZVEC_DATA_TYPE_BINARY, &binary_ptr, + &binary_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(binary_ptr != NULL); + TEST_ASSERT(binary_ptr_size == 6); + TEST_ASSERT(memcmp(binary_ptr, "\x00\x01\x02\xFF\xFE\xFD", binary_ptr_size) == + 0); + + // Test pointer access to VECTOR_FP32 + const void *fp32_vec_ptr; + size_t fp32_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_ptr, &fp32_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_ptr != NULL); + TEST_ASSERT(fp32_vec_ptr_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_ptr, test_vector, fp32_vec_ptr_size) == 0); + + // Declare dummy variables for error testing + const void *dummy_ptr; + size_t dummy_ptr_size; + + // ==================== FIELD OPERATIONS TESTING ==================== + + // Test field operations + size_t field_count = zvec_doc_get_field_count(doc); + TEST_ASSERT(field_count >= 10); // All the fields we've added + + // Test field existence checks + TEST_ASSERT(zvec_doc_has_field(doc, "bool_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "int32_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "string_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "nonexistent") == false); + + TEST_ASSERT(zvec_doc_has_field_value(doc, "bool_field") == true); + TEST_ASSERT(zvec_doc_is_field_null(doc, "bool_field") == false); + TEST_ASSERT(zvec_doc_is_field_null(doc, "nonexistent") == false); + + // Test field names retrieval + char 
**field_names; + size_t name_count; + err = zvec_doc_get_field_names(doc, &field_names, &name_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(name_count >= 10); + TEST_ASSERT(field_names != NULL); + + // Verify some expected fields are present + bool found_key_fields = false; + for (size_t i = 0; i < name_count; i++) { + if (strcmp(field_names[i], "bool_field") == 0 || + strcmp(field_names[i], "int32_field") == 0 || + strcmp(field_names[i], "string_field") == 0) { + found_key_fields = true; + break; + } + } + TEST_ASSERT(found_key_fields == true); + + zvec_free_str_array(field_names, name_count); + + // ==================== ERROR CONDITION TESTING ==================== + + printf("=== Testing error conditions ===\n"); + + // Test non-existent field + err = + zvec_doc_get_field_value_basic(doc, "missing_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err != ZVEC_OK); + + err = + zvec_doc_get_field_value_copy(doc, "missing_field", ZVEC_DATA_TYPE_STRING, + &string_result, &string_size); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_pointer( + doc, "missing_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size); + TEST_ASSERT(err != ZVEC_OK); + + // Test wrong data type access + err = + zvec_doc_get_field_value_basic(doc, "string_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_STRING, + &string_result, &string_size); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_pointer( + doc, "bool_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size); + TEST_ASSERT(err != ZVEC_OK); + + // ==================== DOCUMENT SERIALIZATION TESTING ==================== + + printf("=== Testing document serialization ===\n"); + + uint8_t *serialized_data; + size_t data_size; + err = zvec_doc_serialize(doc, &serialized_data, &data_size); + TEST_ASSERT(err == ZVEC_OK); + 
TEST_ASSERT(serialized_data != NULL); + TEST_ASSERT(data_size > 0); + + ZVecDoc *deserialized_doc; + err = zvec_doc_deserialize(serialized_data, data_size, &deserialized_doc); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(deserialized_doc != NULL); + + // Verify deserialized document has same field count + size_t deserialized_field_count = zvec_doc_get_field_count(deserialized_doc); + TEST_ASSERT(deserialized_field_count == field_count); + + // Test a field from deserialized document + int32_t deserialized_int32; + err = zvec_doc_get_field_value_basic( + deserialized_doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &deserialized_int32, sizeof(deserialized_int32)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(deserialized_int32 == -2147483648); + + // ==================== CLEANUP ==================== + + zvec_doc_destroy(deserialized_doc); + zvec_free_uint8_array(serialized_data); + zvec_free_str(string_field.value.string_value.data); + zvec_free_str(binary_field.value.string_value.data); + zvec_doc_destroy(doc); + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +// ============================================================================= +// Index parameter tests +// ============================================================================= + +void test_index_params(void) { + TEST_START(); + + // Test HNSW parameter creation + ZVecHnswIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + TEST_ASSERT(hnsw_params != NULL); + if (hnsw_params) { + free(hnsw_params); + } + + // Test Flat parameter creation + ZVecFlatIndexParams *flat_params = zvec_test_create_default_flat_params(); + TEST_ASSERT(flat_params != NULL); + if (flat_params) { + free(flat_params); + } + + // Test scalar index parameter creation + ZVecInvertIndexParams *invert_params = + zvec_test_create_default_invert_params(true); + TEST_ASSERT(invert_params != NULL); + if (invert_params) { + free(invert_params); + } + + TEST_END(); +} + +// 
============================================================================= +// Memory management tests +// ============================================================================= +void test_zvec_string_functions(void) { + TEST_START(); + + // Test string creation and basic operations + ZVecString *str1 = zvec_string_create("Hello World"); + TEST_ASSERT(str1 != NULL); + TEST_ASSERT(zvec_string_length(str1) == 11); + TEST_ASSERT(strcmp(zvec_string_c_str(str1), "Hello World") == 0); + + // Test string copy + ZVecString *str2 = zvec_string_copy(str1); + TEST_ASSERT(str2 != NULL); + TEST_ASSERT(zvec_string_length(str2) == 11); + TEST_ASSERT(strcmp(zvec_string_c_str(str2), "Hello World") == 0); + + // Test string comparison + int cmp_result = zvec_string_compare(str1, str2); + TEST_ASSERT(cmp_result == 0); + + ZVecString *str3 = zvec_string_create("Hello"); + TEST_ASSERT(zvec_string_compare(str1, str3) > 0); + + // Test string creation from view + ZVecStringView view = {"Hello View", 10}; + ZVecString *str4 = zvec_string_create_from_view(&view); + TEST_ASSERT(str4 != NULL); + TEST_ASSERT(zvec_string_length(str4) == 10); + TEST_ASSERT(strcmp(zvec_string_c_str(str4), "Hello View") == 0); + + // Test string view with embedded null bytes + char binary_data[] = {'H', 'e', 'l', 'l', 'o', '\0', 'W', 'o', 'r', 'l', 'd'}; + ZVecStringView binary_view = {binary_data, 11}; + ZVecString *str5 = zvec_string_create_from_view(&binary_view); + TEST_ASSERT(str5 != NULL); + TEST_ASSERT(zvec_string_length(str5) == 11); + // Note: strcmp will stop at first null byte, so we need to compare manually + TEST_ASSERT(memcmp(zvec_string_c_str(str5), binary_data, 11) == 0); + + // Cleanup + zvec_free_string(str1); + zvec_free_string(str2); + zvec_free_string(str3); + zvec_free_string(str4); + zvec_free_string(str5); + + TEST_END(); +} + +void test_index_params_functions(void) { + TEST_START(); + + // Test base index params + ZVecBaseIndexParams base_params; + 
zvec_index_params_base_init(&base_params, ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(base_params.index_type == ZVEC_INDEX_TYPE_HNSW); + + // Test invert index params + ZVecInvertIndexParams invert_params; + zvec_index_params_invert_init(&invert_params, true, false); + TEST_ASSERT(invert_params.base.index_type == ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params.enable_range_optimization == true); + TEST_ASSERT(invert_params.enable_extended_wildcard == false); + + // Test vector index params + ZVecVectorIndexParams vector_params; + zvec_index_params_vector_init(&vector_params, ZVEC_INDEX_TYPE_HNSW, + ZVEC_METRIC_TYPE_L2, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(vector_params.base.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(vector_params.metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(vector_params.quantize_type == ZVEC_QUANTIZE_TYPE_UNDEFINED); + + // Test HNSW index params + ZVecHnswIndexParams hnsw_params; + zvec_index_params_hnsw_init(&hnsw_params, ZVEC_METRIC_TYPE_COSINE, 16, 200, + 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(hnsw_params.base.base.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params.base.metric_type == ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(hnsw_params.m == 16); + TEST_ASSERT(hnsw_params.ef_construction == 200); + TEST_ASSERT(hnsw_params.ef_search == 50); + + // Test Flat index params + ZVecFlatIndexParams flat_params; + zvec_index_params_flat_init(&flat_params, ZVEC_METRIC_TYPE_IP, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(flat_params.base.base.index_type == ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params.base.metric_type == ZVEC_METRIC_TYPE_IP); + + // Test IVF index params + ZVecIVFIndexParams ivf_params; + zvec_index_params_ivf_init(&ivf_params, ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(ivf_params.base.base.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params.n_list == 100); + TEST_ASSERT(ivf_params.n_iters == 10); + TEST_ASSERT(ivf_params.use_soar == 
true); + TEST_ASSERT(ivf_params.n_probe == 5); + + TEST_END(); +} + +void test_utility_functions(void) { + TEST_START(); + + // Test error code to string conversion + const char *error_str = zvec_error_code_to_string(ZVEC_OK); + TEST_ASSERT(error_str != NULL); + TEST_ASSERT(strlen(error_str) > 0); + + error_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(error_str != NULL); + + // Test data type to string conversion + const char *data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_INT32); + TEST_ASSERT(data_type_str != NULL); + TEST_ASSERT(strlen(data_type_str) > 0); + + data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(data_type_str != NULL); + + // Test index type to string conversion + const char *index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(index_type_str != NULL); + TEST_ASSERT(strlen(index_type_str) > 0); + + index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(index_type_str != NULL); + + TEST_END(); +} + +void test_memory_management_functions(void) { + TEST_START(); + + // Test basic memory allocation + void *ptr = zvec_malloc(1024); + TEST_ASSERT(ptr != NULL); + + // Test memory reallocation + void *new_ptr = zvec_realloc(ptr, 2048); + TEST_ASSERT(new_ptr != NULL); + + // Test memory deallocation + zvec_free(new_ptr); + + // Test string allocation and deallocation + ZVecString *str = zvec_string_create("Test String"); + TEST_ASSERT(str != NULL); + zvec_free_string(str); + + TEST_END(); +} + +void test_query_params_functions(void) { + TEST_START(); + + // Test basic query parameters creation and destruction + ZVecQueryParams *base_params = zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(base_params != NULL); + + // Test union query parameters + ZVecQueryParamsUnion *union_params = + zvec_query_params_union_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(union_params != NULL); + + // Test HNSW query parameters + 
ZVecHnswQueryParams *hnsw_params = zvec_query_params_hnsw_create( + ZVEC_INDEX_TYPE_HNSW, 50, 0.5f, false, true); + TEST_ASSERT(hnsw_params != NULL); + + // Test IVF query parameters + ZVecIVFQueryParams *ivf_params = + zvec_query_params_ivf_create(ZVEC_INDEX_TYPE_IVF, 10, true, 1.5f); + TEST_ASSERT(ivf_params != NULL); + + // Test Flat query parameters + ZVecFlatQueryParams *flat_params = + zvec_query_params_flat_create(ZVEC_INDEX_TYPE_FLAT, false, 2.0f); + TEST_ASSERT(flat_params != NULL); + + // Test setting various parameters on base query params + ZVecErrorCode err; + + // Test index type setting + err = zvec_query_params_set_index_type(base_params, ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(err == ZVEC_OK); + + // Test radius setting + err = zvec_query_params_set_radius(base_params, 0.8f); + TEST_ASSERT(err == ZVEC_OK); + + // Test linear search setting + err = zvec_query_params_set_is_linear(base_params, false); + TEST_ASSERT(err == ZVEC_OK); + + // Test refiner setting + err = zvec_query_params_set_is_using_refiner(base_params, true); + TEST_ASSERT(err == ZVEC_OK); + + // Test HNSW-specific parameters + err = zvec_query_params_hnsw_set_ef(hnsw_params, 75); + TEST_ASSERT(err == ZVEC_OK); + + // Test IVF-specific parameters + err = zvec_query_params_ivf_set_nprobe(ivf_params, 15); + TEST_ASSERT(err == ZVEC_OK); + + // Test IVF scale factor setting + err = zvec_query_params_ivf_set_scale_factor(ivf_params, 2.5f); + TEST_ASSERT(err == ZVEC_OK); + + // Test destruction of valid parameters + zvec_query_params_destroy(base_params); + zvec_query_params_hnsw_destroy(hnsw_params); + zvec_query_params_ivf_destroy(ivf_params); + zvec_query_params_flat_destroy(flat_params); + zvec_query_params_union_destroy(union_params); + + + // Test boundary cases - null pointer handling + zvec_query_params_hnsw_destroy(NULL); + zvec_query_params_ivf_destroy(NULL); + zvec_query_params_flat_destroy(NULL); + zvec_query_params_union_destroy(NULL); + + + TEST_END(); +} + +void 
test_collection_stats_functions(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats_functions"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + + // Test normal statistics retrieval + err = zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + TEST_ASSERT(stats->doc_count == 0); + zvec_collection_stats_destroy(stats); + } + + // Test NULL parameters + err = zvec_collection_get_stats(NULL, &stats); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_get_stats(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test statistics destruction boundary cases + zvec_collection_stats_destroy(NULL); + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_dml_functions(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_dml"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test insertion function boundary cases + size_t success_count, error_count; + + // Test NULL collection + err = zvec_collection_insert(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test NULL document array + err = zvec_collection_insert(collection, NULL, 0, &success_count, + &error_count); + 
TEST_ASSERT(err != ZVEC_OK); + + // Test zero document count + ZVecDoc *empty_docs[1]; + err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 0, + &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test NULL count pointer + err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 1, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test update function boundary cases + err = zvec_collection_update(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_update(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_update(collection, (const ZVecDoc **)empty_docs, 0, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test upsert function boundary cases + err = zvec_collection_upsert(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_upsert(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_upsert(collection, (const ZVecDoc **)empty_docs, 0, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test deletion function boundary cases + const char *pks[1]; + err = zvec_collection_delete(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete(collection, pks, 0, NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test deletion by filter boundary cases + err = zvec_collection_delete_by_filter(NULL, NULL); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete_by_filter(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", 
temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Actual Query Execution Tests +// ============================================================================= + +void test_actual_vector_queries(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_actual_queries"; + + // Create schema with vector field + ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add ID field + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + // Add vector field with HNSW index + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + ZVecFieldSchema *vec_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Insert test documents + float vec1[] = {1.0f, 0.0f, 0.0f, 0.0f}; + float vec2[] = {0.0f, 1.0f, 0.0f, 0.0f}; + float vec3[] = {0.0f, 0.0f, 1.0f, 0.0f}; + float vec4[] = {0.7f, 0.7f, 0.0f, 0.0f}; // Similar to vec1 and vec2 + + ZVecDoc *docs[4]; + for (int i = 0; i < 4; i++) { + docs[i] = zvec_doc_create(); + zvec_doc_set_pk(docs[i], zvec_test_make_pk(i + 1)); + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_INT64, + &(int64_t){i + 1}, sizeof(int64_t)); + } + + zvec_doc_add_field_by_value( + docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, sizeof(vec1)); + zvec_doc_add_field_by_value( + docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, sizeof(vec2)); 
+ zvec_doc_add_field_by_value( + docs[2], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec3, sizeof(vec3)); + zvec_doc_add_field_by_value( + docs[3], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec4, sizeof(vec4)); + + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 4, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 4); + TEST_ASSERT(error_count == 0); + + // Flush collection to build index + zvec_collection_flush(collection); + + // Test 1: Basic vector search + ZVecVectorQuery query1 = {0}; + query1.field_name = (ZVecString){.data = "embedding", .length = 9}; + query1.query_vector = + (ZVecByteArray){.data = (uint8_t *)vec1, .length = sizeof(vec1)}; + query1.topk = 3; + query1.include_vector = true; + query1.include_doc_id = true; + + ZVecDoc **results = NULL; + size_t result_count = 0; + err = zvec_collection_query(collection, &query1, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count > 0); + TEST_ASSERT(results != NULL); + + // First result should be vec1 itself (distance ~0) + if (result_count > 0) { + float score = zvec_doc_get_score(results[0]); + TEST_ASSERT(score < 0.001f); // Very small distance + } + + zvec_docs_free(results, result_count); + + // Test 2: Search with filter + ZVecVectorQuery query2 = query1; + query2.filter = (ZVecString){.data = "id > 2", .length = 6}; + + err = zvec_collection_query(collection, &query2, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + + // Should only return documents with id > 2 + for (size_t i = 0; i < result_count; i++) { + int64_t id; + zvec_doc_get_field_value_basic(results[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + TEST_ASSERT(id > 2); + } + + zvec_docs_free(results, result_count); + + // Cleanup documents + for (int i = 0; i < 4; i++) { + zvec_doc_destroy(docs[i]); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + 
zvec_index_params_hnsw_destroy(hnsw_params); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_index_creation_and_management(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_index_management"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test 1: Create HNSW index + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + TEST_ASSERT(hnsw_params != NULL); + + err = zvec_collection_create_hnsw_index( + collection, &(ZVecString){.data = "dense", .length = 5}, hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test 2: Create scalar index + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + TEST_ASSERT(invert_params != NULL); + + err = zvec_collection_create_invert_index( + collection, &(ZVecString){.data = "name", .length = 4}, + invert_params); + TEST_ASSERT(err == ZVEC_OK); + + // Note: Index statistics and drop functionality not yet implemented in C + // API These would require zvec_collection_get_index_stats() and + // zvec_collection_drop_index() + + // Test 3: Optimize collection + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + zvec_index_params_hnsw_destroy(hnsw_params); + zvec_index_params_invert_destroy(invert_params); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_ddl_operations(void) { + TEST_START(); + + char temp_dir[] = 
"/tmp/zvec_test_collection_ddl"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test 1: Add new field (using schema modification before opening) + ZVecFieldSchema *new_field = + zvec_field_schema_create("new_field", ZVEC_DATA_TYPE_STRING, true, 0); + TEST_ASSERT(new_field != NULL); + + // Note: Runtime field addition not yet implemented in C API + // This would require zvec_collection_add_field() which is not implemented + + // Test 2: Get collection schema + ZVecCollectionSchema *retrieved_schema = NULL; + err = zvec_collection_get_schema(collection, &retrieved_schema); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(retrieved_schema != NULL); + + size_t field_count = + zvec_collection_schema_get_field_count(retrieved_schema); + TEST_ASSERT(field_count > 0); + + zvec_collection_schema_destroy(retrieved_schema); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_field_ddl_operations(void) { + TEST_START(); + + // Test field schema creation with various configurations + ZVecFieldSchema *field1 = + zvec_field_schema_create("test_field1", ZVEC_DATA_TYPE_STRING, false, 0); + TEST_ASSERT(field1 != NULL); + TEST_ASSERT(strcmp(field1->name->data, "test_field1") == 0); + TEST_ASSERT(field1->data_type == ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(field1->nullable == false); + TEST_ASSERT(field1->dimension == 0); + + ZVecFieldSchema *field2 = zvec_field_schema_create( + "test_field2", ZVEC_DATA_TYPE_VECTOR_FP32, true, 128); + TEST_ASSERT(field2 != NULL); + TEST_ASSERT(field2->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + 
TEST_ASSERT(field2->nullable == true); + TEST_ASSERT(field2->dimension == 128); + + // Test index parameter setting + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + TEST_ASSERT(hnsw_params != NULL); + + ZVecErrorCode err = zvec_field_schema_set_index_params( + field2, (ZVecIndexParams *)hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test field operations + // (Field validation function doesn't exist in current API) + + // Cleanup + zvec_field_schema_destroy(field1); + zvec_field_schema_destroy(field2); + zvec_index_params_hnsw_destroy(hnsw_params); + + TEST_END(); +} + +void test_performance_benchmarks(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_performance"; + + ZVecCollectionSchema *schema = zvec_collection_schema_create("perf_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Create simple schema for performance testing + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + TEST_ASSERT(collection != NULL); + + if (collection) { + const size_t BATCH_SIZE = 1000; + const size_t TOTAL_DOCS = 10000; + + // Test bulk insertion performance +#ifdef _POSIX_C_SOURCE + struct timeval start_time, end_time; + gettimeofday(&start_time, NULL); +#else + clock_t start_clock = clock(); +#endif + + for (size_t batch_start = 0; batch_start < 
TOTAL_DOCS; + batch_start += BATCH_SIZE) { + ZVecDoc *batch_docs[BATCH_SIZE]; + size_t current_batch_size = (batch_start + BATCH_SIZE > TOTAL_DOCS) + ? TOTAL_DOCS - batch_start + : BATCH_SIZE; + + // Create batch of documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + zvec_doc_set_pk(batch_docs[i], zvec_test_make_pk(batch_start + i)); + + int64_t id = batch_start + i; + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + + // Create random vector + float vec[128]; + for (int j = 0; j < 128; j++) { + vec[j] = (float)rand() / RAND_MAX; + } + zvec_doc_add_field_by_value(batch_docs[i], "vec", + ZVEC_DATA_TYPE_VECTOR_FP32, vec, + sizeof(vec)); + } + + // Insert batch + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == current_batch_size); + TEST_ASSERT(error_count == 0); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&end_time, NULL); + double insert_time = (end_time.tv_sec - start_time.tv_sec) + + (end_time.tv_usec - start_time.tv_usec) / 1000000.0; +#else + clock_t end_clock = clock(); + double insert_time = ((double)(end_clock - start_clock)) / CLOCKS_PER_SEC; +#endif + printf(" Inserted %zu documents in %.3f seconds (%.0f docs/sec)\n", + TOTAL_DOCS, insert_time, TOTAL_DOCS / insert_time); + + // Flush and optimize + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // Test query performance + float query_vec[128]; + for (int i = 0; i < 128; i++) { + query_vec[i] = (float)rand() / RAND_MAX; + } + + ZVecVectorQuery query = {0}; + query.field_name = (ZVecString){.data = "vec", .length = 3}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vec, + 
.length = sizeof(query_vec)}; + query.topk = 10; + query.include_vector = false; + query.include_doc_id = true; + + const int QUERY_COUNT = 100; +#ifdef _POSIX_C_SOURCE + struct timeval query_start_time, query_end_time; + gettimeofday(&query_start_time, NULL); +#else + clock_t query_start_clock = clock(); +#endif + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + err = + zvec_collection_query(collection, &query, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count <= 10); + + zvec_docs_free(results, result_count); + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&query_end_time, NULL); + double query_time = (query_end_time.tv_sec - query_start_time.tv_sec) + + (query_end_time.tv_usec - query_start_time.tv_usec) / 1000000.0; +#else + clock_t query_end_clock = clock(); + double query_time = ((double)(query_end_clock - query_start_clock)) / CLOCKS_PER_SEC; +#endif + double avg_query_time = + (query_time * 1000) / QUERY_COUNT; // ms per query + printf(" Average query time: %.2f ms\n", avg_query_time); + + zvec_collection_destroy(collection); + zvec_index_params_hnsw_destroy(hnsw_params); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Main function +// ============================================================================= + +int main(void) { + printf("Starting comprehensive C API tests...\n\n"); + + // Clean up previous test directories + printf("Cleaning up previous test directories...\n"); + system("rm -rf /tmp/zvec_test_*"); + printf("Cleanup completed.\n\n"); + + test_version_functions(); + test_error_handling_functions(); + test_zvec_config(); + test_zvec_initialize(); + test_zvec_string_functions(); + + // Schema-related tests + test_schema_basic_operations(); + 
test_schema_edge_cases(); + test_schema_field_operations(); + test_normal_schema_creation(); + test_schema_with_indexes(); + test_schema_max_doc_count(); + + // Field-related tests + test_field_schema_functions(); + test_field_helper_functions(); + test_field_ddl_operations(); + + // Collection-related tests + test_collection_basic_operations(); + test_collection_edge_cases(); + test_collection_delete_by_filter(); + test_collection_stats(); + test_collection_stats_functions(); + test_collection_dml_functions(); + test_collection_ddl_operations(); + + // Doc-related tests + test_doc_creation(); + test_doc_primary_key(); + test_doc_functions(); + + // Index tests + test_index_params(); + test_index_params_functions(); + test_index_creation_and_management(); + + // Query tests + test_query_params_functions(); + test_actual_vector_queries(); + + // Performance tests + // test_performance_benchmarks(); + + // Utility function tests + test_utility_functions(); + + // Memory management tests + test_memory_management_functions(); + + printf("\n=== Comprehensive Test Summary ===\n"); + printf("Total tests: %d\n", test_count); + printf("Passed: %d\n", passed_count); + printf("Failed: %d\n", test_count - passed_count); + + return test_count == passed_count ? 0 : 1; +} diff --git a/tests/c_api/utils.c b/tests/c_api/utils.c new file mode 100644 index 000000000..66c932a45 --- /dev/null +++ b/tests/c_api/utils.c @@ -0,0 +1,940 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "utils.h" +#include +#include +#include +#include + +// ============================================================================= +// Internal Helper Functions +// ============================================================================= + +static char *strdup_safe(const char *str) { + if (!str) return NULL; + size_t len = strlen(str) + 1; + char *copy = (char *)malloc(len); + if (copy) { + memcpy(copy, str, len); + } + return copy; +} + +// ============================================================================= +// Schema Creation Helper Functions Implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_test_create_temp_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + schema->max_doc_count_per_segment = 1000; + + // Create index parameters using C API + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, true); + ZVecHnswIndexParams *dense_hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecHnswIndexParams *sparse_hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_IP, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + + + // Create and add fields + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_field_schema_set_invert_index(id_field, invert_params); + zvec_collection_schema_add_field(schema, id_field); + + // Create name field (inverted index without optimization) + ZVecInvertIndexParams *name_invert_params = + zvec_index_params_invert_create(false, false); + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_invert_index(name_field, name_invert_params); + 
zvec_collection_schema_add_field(schema, name_field); + + // Create weight field (no index) + ZVecFieldSchema *weight_field = + zvec_field_schema_create("weight", ZVEC_DATA_TYPE_FLOAT, true, 0); + zvec_collection_schema_add_field(schema, weight_field); + + // Create dense field (HNSW index) + ZVecFieldSchema *dense_field = + zvec_field_schema_create("dense", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_field_schema_set_hnsw_index(dense_field, dense_hnsw_params); + zvec_collection_schema_add_field(schema, dense_field); + + // Create sparse field (HNSW index) + ZVecFieldSchema *sparse_field = zvec_field_schema_create( + "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + zvec_field_schema_set_hnsw_index(sparse_field, sparse_hnsw_params); + zvec_collection_schema_add_field(schema, sparse_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_scalar_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + + // Create fields + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, int32_field); + + ZVecFieldSchema *string_field = + zvec_field_schema_create("string", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_collection_schema_add_field(schema, string_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_normal_schema( + bool nullable, const char *name, + const ZVecInvertIndexParams *scalar_index_params, + const ZVecHnswIndexParams *vector_index_params, uint64_t max_doc_count) { + // Create collection schema using C API + ZVecCollectionSchema *schema = + zvec_collection_schema_create(name ? 
name : "demo"); + schema->max_doc_count_per_segment = max_doc_count; + + // Create scalar fields (8) + const char *scalar_names[] = {"int32", "string", "uint32", "bool", + "float", "double", "int64", "uint64"}; + ZVecDataType scalar_types[] = {ZVEC_DATA_TYPE_INT32, ZVEC_DATA_TYPE_STRING, + ZVEC_DATA_TYPE_UINT32, ZVEC_DATA_TYPE_BOOL, + ZVEC_DATA_TYPE_FLOAT, ZVEC_DATA_TYPE_DOUBLE, + ZVEC_DATA_TYPE_INT64, ZVEC_DATA_TYPE_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(scalar_names[i], scalar_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index( + field, (ZVecInvertIndexParams *)scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create array fields (8) + const char *array_names[] = {"array_int32", "array_string", "array_uint32", + "array_bool", "array_float", "array_double", + "array_int64", "array_uint64"}; + ZVecDataType array_types[] = { + ZVEC_DATA_TYPE_ARRAY_INT32, ZVEC_DATA_TYPE_ARRAY_STRING, + ZVEC_DATA_TYPE_ARRAY_UINT32, ZVEC_DATA_TYPE_ARRAY_BOOL, + ZVEC_DATA_TYPE_ARRAY_FLOAT, ZVEC_DATA_TYPE_ARRAY_DOUBLE, + ZVEC_DATA_TYPE_ARRAY_INT64, ZVEC_DATA_TYPE_ARRAY_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(array_names[i], array_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index( + field, (ZVecInvertIndexParams *)scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create vector fields (5) + // dense vectors + ZVecFieldSchema *dense_fp32 = zvec_field_schema_create( + "dense_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index( + dense_fp32, (ZVecHnswIndexParams *)vector_index_params); + } + zvec_collection_schema_add_field(schema, dense_fp32); + + ZVecFieldSchema *dense_fp16 = zvec_field_schema_create( + "dense_fp16", ZVEC_DATA_TYPE_VECTOR_FP16, false, 128); + 
ZVecFlatIndexParams *flat_params1 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_fp16, flat_params1); + zvec_collection_schema_add_field(schema, dense_fp16); + + ZVecFieldSchema *dense_int8 = zvec_field_schema_create( + "dense_int8", ZVEC_DATA_TYPE_VECTOR_INT8, false, 128); + ZVecFlatIndexParams *flat_params2 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_int8, flat_params2); + zvec_collection_schema_add_field(schema, dense_int8); + + // sparse vectors + ZVecFieldSchema *sparse_fp32 = zvec_field_schema_create( + "sparse_fp32", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index( + sparse_fp32, (ZVecHnswIndexParams *)vector_index_params); + } + zvec_collection_schema_add_field(schema, sparse_fp32); + + ZVecFieldSchema *sparse_fp16 = zvec_field_schema_create( + "sparse_fp16", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, false, 0); + ZVecFlatIndexParams *flat_params3 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(sparse_fp16, flat_params3); + zvec_collection_schema_add_field(schema, sparse_fp16); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( + bool nullable, bool enable_optimize, const char *name) { + ZVecInvertIndexParams *invert_params = + zvec_test_create_default_invert_params(enable_optimize); + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(nullable, name, invert_params, NULL, 1000); + free(invert_params); + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( + bool nullable, const char *name, + const ZVecHnswIndexParams *vector_index_params) { + ZVecHnswIndexParams *default_params = NULL; + if (!vector_index_params) { + default_params = zvec_test_create_default_hnsw_params(); + } + + 
ZVecCollectionSchema *schema = zvec_test_create_normal_schema( + nullable, name, NULL, + vector_index_params ? vector_index_params : default_params, 1000); + + if (default_params) { + free(default_params); + } + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count( + uint64_t doc_count) { + return zvec_test_create_normal_schema(false, "demo", NULL, NULL, doc_count); +} + +// ============================================================================= +// Document Creation Helper Functions Implementation +// ============================================================================= + +char *zvec_test_make_pk(uint64_t doc_id) { + char *pk = (char *)malloc(32); // Sufficiently large buffer + if (pk) { + snprintf(pk, 32, "pk_%llu", (unsigned long long)doc_id); + } + return pk; +} + +uint64_t zvec_test_extract_doc_id(const char *pk) { + if (!pk || strlen(pk) < 4) return 0; + return strtoull(pk + 3, NULL, 10); +} + +ZVecDoc *zvec_test_create_doc(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk) { + if (!schema) return NULL; + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Create test data for each field + for (size_t i = 0; i < schema->field_count; i++) { + // Fix type mismatch issue - remove address operator + const ZVecFieldSchema *field = schema->fields[i]; + // Remove unused variable + // ZVecErrorCode err = ZVEC_OK; + + switch (field->data_type) { + case ZVEC_DATA_TYPE_BINARY: { + char binary_str[32]; + snprintf(binary_str, sizeof(binary_str), "binary_%llu", + (unsigned long long)doc_id); + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + binary_str, strlen(binary_str)); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(bool){doc_id % 10 == 0}, sizeof(bool)); + break; + } + case ZVEC_DATA_TYPE_INT32: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(int32_t){(int32_t)doc_id}, + sizeof(int32_t)); + break; + } + case ZVEC_DATA_TYPE_INT64: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(int64_t){(int64_t)doc_id}, + sizeof(int64_t)); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(uint32_t){(uint32_t)doc_id}, + sizeof(uint32_t)); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(uint64_t){(uint64_t)doc_id}, + sizeof(uint64_t)); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(float){(float)doc_id}, sizeof(float)); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(double){(double)doc_id}, sizeof(double)); + break; + } + case ZVEC_DATA_TYPE_STRING: { + char string_val[64]; + snprintf(string_val, sizeof(string_val), "value_%llu", + (unsigned long long)doc_id); + 
zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + string_val, strlen(string_val)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + bool bool_array[10]; + for (int j = 0; j < 10; j++) { + bool_array[j] = (doc_id + j) % 2 == 0; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + bool_array, sizeof(bool_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + int32_t int32_array[10]; + for (int j = 0; j < 10; j++) { + int32_array[j] = (int32_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + int32_array, sizeof(int32_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + int64_t int64_array[10]; + for (int j = 0; j < 10; j++) { + int64_array[j] = (int64_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + int64_array, sizeof(int64_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + uint32_t uint32_array[10]; + for (int j = 0; j < 10; j++) { + uint32_array[j] = (uint32_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + uint32_array, sizeof(uint32_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + uint64_t uint64_array[10]; + for (int j = 0; j < 10; j++) { + uint64_array[j] = (uint64_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + uint64_array, sizeof(uint64_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + float float_array[10]; + for (int j = 0; j < 10; j++) { + float_array[j] = (float)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + float_array, sizeof(float_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + double double_array[10]; + for (int j = 0; j < 10; j++) { + double_array[j] = (double)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + double_array, sizeof(double_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + // String arrays 
need special handling + char string_data[256]; + size_t offset = 0; + for (int j = 0; j < 10; j++) { + char temp_str[32]; + snprintf(temp_str, sizeof(temp_str), "value_%llu_%d", + (unsigned long long)doc_id, j); + size_t len = strlen(temp_str); + if (offset + len + 1 < sizeof(string_data)) { + strcpy(string_data + offset, temp_str); + offset += len + 1; + } + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + string_data, offset); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + uint32_t *vector_data = + (uint32_t *)malloc(field->dimension * sizeof(uint32_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (uint32_t)(doc_id + j); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(uint32_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + uint64_t *vector_data = + (uint64_t *)malloc(field->dimension * sizeof(uint64_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (uint64_t)(doc_id + j); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(uint64_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + double *vector_data = + (double *)malloc(field->dimension * sizeof(double)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (double)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, 
field->data_type, + vector_data, + field->dimension * sizeof(double)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 needs special handling, simplified to FP32 here + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + int8_t *vector_data = + (int8_t *)malloc(field->dimension * sizeof(int8_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int8_t)((doc_id + j) % 256); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(int8_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + int16_t *vector_data = + (int16_t *)malloc(field->dimension * sizeof(int16_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int16_t)((doc_id + j) % 65536); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(int16_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + // Sparse vectors need special handling + uint32_t nnz = field->dimension > 0 + ? 
field->dimension / 10 + : 10; // Number of non-zero elements + size_t sparse_size = + sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *sparse_data = malloc(sparse_size); + if (sparse_data) { + uint32_t *data_ptr = (uint32_t *)sparse_data; + *data_ptr = nnz; // Set number of non-zero elements + uint32_t *indices = data_ptr + 1; + float *values = (float *)(indices + nnz); + for (uint32_t j = 0; j < nnz; j++) { + indices[j] = j * 10; // Index + values[j] = (float)(doc_id + j * 0.1); // Value + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + sparse_data, sparse_size); + free(sparse_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + // Sparse FP16 vectors, simplified handling + uint32_t nnz = field->dimension > 0 ? field->dimension / 10 : 10; + size_t sparse_size = + sizeof(uint32_t) + + nnz * (sizeof(uint32_t) + + sizeof(float)); // Still use float for storage + void *sparse_data = malloc(sparse_size); + if (sparse_data) { + uint32_t *data_ptr = (uint32_t *)sparse_data; + *data_ptr = nnz; + uint32_t *indices = data_ptr + 1; + float *values = (float *)(indices + nnz); + for (uint32_t j = 0; j < nnz; j++) { + indices[j] = j * 10; + values[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + sparse_data, sparse_size); + free(sparse_data); + } + break; + } + + default: + // Unsupported data type + break; + } + + // Remove reference to removed variable err + /* + if (err != ZVEC_OK) { + // Error handling: continue processing other fields + } + */ + } + + return doc; +} + +ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk) { + // Reuse create_doc function, but only process vector fields + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Only create data for vector fields + for (size_t i = 0; i < schema->field_count; i++) { + const ZVecFieldSchema *field = schema->fields[i]; + + // Only process specific vector type fields + if (field->data_type != ZVEC_DATA_TYPE_VECTOR_FP32 && + field->data_type != ZVEC_DATA_TYPE_VECTOR_FP16 && + field->data_type != ZVEC_DATA_TYPE_VECTOR_INT8 && + field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 && + field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16) { + continue; + } + + ZVecErrorCode err = ZVEC_OK; + + switch (field->data_type) { + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + double *vector_data = + (double *)malloc(field->dimension * sizeof(double)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (double)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(double)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + int8_t *vector_data = + (int8_t *)malloc(field->dimension * 
sizeof(int8_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int8_t)(doc_id % 128); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(int8_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + int16_t *vector_data = + (int16_t *)malloc(field->dimension * sizeof(int16_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int16_t)(doc_id % 32768); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(int16_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + const size_t nnz = 100; + size_t sparse_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + char *sparse_data = (char *)malloc(sparse_size); + if (sparse_data) { + char *ptr = sparse_data; + *((size_t *)ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t j = 0; j < nnz; j++) { + *((uint32_t *)ptr) = (uint32_t)j; + ptr += sizeof(uint32_t); + *((float *)ptr) = (float)(doc_id + j * 0.1); + ptr += sizeof(float); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, sparse_data, + sparse_size); + free(sparse_data); + } + break; + } + default: + break; + } + + + if (err != ZVEC_OK) { + zvec_doc_destroy(doc); + return NULL; + } + } + + return doc; +} + +ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, + const char **field_names, + const ZVecDataType *field_types, + size_t field_count, const char *pk) { + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Create data for specified fields + for (size_t i = 0; i < field_count; i++) { + ZVecErrorCode err = ZVEC_OK; + + switch (field_types[i]) { + case ZVEC_DATA_TYPE_INT32: + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + &(int32_t){(int32_t)doc_id}, + sizeof(int32_t)); + break; + case ZVEC_DATA_TYPE_STRING: { + char string_val[64]; + snprintf(string_val, sizeof(string_val), "value_%llu", + (unsigned long long)doc_id); + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + string_val, strlen(string_val)); + break; + } + case ZVEC_DATA_TYPE_FLOAT: + err = + zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + &(float){(float)doc_id}, sizeof(float)); + break; + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float vector_data[128]; + for (int j = 0; j < 128; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + vector_data, sizeof(vector_data)); + break; + } + default: + // Other types can be added here + break; + } + + if (err != ZVEC_OK) { + zvec_doc_destroy(doc); + return NULL; + } + } + + return doc; +} + +// ============================================================================= +// Index Parameter Creation Helper Functions Implementation +// ============================================================================= + +ZVecHnswIndexParams *zvec_test_create_default_hnsw_params(void) { + ZVecHnswIndexParams *params = + (ZVecHnswIndexParams *)malloc(sizeof(ZVecHnswIndexParams)); + if (!params) return NULL; + + params->base.base.index_type = ZVEC_INDEX_TYPE_HNSW; + params->base.metric_type = ZVEC_METRIC_TYPE_IP; + params->base.quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + params->m = 16; + params->ef_construction = 100; + + return params; +} + +ZVecFlatIndexParams 
*zvec_test_create_default_flat_params(void) { + ZVecFlatIndexParams *params = + (ZVecFlatIndexParams *)malloc(sizeof(ZVecFlatIndexParams)); + if (!params) return NULL; + + params->base.base.index_type = ZVEC_INDEX_TYPE_FLAT; + params->base.metric_type = ZVEC_METRIC_TYPE_IP; + params->base.quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + + return params; +} + +ZVecInvertIndexParams *zvec_test_create_default_invert_params( + bool enable_optimize) { + ZVecInvertIndexParams *params = + (ZVecInvertIndexParams *)malloc(sizeof(ZVecInvertIndexParams)); + if (!params) return NULL; + + params->base.index_type = ZVEC_INDEX_TYPE_INVERT; + params->enable_range_optimization = enable_optimize; + params->enable_extended_wildcard = enable_optimize; + + return params; +} + +// ============================================================================= +// Field Schema Creation Helper Functions Implementation +// ============================================================================= + +ZVecFieldSchema *zvec_test_create_scalar_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecInvertIndexParams *invert_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = 0; + field->index_params = invert_params ? 
(ZVecIndexParams *)invert_params : NULL; + + return field; +} + +ZVecFieldSchema *zvec_test_create_vector_field( + const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, + const ZVecHnswIndexParams *vector_index_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = dimension; + field->index_params = + vector_index_params ? (ZVecIndexParams *)vector_index_params : NULL; + + return field; +} + +ZVecFieldSchema *zvec_test_create_sparse_vector_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecHnswIndexParams *vector_index_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = 0; // Sparse vectors don't need fixed dimension + field->index_params = + vector_index_params ? 
(ZVecIndexParams *)vector_index_params : NULL; + + return field; +} + +// ============================================================================= +// Memory Management Helper Functions Implementation +// ============================================================================= + +void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count) { + if (!fields) return; + + for (size_t i = 0; i < count; i++) { + if (fields[i].name) { + // Free string memory allocated by strdup + if (fields[i].name->data) { + free(fields[i].name->data); + } + free(fields[i].name); + } + // Free index parameter memory + if (fields[i].index_params) { + zvec_index_params_destroy(fields[i].index_params); + free(fields[i].index_params); + } + } + free(fields); +} + +void zvec_test_free_strings(char **strings, size_t count) { + if (!strings) return; + + for (size_t i = 0; i < count; i++) { + if (strings[i]) { + free(strings[i]); + } + } + + free(strings); +} + +// ============================================================================= +// File System Helper Functions Implementation +// ============================================================================= + +/** + * @brief Delete directory and all its contents (wrapper function) + * + * @param dir_path Directory path + * @return int 0 for success, -1 for failure + */ +int zvec_test_delete_dir(const char *dir_path) { + if (!dir_path) { + return -1; + } + +#ifdef _WIN32 + // Windows platform implementation + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "rd /s /q \"%s\" >nul 2>&1", dir_path); + int result = system(cmd); + return (result == 0) ? 0 : -1; +#else + // Unix/Linux/macOS platform implementation + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "rm -rf \"%s\" 2>/dev/null", dir_path); + int result = system(cmd); + return (result == 0) ? 
0 : -1; +#endif +} diff --git a/tests/c_api/utils.h b/tests/c_api/utils.h new file mode 100644 index 000000000..63e5e3149 --- /dev/null +++ b/tests/c_api/utils.h @@ -0,0 +1,260 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ZVEC_TESTS_C_API_UTILS_H +#define ZVEC_TESTS_C_API_UTILS_H + +#include +#include +#include +#include "zvec/c_api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// ============================================================================= +// Schema Creation Helper Functions +// ============================================================================= + +/** + * @brief Create temporary test schema + * Contains basic scalar fields and vector fields + * + * @return ZVecCollectionSchema* Created schema pointer, needs to be released by + * calling zvec_collection_schema_cleanup + */ +ZVecCollectionSchema *zvec_test_create_temp_schema(void); + +/** + * @brief Create pure scalar schema + * Contains only scalar fields (int32, string) + * + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_scalar_schema(void); + +/** + * @brief Create full-featured schema + * Contains all supported data type fields + * + * @param nullable Whether to allow null values + * @param name Schema name + * @param scalar_index_params Scalar index parameters (can be NULL) + * @param vector_index_params Vector index parameters (can be NULL) + * @param 
max_doc_count Maximum documents per segment + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_normal_schema( + bool nullable, const char *name, + const ZVecInvertIndexParams *scalar_index_params, + const ZVecHnswIndexParams *vector_index_params, uint64_t max_doc_count); + +/** + * @brief Create schema with scalar index + * + * @param nullable Whether to allow null values + * @param enable_optimize Whether to enable optimization + * @param name Schema name + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( + bool nullable, bool enable_optimize, const char *name); + +/** + * @brief Create schema with vector index + * + * @param nullable Whether to allow null values + * @param name Schema name + * @param vector_index_params Vector index parameters (can be NULL, uses default + * HNSW parameters) + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( + bool nullable, const char *name, + const ZVecHnswIndexParams *vector_index_params); + +/** + * @brief Create schema with specified maximum document count + * + * @param doc_count Maximum documents per segment + * @return ZVecCollectionSchema* Created schema pointer + */ +ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count( + uint64_t doc_count); + +// ============================================================================= +// Document Creation Helper Functions +// ============================================================================= + +/** + * @brief Generate primary key based on document ID + * + * @param doc_id Document ID + * @return char* Generated primary key string, needs to be released by calling + * free() + */ +char *zvec_test_make_pk(uint64_t doc_id); + +/** + * @brief Create complete document + * Create corresponding test data for each field according to schema + * + * @param doc_id 
Document ID + * @param schema Schema pointer + * @param pk Primary key (can be NULL, auto-generated) + * @return ZVecDoc* Created document pointer, needs to be released by calling + * zvec_doc_destroy + */ +ZVecDoc *zvec_test_create_doc(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk); + +/** + * @brief Create partial null document + * Only set values for vector fields, keep scalar fields as null + * + * @param doc_id Document ID + * @param schema Schema pointer + * @param pk Primary key (can be NULL, auto-generated) + * @return ZVecDoc* Created document pointer + */ +ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk); + +/** + * @brief Create document with specified fields + * Only create data for specified fields + * + * @param doc_id Document ID + * @param field_names Field name array + * @param field_types Field type array + * @param field_count Number of fields + * @param pk Primary key (can be NULL, auto-generated) + * @return ZVecDoc* Created document pointer + */ +ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, + const char **field_names, + const ZVecDataType *field_types, + size_t field_count, const char *pk); + +// ============================================================================= +// Index Parameter Creation Helper Functions +// ============================================================================= + +/** + * @brief Create default HNSW index parameters + * + * @return ZVecHnswIndexParams* Created parameter pointer, needs to be released + * by calling free() + */ +ZVecHnswIndexParams *zvec_test_create_default_hnsw_params(void); + +/** + * @brief Create default Flat index parameters + * + * @return ZVecFlatIndexParams* Created parameter pointer, needs to be released + * by calling free() + */ +ZVecFlatIndexParams *zvec_test_create_default_flat_params(void); + +/** + * @brief Create default scalar index parameters + * + * @param enable_optimize 
Whether to enable optimization + * @return ZVecInvertIndexParams* Created parameter pointer, needs to be + * released by calling free() + */ +ZVecInvertIndexParams *zvec_test_create_default_invert_params( + bool enable_optimize); + +// ============================================================================= +// Field Schema Creation Helper Functions +// ============================================================================= + +/** + * @brief Create scalar field schema + * + * @param name Field name + * @param data_type Data type + * @param nullable Whether to allow null values + * @param invert_params Scalar index parameters (can be NULL) + * @return ZVecFieldSchema* Created field schema pointer, needs to be released + * by calling free() + */ +ZVecFieldSchema *zvec_test_create_scalar_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecInvertIndexParams *invert_params); + +/** + * @brief Create vector field schema + * + * @param name Field name + * @param data_type Data type + * @param dimension Vector dimension + * @param nullable Whether to allow null values + * @param vector_index_params Vector index parameters (can be NULL) + * @return ZVecFieldSchema* Created field schema pointer + */ +ZVecFieldSchema *zvec_test_create_vector_field( + const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, + const ZVecHnswIndexParams *vector_index_params); + +/** + * @brief Create sparse vector field schema + * + * @param name Field name + * @param data_type Data type + * @param nullable Whether to allow null values + * @param vector_index_params Vector index parameters (can be NULL) + * @return ZVecFieldSchema* Created field schema pointer + */ +ZVecFieldSchema *zvec_test_create_sparse_vector_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecHnswIndexParams *vector_index_params); + +// ============================================================================= +// Memory Management Helper 
Functions +// ============================================================================= + +/** + * @brief Free field schema array + * + * @param fields Field array pointer + * @param count Number of fields + */ +void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count); + +/** + * @brief Free string array + * + * @param strings String array pointer + * @param count Number of strings + */ +void zvec_test_free_strings(char **strings, size_t count); + +/** + * @brief Delete directory and all its contents + * + * @param dir_path Directory path + * @return int 0 for success, -1 for failure + */ +int zvec_test_delete_dir(const char *dir_path); + +#ifdef __cplusplus +} +#endif + +#endif // ZVEC_TESTS_C_API_UTILS_H \ No newline at end of file From b116d6c8653cfc107f8ed2c7d9332939e3a24346 Mon Sep 17 00:00:00 2001 From: lichen2015 Date: Wed, 25 Feb 2026 12:00:41 +0800 Subject: [PATCH 02/15] format c_api.h --- src/include/zvec/c_api.h | 70 ++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 0ae01935a..5695711a2 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -146,7 +146,8 @@ typedef struct { * @param[out] error_details Pointer to error details structure * @return ZVecErrorCode Error code */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_get_last_error_details(ZVecErrorDetails *error_details); +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_get_last_error_details(ZVecErrorDetails *error_details); /** * @brief Get last error message @@ -237,38 +238,40 @@ ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_create(const char *str); /** * @brief Create string from string view - * + * * Creates a new ZVecString by copying data from a ZVecStringView. * The created string owns its memory and must be freed with zvec_free_string(). 
- * + * * @param view Pointer to source string view (must not be NULL) * @return ZVecString* New string instance on success, NULL on error * @note Caller is responsible for freeing the returned string */ -ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_create_from_view(const ZVecStringView *view); +ZVEC_EXPORT ZVecString *ZVEC_CALL +zvec_string_create_from_view(const ZVecStringView *view); /** * @brief Create binary-safe string from raw data - * + * * Creates a new ZVecString from raw binary data that may contain null bytes. * Unlike zvec_string_create(), this function takes explicit length parameter * and doesn't rely on null-termination. * The created string owns its memory and must be freed with zvec_free_string(). - * + * * @param data Raw binary data pointer (must not be NULL) * @param length Length of data in bytes * @return ZVecString* New string instance on success, NULL on error * @note Caller is responsible for freeing the returned string * @note This function is suitable for binary data containing null bytes */ -ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_bin_create(const uint8_t *data, size_t length); +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_bin_create(const uint8_t *data, + size_t length); /** * @brief Copy string - * + * * Creates a new ZVecString by copying an existing string. * The created string owns its memory and must be freed with zvec_free_string(). 
- * + * * @param str Pointer to source string (must not be NULL) * @return ZVecString* New string instance on success, NULL on error * @note Caller is responsible for freeing the returned string @@ -708,10 +711,9 @@ ZVEC_EXPORT void ZVEC_CALL zvec_index_params_vector_init( * @param ef_search Search exploration factor * @param quantize_type Quantization type */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_hnsw_init(ZVecHnswIndexParams *params, - ZVecMetricType metric_type, int m, - int ef_construction, int ef_search, - ZVecQuantizeType quantize_type); +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_hnsw_init( + ZVecHnswIndexParams *params, ZVecMetricType metric_type, int m, + int ef_construction, int ef_search, ZVecQuantizeType quantize_type); /** * @brief Initialize Flat index parameters @@ -719,9 +721,9 @@ ZVEC_EXPORT void ZVEC_CALL zvec_index_params_hnsw_init(ZVecHnswIndexParams *para * @param metric_type Metric type * @param quantize_type Quantization type */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_flat_init(ZVecFlatIndexParams *params, - ZVecMetricType metric_type, - ZVecQuantizeType quantize_type); +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_flat_init( + ZVecFlatIndexParams *params, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type); /** * @brief Initialize IVF index parameters @@ -733,10 +735,9 @@ ZVEC_EXPORT void ZVEC_CALL zvec_index_params_flat_init(ZVecFlatIndexParams *para * @param n_probe Search probe count * @param quantize_type Quantization type */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_ivf_init(ZVecIVFIndexParams *params, - ZVecMetricType metric_type, int n_list, - int n_iters, bool use_soar, int n_probe, - ZVecQuantizeType quantize_type); +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_ivf_init( + ZVecIVFIndexParams *params, ZVecMetricType metric_type, int n_list, + int n_iters, bool use_soar, int n_probe, ZVecQuantizeType quantize_type); /** * @brief Initialize generic index parameters @@ -744,9 +745,9 @@ ZVEC_EXPORT void 
ZVEC_CALL zvec_index_params_ivf_init(ZVecIVFIndexParams *params * @param index_type Index type * @param metric_type Metric type (only valid for vector indexes) */ -ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init_default(ZVecIndexParams *params, - ZVecIndexType index_type, - ZVecMetricType metric_type); +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init_default( + ZVecIndexParams *params, ZVecIndexType index_type, + ZVecMetricType metric_type); /** * @brief Destroy index parameters (free internal dynamically allocated memory) @@ -1010,8 +1011,7 @@ zvec_query_params_union_create(ZVecIndexType index_type); * @brief Destroy base query parameters * @param params HNSW query parameters pointer */ -ZVEC_EXPORT void ZVEC_CALL -zvec_query_params_destroy(ZVecQueryParams *params); +ZVEC_EXPORT void ZVEC_CALL zvec_query_params_destroy(ZVecQueryParams *params); /** * @brief Destroy HNSW query parameters @@ -1101,8 +1101,8 @@ zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, int nprobe); * @param scale_factor Scale factor * @return ZVecErrorCode Error code */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, float scale_factor); +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_ivf_set_scale_factor( + ZVecIVFQueryParams *params, float scale_factor); /** * @brief Collection options structure @@ -1971,9 +1971,9 @@ ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_copy(const ZVecDoc *doc); /** * @brief Get field value (basic type returned directly) * - * Supports basic numeric data types: BOOL, INT32, INT64, UINT32, UINT64, + * Supports basic numeric data types: BOOL, INT32, INT64, UINT32, UINT64, * FLOAT, DOUBLE. The value is copied directly into the provided buffer. - * For STRING, BINARY, and VECTOR types, use zvec_doc_get_field_value_copy + * For STRING, BINARY, and VECTOR types, use zvec_doc_get_field_value_copy * or zvec_doc_get_field_value_pointer instead. 
* * @param doc Document object pointer @@ -1993,14 +1993,14 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_basic( * Supports all data types including: * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE * - String types: STRING, BINARY - * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 * - Sparse vector types: SPARSE_VECTOR_FP32, SPARSE_VECTOR_FP16 - * - Array types: ARRAY_STRING, ARRAY_BINARY, ARRAY_BOOL, ARRAY_INT32, + * - Array types: ARRAY_STRING, ARRAY_BINARY, ARRAY_BOOL, ARRAY_INT32, * ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, ARRAY_FLOAT, ARRAY_DOUBLE * - * The returned value pointer must be manually freed using appropriate - * deallocation functions (free() for basic types and strings, + * The returned value pointer must be manually freed using appropriate + * deallocation functions (free() for basic types and strings, * zvec_free_uint8_array() for binary data). * * @param doc Document object pointer @@ -2025,8 +2025,8 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_copy( * - Array types: ARRAY_INT32, ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, * ARRAY_FLOAT, ARRAY_DOUBLE * - * The returned pointer points to data within the document object and - * does not require manual memory management. The pointer remains valid + * The returned pointer points to data within the document object and + * does not require manual memory management. The pointer remains valid * as long as the document exists. 
* * @param doc Document object pointer From 87aded5827f3d0bfec3c63587f68ab0d34014a20 Mon Sep 17 00:00:00 2001 From: lichen2015 Date: Sun, 8 Mar 2026 20:39:42 +0800 Subject: [PATCH 03/15] fix some code --- src/c_api/c_api.cc | 18 +++++------------- src/include/zvec/c_api.h | 23 +++++++++++------------ 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 1c4d830d1..0f45bce89 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -142,30 +142,24 @@ ZVecString *zvec_string_create(const char *str) { __FUNCTION__); return nullptr; } - ZVecString *zstr = nullptr; char *data_buffer = nullptr; - try { size_t len = strlen(str); zstr = new ZVecString(); - data_buffer = new char[len + 1]; - strcpy(const_cast(data_buffer), str); - + data_buffer = static_cast(malloc(len + 1)); + strcpy(data_buffer, str); zstr->data = data_buffer; zstr->length = len; zstr->capacity = len + 1; - return zstr; - } catch (const std::exception &e) { if (data_buffer) { - delete[] data_buffer; + free(data_buffer); } if (zstr) { delete zstr; } - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, std::string("String creation failed: ") + e.what(), __FILE__, __LINE__, __FUNCTION__); @@ -173,7 +167,6 @@ ZVecString *zvec_string_create(const char *str) { } } - ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { if (!view || !view->data) { set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, @@ -651,8 +644,7 @@ static ZVecErrorCode handle_expected_result( // Helper function: copy strings static char *copy_string(const std::string &str) { if (str.empty()) return nullptr; - - char *copy = new char[str.length() + 1]; + char *copy = static_cast(malloc(str.length() + 1)); strcpy(copy, str.c_str()); return copy; } @@ -2903,7 +2895,7 @@ const char *zvec_doc_get_pk_copy(const ZVecDoc *doc) { const std::string &pk = (*doc_ptr)->pk_ref(); if (pk.empty()) return nullptr; - char *result = new char[pk.length() + 1]; + char *result = 
static_cast(malloc(pk.length() + 1)); strcpy(result, pk.c_str()); return result; } diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 5695711a2..ad3cc5865 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -2469,11 +2469,11 @@ ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); * Usage example: * ZVecCollectionOptions opts = ZVEC_DEFAULT_OPTIONS(); */ -#define ZVEC_DEFAULT_OPTIONS() \ - (ZVecCollectionOptions){.enable_mmap = true, \ - .max_buffer_size = 1048576, \ - .read_only = false, \ - .max_doc_count_per_segment = 1000000} +#define ZVEC_DEFAULT_OPTIONS() \ + (ZVecCollectionOptions) { \ + .enable_mmap = true, .max_buffer_size = 1048576, .read_only = false, \ + .max_doc_count_per_segment = 1000000 \ + } /** * @brief Simplified vector query initialization macro @@ -2486,13 +2486,12 @@ ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); * ZVecVectorQuery query = ZVEC_VECTOR_QUERY("embedding", query_vectors, 10, * ""); */ -#define ZVEC_VECTOR_QUERY(field_name_str, query_vec, top_k, filter_str) \ - (ZVecVectorQuery){.field_name = ZVEC_STRING(field_name_str), \ - .query_vector = query_vec, \ - .topk = top_k, \ - .filter = ZVEC_STRING(filter_str), \ - .include_vector = 1, \ - .include_doc_id = 1} +#define ZVEC_VECTOR_QUERY(field_name_str, query_vec, top_k, filter_str) \ + (ZVecVectorQuery) { \ + .field_name = ZVEC_STRING(field_name_str), .query_vector = query_vec, \ + .topk = top_k, .filter = ZVEC_STRING(filter_str), .include_vector = 1, \ + .include_doc_id = 1 \ + } /** * @brief Simplified document field initialization macro From a979e33b27e655ba956ab875c339b3b75ab0c44a Mon Sep 17 00:00:00 2001 From: lichen2015 Date: Mon, 9 Mar 2026 10:06:00 +0800 Subject: [PATCH 04/15] format c_api code --- src/c_api/c_api.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 0f45bce89..77a6127ab 100644 --- a/src/c_api/c_api.cc +++ 
b/src/c_api/c_api.cc @@ -147,7 +147,7 @@ ZVecString *zvec_string_create(const char *str) { try { size_t len = strlen(str); zstr = new ZVecString(); - data_buffer = static_cast(malloc(len + 1)); + data_buffer = static_cast(malloc(len + 1)); strcpy(data_buffer, str); zstr->data = data_buffer; zstr->length = len; @@ -644,7 +644,7 @@ static ZVecErrorCode handle_expected_result( // Helper function: copy strings static char *copy_string(const std::string &str) { if (str.empty()) return nullptr; - char *copy = static_cast(malloc(str.length() + 1)); + char *copy = static_cast(malloc(str.length() + 1)); strcpy(copy, str.c_str()); return copy; } @@ -2895,7 +2895,7 @@ const char *zvec_doc_get_pk_copy(const ZVecDoc *doc) { const std::string &pk = (*doc_ptr)->pk_ref(); if (pk.empty()) return nullptr; - char *result = static_cast(malloc(pk.length() + 1)); + char *result = static_cast(malloc(pk.length() + 1)); strcpy(result, pk.c_str()); return result; } From 551c8c920987858592df207fb778dab179d91ce5 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Tue, 10 Mar 2026 09:58:39 +0800 Subject: [PATCH 05/15] build rpm --- CMakeLists.txt | 129 ++++++++++++- cmake/rpm_install.cmake | 42 +++++ cmake/zvec.pc.in | 24 +++ cmake/zvec.spec.in | 62 ++++++ cmake/zvec_config.cmake.in | 33 ++++ src/ailego/CMakeLists.txt | 4 +- src/c_api/CMakeLists.txt | 178 +++++++++++++----- src/c_api/c_api.cc | 12 +- src/core/CMakeLists.txt | 4 +- src/db/CMakeLists.txt | 9 +- tests/c_api/c_api_test.c | 17 +- tests/core/algorithm/ivf/ivf_searcher_test.cc | 2 +- 12 files changed, 446 insertions(+), 70 deletions(-) create mode 100644 cmake/rpm_install.cmake create mode 100644 cmake/zvec.pc.in create mode 100644 cmake/zvec.spec.in create mode 100644 cmake/zvec_config.cmake.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b7638164..742e2728a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.13) cmake_policy(SET CMP0077 NEW) -project(zvec) +project(zvec VERSION 0.3.0) 
set(CC_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror=return-type") @@ -34,6 +34,20 @@ message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}") option(BUILD_EXAMPLES "Build examples" ON) message(STATUS "BUILD_EXAMPLES:${BUILD_EXAMPLES}") +# Option to build static libraries for RPM packaging +option(BUILD_STATIC_LIBS "Build static libraries" OFF) +message(STATUS "BUILD_STATIC_LIBS:${BUILD_STATIC_LIBS}") + +# Option to enable RPM packaging +option(ENABLE_RPM_PACKAGING "Enable RPM packaging support" OFF) +message(STATUS "ENABLE_RPM_PACKAGING:${ENABLE_RPM_PACKAGING}") + +# Option to build fat libraries (all-in-one with static-linked third-party deps) +# When ON: libzvec_c_api.so and libzvec_c_api_static.a include all third-party libs +# When OFF: Libraries depend on external .so files for third-party deps +option(BUILD_FAT_LIBS "Build fat libraries with all dependencies statically linked" ON) +message(STATUS "BUILD_FAT_LIBS:${BUILD_FAT_LIBS}") + cc_directory(thirdparty) cc_directories(src) cc_directories(tests) @@ -48,7 +62,118 @@ endif() git_version(GIT_SRCS_VER ${PROJECT_ROOT_DIR}) set(CPACK_PACKAGE_VERSION ${GIT_SRCS_VER}) set(CPACK_PACKAGE_NAME zvec) -include(CPack) + +# ============================================================================= +# RPM Packaging Configuration +# ============================================================================= +if(ENABLE_RPM_PACKAGING OR BUILD_STATIC_LIBS) + include(GNUInstallDirs) + include(CMakePackageConfigHelpers) + + # Generate version helpers + write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/zvecConfigVersion.cmake" + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion + ) + + # Configure CMake config file + configure_package_config_file( + "${PROJECT_ROOT_DIR}/cmake/zvec_config.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/zvecConfig.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/zvec" + ) + + # Configure pkg-config file + configure_file( + 
"${PROJECT_ROOT_DIR}/cmake/zvec.pc.in" + "${CMAKE_CURRENT_BINARY_DIR}/zvec.pc" + @ONLY + ) + + # Install CMake configuration files + install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/zvecConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/zvecConfigVersion.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/zvec" + ) + + # Install pkg-config file + install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/zvec.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig" + ) + + # Install documentation + install(FILES + "${PROJECT_ROOT_DIR}/README.md" + DESTINATION "${CMAKE_INSTALL_DATADIR}/doc/zvec" + ) + + # Create a simple CMake targets file + # Since we can't export complex target hierarchies with all dependencies, + # we create a simplified import file + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/zvecTargets.cmake" +"# Zvec CMake Targets File + +# Import target: zvec::zvec_c_api +if(NOT TARGET zvec::zvec_c_api) + add_library(zvec::zvec_c_api SHARED IMPORTED) + set_target_properties(zvec::zvec_c_api PROPERTIES + IMPORTED_LOCATION \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_LIBDIR}/libzvec_c_api.so\" + INTERFACE_INCLUDE_DIRECTORIES \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_INCLUDEDIR}\" + INTERFACE_LINK_LIBRARIES \"Threads::Threads\" + ) +endif() + +# Import target: zvec::zvec_c_api_static (if available) +# This is a 'fat' static library that includes all zvec code (core, db, ailego) +# But you still need to link third-party dependencies separately +if(BUILD_STATIC_LIBS AND EXISTS \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_LIBDIR}/libzvec_c_api_static.a\") + if(NOT TARGET zvec::zvec_c_api_static) + add_library(zvec::zvec_c_api_static STATIC IMPORTED) + set_target_properties(zvec::zvec_c_api_static PROPERTIES + IMPORTED_LOCATION \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_LIBDIR}/libzvec_c_api_static.a\" + INTERFACE_INCLUDE_DIRECTORIES \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_INCLUDEDIR}\" + # Note: Third-party dependencies (Arrow, RocksDB, 
etc.) must be linked by the user + INTERFACE_LINK_LIBRARIES \"Threads::Threads\" + ) + endif() +endif() +") + + # Install the targets file + install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/zvecTargets.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/zvec" + COMPONENT devel + ) + + # Include RPM-specific installation rules + include(${PROJECT_ROOT_DIR}/cmake/rpm_install.cmake) +endif() + +# Configure CPack for RPM generation +if(ENABLE_RPM_PACKAGING) + set(CPACK_GENERATOR "RPM" CACHE STRING "Generators to support") + set(CPACK_PACKAGE_VERSION ${GIT_SRCS_VER}) + set(CPACK_PACKAGE_NAME zvec) + set(CPACK_RPM_PACKAGE_LICENSE "Apache-2.0") + set(CPACK_RPM_PACKAGE_URL "https://github.com/alibaba/zvec") + set(CPACK_RPM_PACKAGE_DESCRIPTION "Zvec - High-performance in-process vector database library with self-contained shared library") + set(CPACK_RPM_PACKAGE_GROUP "Development/Libraries") + set(CPACK_RPM_PACKAGE_VENDOR "Alibaba") + set(CPACK_RPM_PACKAGE_SUMMARY "High-performance in-process vector database library") + + # RPM specific settings + set(CPACK_PACKAGE_FILE_NAME "zvec-${CPACK_PACKAGE_VERSION}-1") + + # Single RPM package - no components + # Only the self-contained shared library (fat lib) + set(CPACK_RPM_COMPONENT_INSTALL OFF) + + include(CPack) +endif() if(BUILD_PYTHON_BINDINGS) if(APPLE) diff --git a/cmake/rpm_install.cmake b/cmake/rpm_install.cmake new file mode 100644 index 000000000..18d273d8e --- /dev/null +++ b/cmake/rpm_install.cmake @@ -0,0 +1,42 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Install rules for RPM packaging (single package, no components) + +include(GNUInstallDirs) + +# Install all public headers for zvec +# These are the headers that C API users will need +install(DIRECTORY ${PROJECT_SOURCE_DIR}/src/include/zvec/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec + FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp" +) + +# Install proto headers if they exist +if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/proto") + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/proto/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec/proto + FILES_MATCHING PATTERN "*.pb.h" + ) +endif() + +# Install static library only in NON-FAT mode +# In FAT mode, we only build the self-contained shared library +if(BUILD_STATIC_LIBS AND NOT BUILD_FAT_LIBS) + if(TARGET zvec_c_api_static) + install(TARGETS zvec_c_api_static + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + endif() +endif() diff --git a/cmake/zvec.pc.in b/cmake/zvec.pc.in new file mode 100644 index 000000000..bd61ccc3d --- /dev/null +++ b/cmake/zvec.pc.in @@ -0,0 +1,24 @@ +# pkg-config file for zvec library + +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ + +Name: zvec +Description: High-performance in-process vector database library +Version: @PROJECT_VERSION@ +Requires: +Libs: -L${libdir} -lzvec_c_api +Libs.private: -lstdc++ -lpthread -lm -ldl +Cflags: -I${includedir} + +# BUILD_FAT_LIBS=ON (default): +# libzvec_c_api.so - self-contained, no external deps except system libs +# Users only need: -lzvec_c_api +# +# libzvec_c_api_static.a - NOT built in FAT mode (use shared library instead) +# +# BUILD_FAT_LIBS=OFF: +# libzvec_c_api.so - depends on external third-party .so files +# libzvec_c_api_static.a - only zvec code, user links third-party libs diff --git a/cmake/zvec.spec.in b/cmake/zvec.spec.in new file
mode 100644 index 000000000..f9ff85593 --- /dev/null +++ b/cmake/zvec.spec.in @@ -0,0 +1,62 @@ +# RPM spec file for zvec +# Generated by CMake CPack + +Name: zvec +Version: @CPACK_PACKAGE_VERSION@ +Release: 1%{?dist} +Summary: Zvec - High-performance in-process vector database library + +License: Apache-2.0 +URL: https://github.com/alibaba/zvec +Vendor: Alibaba +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root + +%description +Zvec is an open-source, in-process vector database - lightweight, lightning-fast, +and designed to embed directly into applications. Built on Proxima (Alibaba's +battle-tested vector search engine), it delivers production-grade, low-latency, +scalable similarity search with minimal setup. + +Features: +- Blazing Fast: Searches billions of vectors in milliseconds +- Simple, Just Works: No servers, no config, no fuss +- Dense + Sparse Vectors: Work with both dense and sparse embeddings +- Hybrid Search: Combine semantic similarity with structured filters +- Runs Anywhere: As an in-process library + +This package contains: +- libzvec_c_api.so (self-contained shared library) +- Header files +- CMake configuration files +- pkg-config files + +Note: libzvec_c_api.so includes all third-party dependencies statically linked. +Users only need -lzvec_c_api without installing Arrow, RocksDB, etc. 
+ +%prep +%setup -q + +%build +%cmake_build \ + -DBUILD_PYTHON_BINDINGS=OFF \ + -DBUILD_TOOLS=OFF \ + -DBUILD_EXAMPLES=OFF \ + -DBUILD_FAT_LIBS=ON + +%install +%cmake_install + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + +%files +%{_includedir}/zvec/ +%{_libdir}/libzvec_c_api.so* +%{_libdir}/cmake/zvec/ +%{_libdir}/pkgconfig/zvec.pc +%{_datadir}/doc/zvec/ + +%changelog +* %{date} %{name} - @CPACK_PACKAGE_VERSION@ +- Initial package diff --git a/cmake/zvec_config.cmake.in b/cmake/zvec_config.cmake.in new file mode 100644 index 000000000..874bf78e4 --- /dev/null +++ b/cmake/zvec_config.cmake.in @@ -0,0 +1,33 @@ +# Zvec CMake Configuration File +# +# This file is used by CMake's find_package() command to locate the zvec library. +# +# The following variables are set: +# zvec_FOUND - True if zvec is found +# zvec_VERSION - Version of zvec +# zvec_INCLUDE_DIRS - Include directories for zvec +# zvec_LIBRARIES - Libraries to link against +# zvec_LIBRARY_DIRS - Library directories +# +# The following imported targets are defined: +# zvec::zvec_c_api - Shared library target +# zvec::zvec_c_api_static - Static library target (if available) + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +# Find required dependencies +find_dependency(Threads REQUIRED) + +# Include the targets file +if(NOT TARGET zvec::zvec_c_api) + include("${CMAKE_CURRENT_LIST_DIR}/zvecTargets.cmake") +endif() + +# Set variables for compatibility +set(zvec_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") +set(zvec_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@") +set(zvec_LIBRARIES zvec::zvec_c_api) + +check_required_components(zvec) diff --git a/src/ailego/CMakeLists.txt b/src/ailego/CMakeLists.txt index 5fcaacac2..12e87861a 100644 --- a/src/ailego/CMakeLists.txt +++ b/src/ailego/CMakeLists.txt @@ -18,8 +18,10 @@ if(UNIX AND NOT APPLE) list(APPEND EXTRA_LIBS ${LIB_RT}) endif() +# Build both OBJECT and STATIC library +# OBJECT library allows other targets to extract object files 
cc_library( - NAME zvec_ailego STATIC STRICT PACKED + NAME zvec_ailego SHARED STATIC STRICT PACKED SRCS ${ALL_SRCS} LIBS ${EXTRA_LIBS} Arrow::arrow_static diff --git a/src/c_api/CMakeLists.txt b/src/c_api/CMakeLists.txt index c47fcaf31..9d3263190 100644 --- a/src/c_api/CMakeLists.txt +++ b/src/c_api/CMakeLists.txt @@ -14,6 +14,7 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +include(GNUInstallDirs) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -40,25 +41,102 @@ set_target_properties(zvec_c_api PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR} OUTPUT_NAME "zvec_c_api" + POSITION_INDEPENDENT_CODE ON ) find_package(Threads REQUIRED) -# Link dependencies - Add force_load flag to ensure static initialization is executed -if(APPLE) - target_link_libraries(zvec_c_api - PRIVATE - "-Wl,-force_load" "$" - zvec_db - Threads::Threads - ) +# Link dependencies based on BUILD_FAT_LIBS option +# FAT mode: statically link all third-party libs (self-contained, no external deps) +# NON-FAT mode: dynamically link third-party libs (smaller, but requires external .so files) + +# Common: always statically link all zvec code into the shared library +set(ZVEC_C_API_COMMON_LINK_OPTS + "$" + "$" + "$" +) + +if(BUILD_FAT_LIBS) + # FAT mode: embed all third-party libraries + if(APPLE) + target_link_libraries(zvec_c_api + PRIVATE + # Force load all object files from zvec components + ${ZVEC_C_API_COMMON_LINK_OPTS} + # Statically link third-party libraries + roaring + Arrow::arrow_static + Arrow::parquet_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + rocksdb + glog + libprotobuf + antlr4 + sparsehash + magic_enum + Threads::Threads + ) + target_link_options(zvec_c_api PRIVATE + LINKER:-force_load,$ + LINKER:-force_load,$ + LINKER:-force_load,$ + LINKER:-force_load,$ + LINKER:-force_load,$ + LINKER:-force_load,$ + LINKER:-force_load,$ + LINKER:-force_load,$ + 
LINKER:-force_load,$ + LINKER:-force_load,$ + ) + else() + target_link_libraries(zvec_c_api + PRIVATE + "-Wl,--whole-archive" ${ZVEC_C_API_COMMON_LINK_OPTS} + # Statically link third-party libraries + "-Wl,--whole-archive" + roaring + Arrow::arrow_static + Arrow::parquet_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + rocksdb + glog + libprotobuf + antlr4 + sparsehash + magic_enum + "-Wl,--no-whole-archive" + Threads::Threads + ${CMAKE_DL_LIBS} + ) + endif() else() - target_link_libraries(zvec_c_api - PRIVATE - "-Wl,--whole-archive" zvec_core "-Wl,--no-whole-archive" - zvec_db - Threads::Threads - ) + # NON-FAT mode: link zvec_db and zvec_ailego to get third-party dependencies dynamically + if(APPLE) + set(FORCE_LOAD_FLAGS "-Wl,-force_load") + target_link_libraries(zvec_c_api + PRIVATE + ${FORCE_LOAD_FLAGS} ${ZVEC_C_API_COMMON_LINK_OPTS} + # Link targets to get third-party dependencies + ${FORCE_LOAD_FLAGS} zvec_db + ${FORCE_LOAD_FLAGS} zvec_ailego + Threads::Threads + ) + else() + target_link_libraries(zvec_c_api + PRIVATE + "-Wl,--whole-archive" ${ZVEC_C_API_COMMON_LINK_OPTS} + "-Wl,--no-whole-archive" + # Link targets to get third-party dependencies + zvec_db + zvec_ailego + Threads::Threads + ) + endif() endif() # Include directories @@ -76,58 +154,60 @@ target_compile_options(zvec_c_api PRIVATE $<$:-Wall -Wextra -Wpedantic> ) -# Installation rules +# Installation rules for shared library install(TARGETS zvec_c_api - EXPORT zvecTargets - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - RUNTIME DESTINATION bin - INCLUDES DESTINATION include + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) +# Installation rules for headers install(FILES ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h - DESTINATION include/zvec + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec ) # Create static library version 
(optional) -if(BUILD_STATIC_LIBS) +# Note: In FAT mode (BUILD_FAT_LIBS=ON), we only build shared library because +# creating a self-contained fat static library is complex and error-prone. +# Users who need static linking should use the shared library or link third-party libs manually. +if(BUILD_STATIC_LIBS AND NOT BUILD_FAT_LIBS) + # NON-FAT mode only: Create static library without third-party dependencies + # User needs to link third-party libraries manually + + # Collect all object files from zvec dependencies + set(ZVEC_C_API_STATIC_DEPS + $ + $ + $ + ) + + # Create static library with zvec code only add_library(zvec_c_api_static STATIC ${ZVEC_C_API_SOURCES} ${ZVEC_C_API_HEADERS} + ${ZVEC_C_API_STATIC_DEPS} ) - + set_target_properties(zvec_c_api_static PROPERTIES - OUTPUT_NAME "zvec_c_api" + OUTPUT_NAME "zvec_c_api_static" + POSITION_INDEPENDENT_CODE ON ) - - # Static library also adds force_load flag - if(APPLE) - target_link_libraries(zvec_c_api_static - PRIVATE - "-Wl,-force_load" "$" - zvec_db - Threads::Threads - ) - else() - target_link_libraries(zvec_c_api_static - PRIVATE - "-Wl,--whole-archive" zvec_core "-Wl,--no-whole-archive" - zvec_db - Threads::Threads - ) - endif() - + + target_link_libraries(zvec_c_api_static + PUBLIC + Threads::Threads + ${CMAKE_DL_LIBS} + ) + target_include_directories(zvec_c_api_static PUBLIC $ $ - PRIVATE - ${PROJECT_SOURCE_DIR}/src ) - + + # Install the static library install(TARGETS zvec_c_api_static - EXPORT zvecTargets - ARCHIVE DESTINATION lib - INCLUDES DESTINATION include + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) -endif() \ No newline at end of file +endif() diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 77a6127ab..73260d3be 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -3551,19 +3551,19 @@ ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, break; } case ZVEC_DATA_TYPE_BOOL: { - const bool val = (*doc_ptr)->get_ref(field_name); + const bool &val = 
(*doc_ptr)->get_ref(field_name); *value = &val; *value_size = sizeof(bool); break; } case ZVEC_DATA_TYPE_INT32: { - const int32_t val = (*doc_ptr)->get_ref(field_name); + const int32_t &val = (*doc_ptr)->get_ref(field_name); *value = &val; *value_size = sizeof(int32_t); break; } case ZVEC_DATA_TYPE_INT64: { - const int64_t val = (*doc_ptr)->get_ref(field_name); + const int64_t &val = (*doc_ptr)->get_ref(field_name); *value = &val; *value_size = sizeof(int64_t); break; @@ -3575,19 +3575,19 @@ ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, break; } case ZVEC_DATA_TYPE_UINT64: { - const uint64_t val = (*doc_ptr)->get_ref(field_name); + const uint64_t &val = (*doc_ptr)->get_ref(field_name); *value = &val; *value_size = sizeof(uint64_t); break; } case ZVEC_DATA_TYPE_FLOAT: { - const float val = (*doc_ptr)->get_ref(field_name); + const float &val = (*doc_ptr)->get_ref(field_name); *value = &val; *value_size = sizeof(float); break; } case ZVEC_DATA_TYPE_DOUBLE: { - const double val = (*doc_ptr)->get_ref(field_name); + const double &val = (*doc_ptr)->get_ref(field_name); *value = &val; *value_size = sizeof(double); break; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 7742db594..74b56d9ff 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -12,8 +12,10 @@ cc_directory(mixed_reducer) git_version(GIT_SRCS_VER ${CMAKE_CURRENT_SOURCE_DIR}) file(GLOB_RECURSE ALL_CORE_SRCS *.cc *.c *.h) +# Build both OBJECT and STATIC library +# OBJECT library allows other targets to extract object files cc_library( - NAME zvec_core STATIC STRICT PACKED + NAME zvec_core SHARED STATIC STRICT PACKED SRCS ${ALL_CORE_SRCS} LIBS zvec_ailego sparsehash magic_enum INCS . 
${PROJECT_ROOT_DIR}/src/core diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 765a1b4a6..016ff3db2 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -13,12 +13,15 @@ cc_directory(sqlengine) file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) +# Build both OBJECT and STATIC library +# OBJECT library allows other targets to extract object files +# Note: No PACKED flag - this library is not installed separately, +# it's included in libzvec_c_api_static.a cc_library( - NAME zvec_db STATIC STRICT SRCS_NO_GLOB + NAME zvec_db SHARED STATIC STRICT SRCS_NO_GLOB SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc INCS . ${CMAKE_CURRENT_BINARY_DIR} - PUBINCS ${PROJECT_ROOT_DIR}/src/include - LIBS + LIBS zvec_ailego zvec_core glog diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index 5abcb5332..d331f9d9c 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -19,6 +19,7 @@ #include #include #include +#include <_printf.h> #ifdef _POSIX_C_SOURCE #include #endif @@ -2146,7 +2147,7 @@ void test_performance_benchmarks(void) { ZVecErrorCode err = zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); TEST_ASSERT(err == ZVEC_OK); - + TEST_ASSERT(collection != NULL); if (collection) { @@ -2204,8 +2205,8 @@ void test_performance_benchmarks(void) { #ifdef _POSIX_C_SOURCE gettimeofday(&end_time, NULL); - double insert_time = (end_time.tv_sec - start_time.tv_sec) + - (end_time.tv_usec - start_time.tv_usec) / 1000000.0; + double insert_time = (end_time.tv_sec - start_time.tv_sec) + + (end_time.tv_usec - start_time.tv_usec) / 1000000.0; #else clock_t end_clock = clock(); double insert_time = ((double)(end_clock - start_clock)) / CLOCKS_PER_SEC; @@ -2253,11 +2254,13 @@ void test_performance_benchmarks(void) { #ifdef _POSIX_C_SOURCE gettimeofday(&query_end_time, NULL); - double query_time = (query_end_time.tv_sec - query_start_time.tv_sec) + - (query_end_time.tv_usec - query_start_time.tv_usec) / 1000000.0; 
+ double query_time = + (query_end_time.tv_sec - query_start_time.tv_sec) + + (query_end_time.tv_usec - query_start_time.tv_usec) / 1000000.0; #else clock_t query_end_clock = clock(); - double query_time = ((double)(query_end_clock - query_start_clock)) / CLOCKS_PER_SEC; + double query_time = + ((double)(query_end_clock - query_start_clock)) / CLOCKS_PER_SEC; #endif double avg_query_time = (query_time * 1000) / QUERY_COUNT; // ms per query @@ -2284,7 +2287,7 @@ void test_performance_benchmarks(void) { int main(void) { printf("Starting comprehensive C API tests...\n\n"); - + // Clean up previous test directories printf("Cleaning up previous test directories...\n"); system("rm -rf /tmp/zvec_test_*"); diff --git a/tests/core/algorithm/ivf/ivf_searcher_test.cc b/tests/core/algorithm/ivf/ivf_searcher_test.cc index 9911e0e2e..75d5df1ce 100644 --- a/tests/core/algorithm/ivf/ivf_searcher_test.cc +++ b/tests/core/algorithm/ivf/ivf_searcher_test.cc @@ -392,7 +392,7 @@ TEST_F(IVFSearcherTest, TestSimpleCosine) { { size_t topk = 33; context->set_topk(topk); - + std::string new_vec; IndexQueryMeta new_meta; ASSERT_EQ(0, reformer->convert(query.data(), qmeta, &new_vec, &new_meta)); From 50ed71763b27ec0cc713df8de511987310c8c472 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Tue, 10 Mar 2026 19:06:26 +0800 Subject: [PATCH 06/15] fix some code --- CMakeLists.txt | 8 +--- cmake/rpm_install.cmake | 2 +- pyproject.toml | 8 ++++ src/binding/python/CMakeLists.txt | 71 +++++++++++++++++++------------ src/c_api/CMakeLists.txt | 2 +- src/db/CMakeLists.txt | 4 +- 6 files changed, 56 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 742e2728a..621422d8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,10 +34,6 @@ message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}") option(BUILD_EXAMPLES "Build examples" ON) message(STATUS "BUILD_EXAMPLES:${BUILD_EXAMPLES}") -# Option to build static libraries for RPM packaging -option(BUILD_STATIC_LIBS "Build static 
libraries" OFF) -message(STATUS "BUILD_STATIC_LIBS:${BUILD_STATIC_LIBS}") - # Option to enable RPM packaging option(ENABLE_RPM_PACKAGING "Enable RPM packaging support" OFF) message(STATUS "ENABLE_RPM_PACKAGING:${ENABLE_RPM_PACKAGING}") @@ -66,7 +62,7 @@ set(CPACK_PACKAGE_NAME zvec) # ============================================================================= # RPM Packaging Configuration # ============================================================================= -if(ENABLE_RPM_PACKAGING OR BUILD_STATIC_LIBS) +if(ENABLE_RPM_PACKAGING) include(GNUInstallDirs) include(CMakePackageConfigHelpers) @@ -129,7 +125,7 @@ endif() # Import target: zvec::zvec_c_api_static (if available) # This is a 'fat' static library that includes all zvec code (core, db, ailego) # But you still need to link third-party dependencies separately -if(BUILD_STATIC_LIBS AND EXISTS \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_LIBDIR}/libzvec_c_api_static.a\") +if(ENABLE_RPM_PACKAGING AND EXISTS "${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_LIBDIR}/libzvec_c_api_static.a") if(NOT TARGET zvec::zvec_c_api_static) add_library(zvec::zvec_c_api_static STATIC IMPORTED) set_target_properties(zvec::zvec_c_api_static PROPERTIES diff --git a/cmake/rpm_install.cmake b/cmake/rpm_install.cmake index 18d273d8e..22c0c07a7 100644 --- a/cmake/rpm_install.cmake +++ b/cmake/rpm_install.cmake @@ -33,7 +33,7 @@ endif() # Install static library only in NON-FAT mode # In FAT mode, we only build the self-contained shared library -if(BUILD_STATIC_LIBS AND NOT BUILD_FAT_LIBS) +if(ENABLE_RPM_PACKAGING AND NOT BUILD_FAT_LIBS) if(TARGET zvec_c_api_static) install(TARGETS zvec_c_api_static ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/pyproject.toml b/pyproject.toml index 5e99edfae..69e84dd57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,14 @@ build-dir = "build" wheel.expand-macos-universal-tags = true wheel.packages = ["python/zvec"] +# Exclude unnecessary files from wheel 
+wheel.exclude = [ + "**/*.dylib", + "**/*.a", + "lib/cmake/**", + "lib/pkgconfig/**", +] + # Source distribution sdist.include = [ "README.md", diff --git a/src/binding/python/CMakeLists.txt b/src/binding/python/CMakeLists.txt index 160b25ea8..9fd462b30 100644 --- a/src/binding/python/CMakeLists.txt +++ b/src/binding/python/CMakeLists.txt @@ -18,42 +18,57 @@ set(SRC_LISTS pybind11_add_module(_zvec ${SRC_LISTS}) + if (CMAKE_SYSTEM_NAME STREQUAL "Linux") target_link_libraries(_zvec PRIVATE - -Wl,--whole-archive - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - -Wl,--no-whole-archive - zvec_db + # Link all zvec internal objects (includes all algorithms) + "$" + "$" + "$" + # Link third-party static libraries + Arrow::arrow_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + Arrow::parquet_static + rocksdb + glog + libprotobuf + antlr4 + roaring + sparsehash + magic_enum ) target_link_options(_zvec PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map" ) -elseif (APPLE) - target_link_libraries(_zvec PRIVATE - -Wl,-force_load,$ - -Wl,-force_load,$ - -Wl,-force_load,$ - -Wl,-force_load,$ - -Wl,-force_load,$ - -Wl,-force_load,$ - -Wl,-force_load,$ - -Wl,-force_load,$ - -Wl,-force_load,$ - -Wl,-force_load,$ - zvec_db +elseif(APPLE) + # Link all zvec internal objects (includes all algorithms) + set(ZVEC_PYTHON_STATIC_DEPS + "$" + "$" + "$" ) + target_link_libraries(_zvec PRIVATE - -Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/exports.mac + ${ZVEC_PYTHON_STATIC_DEPS} + # Link third-party static libraries + Arrow::arrow_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + Arrow::parquet_static + rocksdb + glog + libprotobuf + antlr4 + roaring + sparsehash + magic_enum + ) + target_link_options(_zvec PRIVATE + "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/exports.mac" ) -endif () +endif() target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src 
${PROJECT_ROOT_DIR}/src/binding/python/include) diff --git a/src/c_api/CMakeLists.txt b/src/c_api/CMakeLists.txt index 9d3263190..57d9d94c6 100644 --- a/src/c_api/CMakeLists.txt +++ b/src/c_api/CMakeLists.txt @@ -171,7 +171,7 @@ install(FILES ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h # Note: In FAT mode (BUILD_FAT_LIBS=ON), we only build shared library because # creating a self-contained fat static library is complex and error-prone. # Users who need static linking should use the shared library or link third-party libs manually. -if(BUILD_STATIC_LIBS AND NOT BUILD_FAT_LIBS) +if(ENABLE_RPM_PACKAGING AND NOT BUILD_FAT_LIBS) # NON-FAT mode only: Create static library without third-party dependencies # User needs to link third-party libraries manually diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 016ff3db2..89305b328 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -15,10 +15,8 @@ file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) # Build both OBJECT and STATIC library # OBJECT library allows other targets to extract object files -# Note: No PACKED flag - this library is not installed separately, -# it's included in libzvec_c_api_static.a cc_library( - NAME zvec_db SHARED STATIC STRICT SRCS_NO_GLOB + NAME zvec_db SHARED STATIC STRICT SRCS_NO_GLOB PACKED SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc INCS . 
${CMAKE_CURRENT_BINARY_DIR} LIBS From d0f828c7c6dd370c5b88203717afc12469a07173 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Tue, 10 Mar 2026 19:46:17 +0800 Subject: [PATCH 07/15] fix some code --- tests/c_api/c_api_test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index d331f9d9c..4a26ce90c 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -19,7 +19,6 @@ #include #include #include -#include <_printf.h> #ifdef _POSIX_C_SOURCE #include #endif From 7619eb7f7882bb505cdf61bb388f0ac2d478c4fa Mon Sep 17 00:00:00 2001 From: lc285652 Date: Thu, 12 Mar 2026 16:08:44 +0800 Subject: [PATCH 08/15] remove rpm build --- CMakeLists.txt | 122 ----------------- cmake/rpm_install.cmake | 42 ------ cmake/zvec.pc.in | 24 ---- cmake/zvec.spec.in | 62 --------- cmake/zvec_config.cmake.in | 33 ----- src/ailego/CMakeLists.txt | 4 +- src/binding/python/CMakeLists.txt | 73 +++++------ src/c_api/CMakeLists.txt | 209 ++++++++++++------------------ src/core/CMakeLists.txt | 4 +- src/db/CMakeLists.txt | 4 +- src/include/zvec/c_api.h | 14 +- 11 files changed, 122 insertions(+), 469 deletions(-) delete mode 100644 cmake/rpm_install.cmake delete mode 100644 cmake/zvec.pc.in delete mode 100644 cmake/zvec.spec.in delete mode 100644 cmake/zvec_config.cmake.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 621422d8c..07f99a72a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,16 +34,6 @@ message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}") option(BUILD_EXAMPLES "Build examples" ON) message(STATUS "BUILD_EXAMPLES:${BUILD_EXAMPLES}") -# Option to enable RPM packaging -option(ENABLE_RPM_PACKAGING "Enable RPM packaging support" OFF) -message(STATUS "ENABLE_RPM_PACKAGING:${ENABLE_RPM_PACKAGING}") - -# Option to build fat libraries (all-in-one with static-linked third-party deps) -# When ON: libzvec_c_api.so and libzvec_c_api_static.a include all third-party libs -# When OFF: Libraries depend on external .so files for 
third-party deps -option(BUILD_FAT_LIBS "Build fat libraries with all dependencies statically linked" ON) -message(STATUS "BUILD_FAT_LIBS:${BUILD_FAT_LIBS}") - cc_directory(thirdparty) cc_directories(src) cc_directories(tests) @@ -59,118 +49,6 @@ git_version(GIT_SRCS_VER ${PROJECT_ROOT_DIR}) set(CPACK_PACKAGE_VERSION ${GIT_SRCS_VER}) set(CPACK_PACKAGE_NAME zvec) -# ============================================================================= -# RPM Packaging Configuration -# ============================================================================= -if(ENABLE_RPM_PACKAGING) - include(GNUInstallDirs) - include(CMakePackageConfigHelpers) - - # Generate version helpers - write_basic_package_version_file( - "${CMAKE_CURRENT_BINARY_DIR}/zvecConfigVersion.cmake" - VERSION ${PROJECT_VERSION} - COMPATIBILITY SameMajorVersion - ) - - # Configure CMake config file - configure_package_config_file( - "${PROJECT_ROOT_DIR}/cmake/zvec_config.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/zvecConfig.cmake" - INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/zvec" - ) - - # Configure pkg-config file - configure_file( - "${PROJECT_ROOT_DIR}/cmake/zvec.pc.in" - "${CMAKE_CURRENT_BINARY_DIR}/zvec.pc" - @ONLY - ) - - # Install CMake configuration files - install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/zvecConfig.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/zvecConfigVersion.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/zvec" - ) - - # Install pkg-config file - install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/zvec.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig" - ) - - # Install documentation - install(FILES - "${PROJECT_ROOT_DIR}/README.md" - DESTINATION "${CMAKE_INSTALL_DATADIR}/doc/zvec" - ) - - # Create a simple CMake targets file - # Since we can't export complex target hierarchies with all dependencies, - # we create a simplified import file - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/zvecTargets.cmake" -"# Zvec CMake Targets File - -# Import target: zvec::zvec_c_api -if(NOT TARGET 
zvec::zvec_c_api) - add_library(zvec::zvec_c_api SHARED IMPORTED) - set_target_properties(zvec::zvec_c_api PROPERTIES - IMPORTED_LOCATION \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_LIBDIR}/libzvec_c_api.so\" - INTERFACE_INCLUDE_DIRECTORIES \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_INCLUDEDIR}\" - INTERFACE_LINK_LIBRARIES \"Threads::Threads\" - ) -endif() - -# Import target: zvec::zvec_c_api_static (if available) -# This is a 'fat' static library that includes all zvec code (core, db, ailego) -# But you still need to link third-party dependencies separately -if(ENABLE_RPM_PACKAGING AND EXISTS "${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_LIBDIR}/libzvec_c_api_static.a") - if(NOT TARGET zvec::zvec_c_api_static) - add_library(zvec::zvec_c_api_static STATIC IMPORTED) - set_target_properties(zvec::zvec_c_api_static PROPERTIES - IMPORTED_LOCATION \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_LIBDIR}/libzvec_c_api_static.a\" - INTERFACE_INCLUDE_DIRECTORIES \"\${CMAKE_CURRENT_LIST_DIR}/../../../${CMAKE_INSTALL_INCLUDEDIR}\" - # Note: Third-party dependencies (Arrow, RocksDB, etc.) 
must be linked by the user - INTERFACE_LINK_LIBRARIES \"Threads::Threads\" - ) - endif() -endif() -") - - # Install the targets file - install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/zvecTargets.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/zvec" - COMPONENT devel - ) - - # Include RPM-specific installation rules - include(${PROJECT_ROOT_DIR}/cmake/rpm_install.cmake) -endif() - -# Configure CPack for RPM generation -if(ENABLE_RPM_PACKAGING) - set(CPACK_GENERATOR "RPM" CACHE STRING "Generators to support") - set(CPACK_PACKAGE_VERSION ${GIT_SRCS_VER}) - set(CPACK_PACKAGE_NAME zvec) - set(CPACK_RPM_PACKAGE_LICENSE "Apache-2.0") - set(CPACK_RPM_PACKAGE_URL "https://github.com/alibaba/zvec") - set(CPACK_RPM_PACKAGE_DESCRIPTION "Zvec - High-performance in-process vector database library with self-contained shared library") - set(CPACK_RPM_PACKAGE_GROUP "Development/Libraries") - set(CPACK_RPM_PACKAGE_VENDOR "Alibaba") - set(CPACK_RPM_PACKAGE_SUMMARY "High-performance in-process vector database library") - - # RPM specific settings - set(CPACK_PACKAGE_FILE_NAME "zvec-${CPACK_PACKAGE_VERSION}-1") - - # Single RPM package - no components - # Only the self-contained shared library (fat lib) - set(CPACK_RPM_COMPONENT_INSTALL OFF) - - include(CPack) -endif() - if(BUILD_PYTHON_BINDINGS) if(APPLE) set(CMAKE_STRIP "") diff --git a/cmake/rpm_install.cmake b/cmake/rpm_install.cmake deleted file mode 100644 index 22c0c07a7..000000000 --- a/cmake/rpm_install.cmake +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2025-present the zvec project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# Install rules for RPM packaging (single package, no components) - -include(GNUInstallDirs) - -# Install all public headers for zvec -# These are the headers that C API users will need -install(DIRECTORY ${PROJECT_SOURCE_DIR}/src/include/zvec/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec - FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp" -) - -# Install proto headers if they exist -if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/proto") - install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/proto/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec/proto - FILES_MATCHING PATTERN "*.pb.h" - ) -endif() - -# Install static library only in NON-FAT mode -# In FAT mode, we only build the self-contained shared library -if(ENABLE_RPM_PACKAGING AND NOT BUILD_FAT_LIBS) - if(TARGET zvec_c_api_static) - install(TARGETS zvec_c_api_static - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) - endif() -endif() diff --git a/cmake/zvec.pc.in b/cmake/zvec.pc.in deleted file mode 100644 index bd61ccc3d..000000000 --- a/cmake/zvec.pc.in +++ /dev/null @@ -1,24 +0,0 @@ -# pkg-config file for zvec library - -prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=${prefix} -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/@CMAKE_INSTALL_INCDIR@ - -Name: zvec -Description: High-performance in-process vector database library -Version: @PROJECT_VERSION@ -Requires: -Libs: -L${libdir} -lzvec_c_api -Libs.private: -lstdc++ -lpthread -lm ${CMAKE_DL_LIBS} -Cflags: -I${includedir} - -# BUILD_FAT_LIBS=ON (default): -# libzvec_c_api.so - self-contained, no external deps except system libs -# Users only need: -lzvec_c_api -# -# libzvec_c_api_static.a - NOT built in FAT mode (use shared library instead) -# -# BUILD_FAT_LIBS=OFF: -# libzvec_c_api.so - depends on external third-party .so files -# libzvec_c_api_static.a - only zvec code, user links third-party libs diff --git a/cmake/zvec.spec.in b/cmake/zvec.spec.in 
deleted file mode 100644 index f9ff85593..000000000 --- a/cmake/zvec.spec.in +++ /dev/null @@ -1,62 +0,0 @@ -# RPM spec file for zvec -# Generated by CMake CPack - -Name: zvec -Version: @CPACK_PACKAGE_VERSION@ -Release: 1%{?dist} -Summary: Zvec - High-performance in-process vector database library - -License: Apache-2.0 -URL: https://github.com/alibaba/zvec -Vendor: Alibaba -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root - -%description -Zvec is an open-source, in-process vector database - lightweight, lightning-fast, -and designed to embed directly into applications. Built on Proxima (Alibaba's -battle-tested vector search engine), it delivers production-grade, low-latency, -scalable similarity search with minimal setup. - -Features: -- Blazing Fast: Searches billions of vectors in milliseconds -- Simple, Just Works: No servers, no config, no fuss -- Dense + Sparse Vectors: Work with both dense and sparse embeddings -- Hybrid Search: Combine semantic similarity with structured filters -- Runs Anywhere: As an in-process library - -This package contains: -- libzvec_c_api.so (self-contained shared library) -- Header files -- CMake configuration files -- pkg-config files - -Note: libzvec_c_api.so includes all third-party dependencies statically linked. -Users only need -lzvec_c_api without installing Arrow, RocksDB, etc. 
- -%prep -%setup -q - -%build -%cmake_build \ - -DBUILD_PYTHON_BINDINGS=OFF \ - -DBUILD_TOOLS=OFF \ - -DBUILD_EXAMPLES=OFF \ - -DBUILD_FAT_LIBS=ON - -%install -%cmake_install - -%post -p /sbin/ldconfig - -%postun -p /sbin/ldconfig - -%files -%{_includedir}/zvec/ -%{_libdir}/libzvec_c_api.so* -%{_libdir}/cmake/zvec/ -%{_libdir}/pkgconfig/zvec.pc -%{_datadir}/doc/zvec/ - -%changelog -* %{date} %{name} - @CPACK_PACKAGE_VERSION@ -- Initial package diff --git a/cmake/zvec_config.cmake.in b/cmake/zvec_config.cmake.in deleted file mode 100644 index 874bf78e4..000000000 --- a/cmake/zvec_config.cmake.in +++ /dev/null @@ -1,33 +0,0 @@ -# Zvec CMake Configuration File -# -# This file is used by CMake's find_package() command to locate the zvec library. -# -# The following variables are set: -# zvec_FOUND - True if zvec is found -# zvec_VERSION - Version of zvec -# zvec_INCLUDE_DIRS - Include directories for zvec -# zvec_LIBRARIES - Libraries to link against -# zvec_LIBRARY_DIRS - Library directories -# -# The following imported targets are defined: -# zvec::zvec_c_api - Shared library target -# zvec::zvec_c_api_static - Static library target (if available) - -@PACKAGE_INIT@ - -include(CMakeFindDependencyMacro) - -# Find required dependencies -find_dependency(Threads REQUIRED) - -# Include the targets file -if(NOT TARGET zvec::zvec_c_api) - include("${CMAKE_CURRENT_LIST_DIR}/zvecTargets.cmake") -endif() - -# Set variables for compatibility -set(zvec_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") -set(zvec_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@") -set(zvec_LIBRARIES zvec::zvec_c_api) - -check_required_components(zvec) diff --git a/src/ailego/CMakeLists.txt b/src/ailego/CMakeLists.txt index 12e87861a..5fcaacac2 100644 --- a/src/ailego/CMakeLists.txt +++ b/src/ailego/CMakeLists.txt @@ -18,10 +18,8 @@ if(UNIX AND NOT APPLE) list(APPEND EXTRA_LIBS ${LIB_RT}) endif() -# Build both OBJECT and STATIC library -# OBJECT library allows other targets to extract object files 
cc_library( - NAME zvec_ailego SHARED STATIC STRICT PACKED + NAME zvec_ailego STATIC STRICT PACKED SRCS ${ALL_SRCS} LIBS ${EXTRA_LIBS} Arrow::arrow_static diff --git a/src/binding/python/CMakeLists.txt b/src/binding/python/CMakeLists.txt index 9fd462b30..c78aa0339 100644 --- a/src/binding/python/CMakeLists.txt +++ b/src/binding/python/CMakeLists.txt @@ -18,57 +18,42 @@ set(SRC_LISTS pybind11_add_module(_zvec ${SRC_LISTS}) - if (CMAKE_SYSTEM_NAME STREQUAL "Linux") target_link_libraries(_zvec PRIVATE - # Link all zvec internal objects (includes all algorithms) - "$" - "$" - "$" - # Link third-party static libraries - Arrow::arrow_static - Arrow::arrow_compute - Arrow::arrow_dataset - Arrow::arrow_acero - Arrow::parquet_static - rocksdb - glog - libprotobuf - antlr4 - roaring - sparsehash - magic_enum + -Wl,--whole-archive + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + -Wl,--no-whole-archive + zvec_db ) target_link_options(_zvec PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map" ) -elseif(APPLE) - # Link all zvec internal objects (includes all algorithms) - set(ZVEC_PYTHON_STATIC_DEPS - "$" - "$" - "$" - ) - +elseif (APPLE) target_link_libraries(_zvec PRIVATE - ${ZVEC_PYTHON_STATIC_DEPS} - # Link third-party static libraries - Arrow::arrow_static - Arrow::arrow_compute - Arrow::arrow_dataset - Arrow::arrow_acero - Arrow::parquet_static - rocksdb - glog - libprotobuf - antlr4 - roaring - sparsehash - magic_enum + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + zvec_db ) - target_link_options(_zvec PRIVATE - "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/exports.mac" + target_link_libraries(_zvec PRIVATE + -Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/exports.mac ) -endif() +endif () -target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src 
${PROJECT_ROOT_DIR}/src/binding/python/include) +target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) \ No newline at end of file diff --git a/src/c_api/CMakeLists.txt b/src/c_api/CMakeLists.txt index 57d9d94c6..0db99f29b 100644 --- a/src/c_api/CMakeLists.txt +++ b/src/c_api/CMakeLists.txt @@ -30,7 +30,19 @@ set(ZVEC_C_API_HEADERS ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h ) -# Create shared library +# ============================================================================= +# Build FAT Shared Library (zvec_c_api.so) +# ============================================================================= +# BUILD_RELEASE_FAT_LIBS=ON: Fully self-contained, zero external dependencies +# Users only need -lzvec_c_api +# BUILD_RELEASE_FAT_LIBS=OFF: Development mode, third-party libs linked normally +# Allows parallel test execution without symbol conflicts +# +# Implementation: +# - Always embeds zvec_db, zvec_core, zvec_ailego via --whole-archive +# - For release: also embeds all third-party libs (rocksdb, glog, protobuf, etc.) 
+# - Uses --exclude-libs,ALL to hide third-party symbols from export +# ============================================================================= add_library(zvec_c_api SHARED ${ZVEC_C_API_SOURCES} ${ZVEC_C_API_HEADERS} @@ -42,29 +54,71 @@ set_target_properties(zvec_c_api PROPERTIES SOVERSION ${PROJECT_VERSION_MAJOR} OUTPUT_NAME "zvec_c_api" POSITION_INDEPENDENT_CODE ON + # Hide all symbols by default, only export C API + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON ) find_package(Threads REQUIRED) -# Link dependencies based on BUILD_FAT_LIBS option -# FAT mode: statically link all third-party libs (self-contained, no external deps) -# NON-FAT mode: dynamically link third-party libs (smaller, but requires external .so files) - -# Common: always statically link all zvec code into the shared library -set(ZVEC_C_API_COMMON_LINK_OPTS - "$" - "$" - "$" -) +# FAT mode: embed ALL libraries (including third-party) statically +# This creates a truly self-contained library with zero external dependencies +# Users only need to link libzvec_c_api.so without installing any dependencies +if(APPLE) + # Combine all libraries in a single target_link_libraries call + target_link_libraries(zvec_c_api + PRIVATE + # zvec static libraries + zvec_db + zvec_core + zvec_ailego + # Third-party libraries + roaring + Arrow::arrow_static + Arrow::parquet_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + rocksdb + glog + libprotobuf + antlr4 + sparsehash + magic_enum + Threads::Threads + ${CMAKE_DL_LIBS} + ) + + # Then use target_link_libraries with -force_load on macOS + # This ensures all symbols from static libraries are included + # Note: sparsehash and magic_enum are header-only, skip them + target_link_libraries(zvec_c_api PRIVATE + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + 
-Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + ) -if(BUILD_FAT_LIBS) - # FAT mode: embed all third-party libraries - if(APPLE) - target_link_libraries(zvec_c_api - PRIVATE - # Force load all object files from zvec components - ${ZVEC_C_API_COMMON_LINK_OPTS} - # Statically link third-party libraries +else() + target_link_libraries(zvec_c_api + PRIVATE + # Force load all zvec static libraries (extract all objects) + "-Wl,--whole-archive" + zvec_db + zvec_core + zvec_ailego + "-Wl,--no-whole-archive" + # Force load ALL third-party libraries for zero-dependency deployment + "-Wl,--whole-archive" roaring Arrow::arrow_static Arrow::parquet_static @@ -77,66 +131,10 @@ if(BUILD_FAT_LIBS) antlr4 sparsehash magic_enum - Threads::Threads - ) - target_link_options(zvec_c_api PRIVATE - LINKER:-force_load,$ - LINKER:-force_load,$ - LINKER:-force_load,$ - LINKER:-force_load,$ - LINKER:-force_load,$ - LINKER:-force_load,$ - LINKER:-force_load,$ - LINKER:-force_load,$ - LINKER:-force_load,$ - LINKER:-force_load,$ - ) - else() - target_link_libraries(zvec_c_api - PRIVATE - "-Wl,--whole-archive" ${ZVEC_C_API_COMMON_LINK_OPTS} - # Statically link third-party libraries - "-Wl,--whole-archive" - roaring - Arrow::arrow_static - Arrow::parquet_static - Arrow::arrow_compute - Arrow::arrow_dataset - Arrow::arrow_acero - rocksdb - glog - libprotobuf - antlr4 - sparsehash - magic_enum - "-Wl,--no-whole-archive" - Threads::Threads - ${CMAKE_DL_LIBS} - ) - endif() -else() - # NON-FAT mode: link zvec_db and zvec_ailego to get third-party dependencies dynamically - if(APPLE) - set(FORCE_LOAD_FLAGS "-Wl,-force_load") - target_link_libraries(zvec_c_api - PRIVATE - ${FORCE_LOAD_FLAGS} ${ZVEC_C_API_COMMON_LINK_OPTS} - # Link targets to get third-party dependencies - ${FORCE_LOAD_FLAGS} zvec_db - ${FORCE_LOAD_FLAGS} zvec_ailego - Threads::Threads - ) - else() - target_link_libraries(zvec_c_api - PRIVATE - "-Wl,--whole-archive" ${ZVEC_C_API_COMMON_LINK_OPTS} - "-Wl,--no-whole-archive" - # 
Link targets to get third-party dependencies - zvec_db - zvec_ailego - Threads::Threads - ) - endif() + "-Wl,--no-whole-archive" + Threads::Threads + ${CMAKE_DL_LIBS} + ) endif() # Include directories @@ -154,7 +152,11 @@ target_compile_options(zvec_c_api PRIVATE $<$:-Wall -Wextra -Wpedantic> ) -# Installation rules for shared library +# ============================================================================= +# Installation Rules +# ============================================================================= + +# Install shared library install(TARGETS zvec_c_api LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} @@ -162,52 +164,7 @@ install(TARGETS zvec_c_api INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) -# Installation rules for headers +# Install headers install(FILES ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec ) - -# Create static library version (optional) -# Note: In FAT mode (BUILD_FAT_LIBS=ON), we only build shared library because -# creating a self-contained fat static library is complex and error-prone. -# Users who need static linking should use the shared library or link third-party libs manually. 
-if(ENABLE_RPM_PACKAGING AND NOT BUILD_FAT_LIBS) - # NON-FAT mode only: Create static library without third-party dependencies - # User needs to link third-party libraries manually - - # Collect all object files from zvec dependencies - set(ZVEC_C_API_STATIC_DEPS - $ - $ - $ - ) - - # Create static library with zvec code only - add_library(zvec_c_api_static STATIC - ${ZVEC_C_API_SOURCES} - ${ZVEC_C_API_HEADERS} - ${ZVEC_C_API_STATIC_DEPS} - ) - - set_target_properties(zvec_c_api_static PROPERTIES - OUTPUT_NAME "zvec_c_api_static" - POSITION_INDEPENDENT_CODE ON - ) - - target_link_libraries(zvec_c_api_static - PUBLIC - Threads::Threads - ${CMAKE_DL_LIBS} - ) - - target_include_directories(zvec_c_api_static - PUBLIC - $ - $ - ) - - # Install the static library - install(TARGETS zvec_c_api_static - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) -endif() diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 74b56d9ff..7742db594 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -12,10 +12,8 @@ cc_directory(mixed_reducer) git_version(GIT_SRCS_VER ${CMAKE_CURRENT_SOURCE_DIR}) file(GLOB_RECURSE ALL_CORE_SRCS *.cc *.c *.h) -# Build both OBJECT and STATIC library -# OBJECT library allows other targets to extract object files cc_library( - NAME zvec_core SHARED STATIC STRICT PACKED + NAME zvec_core STATIC STRICT PACKED SRCS ${ALL_CORE_SRCS} LIBS zvec_ailego sparsehash magic_enum INCS . ${PROJECT_ROOT_DIR}/src/core diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 89305b328..0384659b3 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -13,10 +13,8 @@ cc_directory(sqlengine) file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) -# Build both OBJECT and STATIC library -# OBJECT library allows other targets to extract object files cc_library( - NAME zvec_db SHARED STATIC STRICT SRCS_NO_GLOB PACKED + NAME zvec_db STATIC STRICT SRCS_NO_GLOB PACKED SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc INCS . 
${CMAKE_CURRENT_BINARY_DIR} LIBS diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index ad3cc5865..c38d1bab5 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -1159,7 +1159,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_field_schema_set_index_params( * @param field_schema Field schema pointer * @param invert_params Inverted index parameters pointer */ -void zvec_field_schema_set_invert_index( +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_invert_index( ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params); /** @@ -1167,24 +1167,24 @@ void zvec_field_schema_set_invert_index( * @param field_schema Field schema pointer * @param hnsw_params HNSW index parameters pointer */ -void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, - const ZVecHnswIndexParams *hnsw_params); +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_hnsw_index( + ZVecFieldSchema *field_schema, const ZVecHnswIndexParams *hnsw_params); /** * @brief Set Flat index parameters for field schema * @param field_schema Field schema pointer * @param flat_params Flat index parameters pointer */ -void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, - const ZVecFlatIndexParams *flat_params); +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_flat_index( + ZVecFieldSchema *field_schema, const ZVecFlatIndexParams *flat_params); /** * @brief Set IVF index parameters for field schema * @param field_schema Field schema pointer * @param ivf_params IVF index parameters pointer */ -void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, - const ZVecIVFIndexParams *ivf_params); +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_ivf_index( + ZVecFieldSchema *field_schema, const ZVecIVFIndexParams *ivf_params); // ============================================================================= From 6f30a7c8bd5b043a3a00f008481aab9029d78a3c Mon Sep 17 00:00:00 2001 From: lc285652 Date: Thu, 12 Mar 2026 17:03:26 +0800 Subject: 
[PATCH 09/15] Add GitHub Actions release workflow for C API --- .github/workflows/release.yml | 138 ++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..51dbe3e09 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,138 @@ +name: Release + +on: + push: + tags: + - 'v*' # Match v0.3.0, v1.0.0, etc. + +jobs: + # ============================================================================ + # Linux x64 Build + # ============================================================================ + linux-x64: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y cmake ninja-build build-essential + + - name: Build libzvec_c_api.so + run: | + cmake -S . -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_PYTHON_BINDINGS=OFF \ + -DBUILD_TOOLS=OFF \ + -DBUILD_EXAMPLES=OFF + cmake --build build --parallel --target zvec_c_api + + - name: Verify library + run: | + echo "=== Library file ===" + ls -lh build/src/c_api/libzvec_c_api.so + echo "=== Check dependencies ===" + ldd build/src/c_api/libzvec_c_api.so || true + + - name: Create tarball + run: | + cp src/include/zvec/c_api.h . + cp build/src/c_api/libzvec_c_api.so . 
+ tar -czvf libzvec-capi-linux-x64.tar.gz \ + c_api.h \ + libzvec_c_api.so + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: zvec-capi-linux-x64 + path: libzvec-capi-linux-x64.tar.gz + + # ============================================================================ + # macOS Universal Build (arm64 + x86_64) + # ============================================================================ + macos-universal: + runs-on: macos-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install dependencies + run: | + brew install cmake ninja + + - name: Build libzvec_c_api.dylib (Universal Binary) + env: + CMAKE_OSX_ARCHITECTURES: "arm64;x86_64" + MACOSX_DEPLOYMENT_TARGET: "11.0" + run: | + cmake -S . -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \ + -DCMAKE_OSX_DEPLOYMENT_TARGET="11.0" \ + -DBUILD_PYTHON_BINDINGS=OFF \ + -DBUILD_TOOLS=OFF \ + -DBUILD_EXAMPLES=OFF + cmake --build build --parallel --target zvec_c_api + + - name: Verify library + run: | + echo "=== Library file ===" + ls -lh build/src/c_api/libzvec_c_api.dylib + echo "=== Check architectures ===" + lipo -archs build/src/c_api/libzvec_c_api.dylib + + - name: Create tarball + run: | + cp src/include/zvec/c_api.h . + cp build/src/c_api/libzvec_c_api.dylib . 
+ tar -czvf libzvec-capi-macos-universal.tar.gz \ + c_api.h \ + libzvec_c_api.dylib + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: zvec-capi-macos-universal + path: libzvec-capi-macos-universal.tar.gz + + # ============================================================================ + # Upload to GitHub Releases + # ============================================================================ + upload-release: + needs: [linux-x64, macos-universal] + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # Download build artifacts for each platform + - uses: actions/download-artifact@v4 + with: + name: zvec-capi-linux-x64 + path: dist/ + + - uses: actions/download-artifact@v4 + with: + name: zvec-capi-macos-universal + path: dist/ + + - name: List artifacts + run: ls -la dist/ + + # Upload to GitHub Releases + - uses: softprops/action-gh-release@v1 + with: + files: dist/*.tar.gz + generate_release_notes: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 7d0ff144867358bfd2f8310dccad26bf89f396c3 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Thu, 12 Mar 2026 17:13:38 +0800 Subject: [PATCH 10/15] add release linux-arm64 --- .github/workflows/release.yml | 53 ++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 51dbe3e09..26f96ad71 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -52,6 +52,52 @@ jobs: name: zvec-capi-linux-x64 path: libzvec-capi-linux-x64.tar.gz + # ============================================================================ + # Linux ARM64 Build + # ============================================================================ + linux-arm64: + runs-on: ubuntu-24.04-arm + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install dependencies + run: | + sudo 
apt-get update + sudo apt-get install -y cmake ninja-build build-essential + + - name: Build libzvec_c_api.so (ARM64) + run: | + cmake -S . -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_PYTHON_BINDINGS=OFF \ + -DBUILD_TOOLS=OFF \ + -DBUILD_EXAMPLES=OFF + cmake --build build --parallel --target zvec_c_api + + - name: Verify library + run: | + echo "=== Library file ===" + ls -lh build/src/c_api/libzvec_c_api.so + echo "=== Check dependencies ===" + ldd build/src/c_api/libzvec_c_api.so || true + + - name: Create tarball + run: | + cp src/include/zvec/c_api.h . + cp build/src/c_api/libzvec_c_api.so . + tar -czvf libzvec-capi-linux-arm64.tar.gz \ + c_api.h \ + libzvec_c_api.so + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: zvec-capi-linux-arm64 + path: libzvec-capi-linux-arm64.tar.gz + # ============================================================================ # macOS Universal Build (arm64 + x86_64) # ============================================================================ @@ -106,7 +152,7 @@ jobs: # Upload to GitHub Releases # ============================================================================ upload-release: - needs: [linux-x64, macos-universal] + needs: [linux-x64, linux-arm64, macos-universal] runs-on: ubuntu-latest permissions: contents: write @@ -121,6 +167,11 @@ jobs: name: zvec-capi-linux-x64 path: dist/ + - uses: actions/download-artifact@v4 + with: + name: zvec-capi-linux-arm64 + path: dist/ + - uses: actions/download-artifact@v4 with: name: zvec-capi-macos-universal From 0ed36edf53c442d1abced69358dac0b38f1be34d Mon Sep 17 00:00:00 2001 From: lc285652 Date: Thu, 12 Mar 2026 17:54:42 +0800 Subject: [PATCH 11/15] remove c api version --- .github/workflows/release.yml | 1 + CMakeLists.txt | 3 ++- src/c_api/CMakeLists.txt | 2 -- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 26f96ad71..33adadd10 100644 --- 
a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,6 +4,7 @@ on: push: tags: - 'v*' # Match v0.3.0, v1.0.0, etc. + workflow_dispatch: # Allow manual trigger jobs: # ============================================================================ diff --git a/CMakeLists.txt b/CMakeLists.txt index 07f99a72a..2b7638164 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.13) cmake_policy(SET CMP0077 NEW) -project(zvec VERSION 0.3.0) +project(zvec) set(CC_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror=return-type") @@ -48,6 +48,7 @@ endif() git_version(GIT_SRCS_VER ${PROJECT_ROOT_DIR}) set(CPACK_PACKAGE_VERSION ${GIT_SRCS_VER}) set(CPACK_PACKAGE_NAME zvec) +include(CPack) if(BUILD_PYTHON_BINDINGS) if(APPLE) diff --git a/src/c_api/CMakeLists.txt b/src/c_api/CMakeLists.txt index 0db99f29b..565479ab0 100644 --- a/src/c_api/CMakeLists.txt +++ b/src/c_api/CMakeLists.txt @@ -50,8 +50,6 @@ add_library(zvec_c_api SHARED # Set library properties set_target_properties(zvec_c_api PROPERTIES - VERSION ${PROJECT_VERSION} - SOVERSION ${PROJECT_VERSION_MAJOR} OUTPUT_NAME "zvec_c_api" POSITION_INDEPENDENT_CODE ON # Hide all symbols by default, only export C API From d6c4c8dea7e30636fc9b531d4a637116592fe6c4 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Thu, 12 Mar 2026 21:38:06 +0800 Subject: [PATCH 12/15] fix some code --- .github/workflows/release.yml | 3 + src/c_api/c_api.cc | 170 ++--- src/include/zvec/c_api.h | 44 +- tests/c_api/c_api_test.c | 1112 +++++++++++++++++++++++++++++++-- 4 files changed, 1182 insertions(+), 147 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 33adadd10..20a57f88d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,5 +1,8 @@ name: Release +permissions: + contents: read + on: push: tags: diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 73260d3be..9ede0d99c 100644 --- 
a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -2150,7 +2150,7 @@ std::pair, std::vector> extract_sparse_vector( return std::make_pair(std::move(index_vec), std::move(value_vec)); } -// Helper function to extract string array from raw data +// Helper function to extract string array from raw data (C-string array) std::vector<std::string> extract_string_array(const void *value, size_t value_size) { std::vector<std::string> string_array; @@ -2168,6 +2168,23 @@ std::vector<std::string> extract_string_array(const void *value, return string_array; } +// Helper function to extract string array from ZVecString** array +std::vector<std::string> extract_string_array_from_zvec( + ZVecString **zvec_strings, size_t count) { + std::vector<std::string> string_array; + string_array.reserve(count); + + for (size_t i = 0; i < count; ++i) { + if (zvec_strings[i] && zvec_strings[i]->data) { + string_array.emplace_back(zvec_strings[i]->data, zvec_strings[i]->length); + } else { + string_array.emplace_back("", 0); + } + } + + return string_array; +} + // Helper function to extract binary array from raw data std::vector extract_binary_array(const void *value, size_t value_size) { @@ -2336,6 +2353,12 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, switch (data_type) { // Scalar types + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + std::string val(static_cast<const char *>(value), value_size); + (*doc_ptr)->set(name, val); + break; + } case ZVEC_DATA_TYPE_BOOL: { bool val = extract_scalar_value(value, value_size, &error_code); if (error_code != ZVEC_OK) { @@ -2405,15 +2428,27 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, break; } - // String and binary types - case ZVEC_DATA_TYPE_STRING: - case ZVEC_DATA_TYPE_BINARY: { - std::string val(static_cast<const char *>(value), value_size); - (*doc_ptr)->set(name, val); + // Vector types + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { +
set_last_error("Invalid value size for vector_binary32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); break; } - - // Vector types case ZVEC_DATA_TYPE_VECTOR_FP32: { auto vec = extract_vector_values(value, value_size, &error_code); if (error_code != ZVEC_OK) { @@ -2481,31 +2516,11 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, (*doc_ptr)->set(name, vec); break; } - case ZVEC_DATA_TYPE_VECTOR_BINARY32: { - auto vec = - extract_vector_values(value, value_size, &error_code); - if (error_code != ZVEC_OK) { - set_last_error("Invalid value size for vector_binary32 type"); - return error_code; - } - (*doc_ptr)->set(name, vec); - break; - } - case ZVEC_DATA_TYPE_VECTOR_BINARY64: { - auto vec = - extract_vector_values(value, value_size, &error_code); - if (error_code != ZVEC_OK) { - set_last_error("Invalid value size for vector_binary64 type"); - return error_code; - } - (*doc_ptr)->set(name, vec); - break; - } // Sparse vector types - case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { - auto sparse_vec = - extract_sparse_vector(value, value_size, &error_code); + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + auto sparse_vec = extract_sparse_vector( + value, value_size, &error_code); if (error_code != ZVEC_OK) { set_last_error("Invalid sparse vector data size"); return error_code; @@ -2513,9 +2528,9 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, (*doc_ptr)->set(name, sparse_vec); break; } - case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { - auto sparse_vec = extract_sparse_vector( - value, value_size, &error_code); + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + auto sparse_vec = + extract_sparse_vector(value, value_size, &error_code); if 
(error_code != ZVEC_OK) { set_last_error("Invalid sparse vector data size"); return error_code; @@ -2525,6 +2540,30 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, } // Array types + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + auto binary_array = extract_binary_array(value, value_size); + (*doc_ptr)->set(name, binary_array); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + // Check if this is a ZVecString** array or a C-string array + // ZVecString** array has pointer-sized elements + constexpr size_t ptr_size = sizeof(void *); + if (value_size % ptr_size == 0) { + // Likely a ZVecString** array (NOTE(review): size-based guess; C-string data whose size is a multiple of sizeof(void *) also takes this branch) + size_t count = value_size / ptr_size; + ZVecString **zvec_str_array = + reinterpret_cast<ZVecString **>(const_cast<void *>(value)); + auto string_array = + extract_string_array_from_zvec(zvec_str_array, count); + (*doc_ptr)->set(name, string_array); + } else { + // C-string array (null-terminated strings) + auto string_array = extract_string_array(value, value_size); + (*doc_ptr)->set(name, string_array); + } + break; + } case ZVEC_DATA_TYPE_ARRAY_BOOL: { auto vec = extract_array_values(value, value_size, &error_code); if (error_code != ZVEC_OK) { @@ -2592,16 +2631,6 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, (*doc_ptr)->set(name, vec); break; } - case ZVEC_DATA_TYPE_ARRAY_STRING: { - auto string_array = extract_string_array(value, value_size); - (*doc_ptr)->set(name, string_array); - break; - } - case ZVEC_DATA_TYPE_ARRAY_BINARY: { - auto binary_array = extract_binary_array(value, value_size); - (*doc_ptr)->set(name, binary_array); - break; - } default: set_last_error("Unsupported data type: " + std::to_string(data_type)); @@ -2628,7 +2657,21 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, std::string name(field->name.data, field->name.length); switch (field->data_type) { - // Scalar basic types + // Scalar types (in ZVecDataType order: BINARY, STRING, BOOL, INT32, + // INT64, UINT32, UINT64, FLOAT, DOUBLE) +
case ZVEC_DATA_TYPE_BINARY: { + std::string val( + reinterpret_cast<const char *>(field->value.binary_value.data), + field->value.binary_value.length); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_STRING: { + std::string val(field->value.string_value.data, + field->value.string_value.length); + (*doc_ptr)->set(name, val); + break; + } case ZVEC_DATA_TYPE_BOOL: { (*doc_ptr)->set(name, field->value.bool_value); break; @@ -2658,22 +2701,8 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, break; } - // String and binary types - case ZVEC_DATA_TYPE_STRING: { - std::string val(field->value.string_value.data, - field->value.string_value.length); - (*doc_ptr)->set(name, val); - break; - } - case ZVEC_DATA_TYPE_BINARY: { - std::string val( - reinterpret_cast<const char *>(field->value.binary_value.data), - field->value.binary_value.length); - (*doc_ptr)->set(name, val); - break; - } - - // Vector types + // Vector types (in ZVecDataType order: BINARY32, BINARY64, FP16, FP32, + // FP64, INT4, INT8, INT16) case ZVEC_DATA_TYPE_VECTOR_BINARY32: { std::vector vec( reinterpret_cast(field->value.vector_value.data), @@ -2753,7 +2782,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, break; } - // Sparse vector types + // Sparse vector types (in ZVecDataType order: FP16, FP32) case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { std::vector vec( reinterpret_cast( @@ -2772,7 +2801,8 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, break; } - // Array types + // Array types (in ZVecDataType order: BINARY, STRING, BOOL, INT32, INT64, + // UINT32, UINT64, FLOAT, DOUBLE) case ZVEC_DATA_TYPE_ARRAY_BINARY: { std::vector array_values; const uint8_t *data_ptr = field->value.binary_value.data; @@ -3569,7 +3599,7 @@ ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, break; } case ZVEC_DATA_TYPE_UINT32: { - const uint32_t val = (*doc_ptr)->get_ref(field_name); + const uint32_t &val = (*doc_ptr)->get_ref(field_name); *value = &val; *value_size = sizeof(uint32_t);
break; @@ -4797,7 +4827,7 @@ ZVecErrorCode zvec_collection_create_index( } ZVecErrorCode zvec_collection_create_index_with_params( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const void *index_params) { if (!collection || !field_name || !index_params) { set_last_error("Invalid arguments"); @@ -4806,7 +4836,7 @@ ZVecErrorCode zvec_collection_create_index_with_params( auto coll_ptr = reinterpret_cast *>(collection); - std::string field_name_str(field_name->data, field_name->length); + std::string field_name_str(field_name); const ZVecBaseIndexParams *base_params = static_cast<const ZVecBaseIndexParams *>(index_params); @@ -4869,7 +4899,7 @@ ZVecErrorCode zvec_collection_create_index_with_params( } ZVecErrorCode zvec_collection_create_hnsw_index( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const ZVecHnswIndexParams *hnsw_params) { if (!hnsw_params) { set_last_error("Invalid HNSW parameters"); @@ -4881,7 +4911,7 @@ ZVecErrorCode zvec_collection_create_hnsw_index( } ZVecErrorCode zvec_collection_create_flat_index( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const ZVecFlatIndexParams *flat_params) { if (!flat_params) { set_last_error("Invalid Flat parameters"); @@ -4893,7 +4923,7 @@ ZVecErrorCode zvec_collection_create_flat_index( } ZVecErrorCode zvec_collection_create_ivf_index( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const ZVecIVFIndexParams *ivf_params) { if (!ivf_params) { set_last_error("Invalid IVF parameters"); @@ -4905,7 +4935,7 @@ ZVecErrorCode zvec_collection_create_ivf_index( } ZVecErrorCode zvec_collection_create_invert_index( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const ZVecInvertIndexParams *invert_params) { if (!invert_params) {
set_last_error("Invalid Invert parameters"); diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index c38d1bab5..1c9d54835 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -1492,13 +1492,13 @@ zvec_free_field_schema(ZVecFieldSchema *field_schema); * @brief Create index * * @param collection Collection handle - * @param column_name Column name + * @param field_name Field name * @param index_params Index parameters * @return ZVecErrorCode Error code */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index( - ZVecCollection *collection, const char *column_name, - const ZVecIndexParams *index_params); +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_create_index(ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *index_params); /** * @brief Create index for collection field (using specific type parameters) @@ -1509,7 +1509,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index( * @return Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index_with_params( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const void *index_params); // Determine specific type based on index_type field @@ -1521,7 +1521,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index_with_params( * @return Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const ZVecHnswIndexParams *hnsw_params); /** @@ -1532,7 +1532,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( * @return Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const ZVecFlatIndexParams *flat_params); /** @@ -1543,7 +1543,7 @@ 
ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( * @return Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const ZVecIVFIndexParams *ivf_params); /** @@ -1554,7 +1554,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( * @return Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_invert_index( - ZVecCollection *collection, const ZVecString *field_name, + ZVecCollection *collection, const char *field_name, const ZVecInvertIndexParams *invert_params); /** @@ -1574,18 +1574,6 @@ zvec_collection_drop_index(ZVecCollection *collection, const char *field_name); ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_optimize(ZVecCollection *collection); -/** - * @brief Get index statistics - * @param collection Collection handle - * @param field_name Field name - * @param[out] completeness Index completeness (0.0-1.0) - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_collection_get_index_stats(const ZVecCollection *collection, - const char *field_name, float *completeness); - - /** * @brief Compact collection (reclaim space) * @param collection Collection handle @@ -1606,31 +1594,31 @@ ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); // ============================================================================= -// Field Management Interface (DDL) +// Column Management Interface (DDL) // ============================================================================= /** - * @brief Add field + * @brief Add column * @param collection Collection handle * @param field_schema Field schema pointer * @param default_expression Default value expression (can be NULL) * @return ZVecErrorCode Error code */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_add_field( +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_add_column( 
ZVecCollection *collection, const ZVecFieldSchema *field_schema, const char *default_expression); /** - * @brief Drop field + * @brief Drop column * @param collection Collection handle * @param field_name Field name * @return ZVecErrorCode Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_collection_drop_field(ZVecCollection *collection, const char *field_name); +zvec_collection_drop_column(ZVecCollection *collection, const char *field_name); /** - * @brief Alter field + * @brief Alter column * @param collection Collection handle * @param old_name Original field name * @param new_name New field name (can be NULL to indicate no renaming) @@ -1638,7 +1626,7 @@ zvec_collection_drop_field(ZVecCollection *collection, const char *field_name); * modification) * @return ZVecErrorCode Error code */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_alter_field( +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_alter_column( ZVecCollection *collection, const char *old_name, const char *new_name, const ZVecFieldSchema *new_schema); diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index 4a26ce90c..15bb92a20 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -1048,13 +1048,28 @@ void test_doc_primary_key(void) { TEST_END(); } +void test_doc_basic_operations(void); +void test_doc_get_field_value_basic(void); +void test_doc_get_field_value_copy(void); +void test_doc_get_field_value_pointer(void); +void test_doc_field_operations(void); +void test_doc_error_conditions(void); +void test_doc_serialization(void); + void test_doc_functions(void) { - TEST_START(); + test_doc_basic_operations(); + test_doc_get_field_value_basic(); + test_doc_get_field_value_copy(); + test_doc_get_field_value_pointer(); + test_doc_field_operations(); + test_doc_error_conditions(); + test_doc_serialization(); +} - // Create test document using utility function - ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); - TEST_ASSERT(schema != NULL); 
+void test_doc_basic_operations(void) { + TEST_START(); + // Create test document ZVecDoc *doc = zvec_doc_create(); TEST_ASSERT(doc != NULL); @@ -1078,15 +1093,23 @@ void test_doc_functions(void) { ZVecDocOperator op = zvec_doc_get_operator(doc); TEST_ASSERT(op == ZVEC_DOC_OP_INSERT); - ZVecErrorCode err; + zvec_doc_destroy(doc); - // ==================== COMPREHENSIVE DATA TYPE TESTING ==================== + TEST_END(); +} + +void test_doc_get_field_value_basic(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; printf( "=== Testing zvec_doc_get_field_value_basic with all supported types " "===\n"); - // Test all basic numeric types that zvec_doc_get_field_value_basic supports // BOOL type ZVecDocField bool_field; bool_field.name.data = "bool_field"; @@ -1195,11 +1218,133 @@ void test_doc_functions(void) { TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(fabs(double_result - 2.71828182845904523536) < 1e-15); + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_get_field_value_copy(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + printf( "=== Testing zvec_doc_get_field_value_copy with all supported types " "===\n"); - // Test STRING type with zvec_doc_get_field_value_copy + // Basic scalar types first + bool bool_val = true; + err = zvec_doc_add_field_by_value(doc, "bool_field2", ZVEC_DATA_TYPE_BOOL, + &bool_val, sizeof(bool_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *bool_copy_result; + size_t bool_copy_size; + err = zvec_doc_get_field_value_copy(doc, "bool_field2", ZVEC_DATA_TYPE_BOOL, + &bool_copy_result, &bool_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_copy_result != NULL); + TEST_ASSERT(bool_copy_size == sizeof(bool)); + TEST_ASSERT(*(bool *)bool_copy_result == true); + free(bool_copy_result); + + int32_t int32_val = -12345; + err = zvec_doc_add_field_by_value(doc, "int32_field2", ZVEC_DATA_TYPE_INT32, + 
&int32_val, sizeof(int32_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *int32_copy_result; + size_t int32_copy_size; + err = zvec_doc_get_field_value_copy(doc, "int32_field2", ZVEC_DATA_TYPE_INT32, + &int32_copy_result, &int32_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_copy_result != NULL); + TEST_ASSERT(int32_copy_size == sizeof(int32_t)); + TEST_ASSERT(*(int32_t *)int32_copy_result == -12345); + free(int32_copy_result); + + int64_t int64_val = -9223372036854775807LL; + err = zvec_doc_add_field_by_value(doc, "int64_field2", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *int64_copy_result; + size_t int64_copy_size; + err = zvec_doc_get_field_value_copy(doc, "int64_field2", ZVEC_DATA_TYPE_INT64, + &int64_copy_result, &int64_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_copy_result != NULL); + TEST_ASSERT(int64_copy_size == sizeof(int64_t)); + TEST_ASSERT(*(int64_t *)int64_copy_result == -9223372036854775807LL); + free(int64_copy_result); + + uint32_t uint32_val = 4000000000U; + err = zvec_doc_add_field_by_value(doc, "uint32_field2", ZVEC_DATA_TYPE_UINT32, + &uint32_val, sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *uint32_copy_result; + size_t uint32_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "uint32_field2", ZVEC_DATA_TYPE_UINT32, + &uint32_copy_result, &uint32_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_copy_result != NULL); + TEST_ASSERT(uint32_copy_size == sizeof(uint32_t)); + TEST_ASSERT(*(uint32_t *)uint32_copy_result == 4000000000U); + free(uint32_copy_result); + + uint64_t uint64_val = 18000000000000000000ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field2", ZVEC_DATA_TYPE_UINT64, + &uint64_val, sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *uint64_copy_result; + size_t uint64_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "uint64_field2", ZVEC_DATA_TYPE_UINT64, + &uint64_copy_result, 
&uint64_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_copy_result != NULL); + TEST_ASSERT(uint64_copy_size == sizeof(uint64_t)); + TEST_ASSERT(*(uint64_t *)uint64_copy_result == 18000000000000000000ULL); + free(uint64_copy_result); + + float float_val = 3.14159265f; + err = zvec_doc_add_field_by_value(doc, "float_field2", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *float_copy_result; + size_t float_copy_size; + err = zvec_doc_get_field_value_copy(doc, "float_field2", ZVEC_DATA_TYPE_FLOAT, + &float_copy_result, &float_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(float_copy_result != NULL); + TEST_ASSERT(float_copy_size == sizeof(float)); + TEST_ASSERT(fabs(*(float *)float_copy_result - 3.14159265f) < 1e-6f); + free(float_copy_result); + + double double_val = 2.718281828459045; + err = zvec_doc_add_field_by_value(doc, "double_field2", ZVEC_DATA_TYPE_DOUBLE, + &double_val, sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *double_copy_result; + size_t double_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "double_field2", ZVEC_DATA_TYPE_DOUBLE, + &double_copy_result, &double_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(double_copy_result != NULL); + TEST_ASSERT(double_copy_size == sizeof(double)); + TEST_ASSERT(fabs(*(double *)double_copy_result - 2.718281828459045) < 1e-15); + free(double_copy_result); + + // String and binary types ZVecDocField string_field; string_field.name.data = "string_field"; string_field.name.length = strlen("string_field"); @@ -1218,7 +1363,6 @@ void test_doc_functions(void) { TEST_ASSERT(memcmp(string_result, "Hello, 世界!", string_size) == 0); free(string_result); - // Test BINARY type with zvec_doc_get_field_value_copy ZVecDocField binary_field; binary_field.name.data = "binary_field"; binary_field.name.length = strlen("binary_field"); @@ -1240,7 +1384,7 @@ void test_doc_functions(void) { 0); free(binary_result); - // Test 
VECTOR_FP32 type with zvec_doc_get_field_value_copy + // VECTOR_FP32 type float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; ZVecDocField fp32_vec_field; fp32_vec_field.name.data = "fp32_vec_field"; @@ -1262,12 +1406,586 @@ void test_doc_functions(void) { TEST_ASSERT(memcmp(fp32_vec_result, test_vector, fp32_vec_size) == 0); free(fp32_vec_result); + // VECTOR_FP16 type (16-bit float vector) + uint16_t fp16_data[] = {0x3C00, 0x4000, 0x4200, + 0x4400}; // FP16: 1.0, 2.0, 3.0, 4.0 + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_data, + sizeof(fp16_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *fp16_result; + size_t fp16_size; + err = zvec_doc_get_field_value_copy(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, &fp16_result, + &fp16_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp16_result != NULL); + TEST_ASSERT(fp16_size == sizeof(fp16_data)); + TEST_ASSERT(memcmp(fp16_result, fp16_data, fp16_size) == 0); + free(fp16_result); + + // VECTOR_INT8 type + int8_t int8_data[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_data, + sizeof(int8_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *int8_result; + size_t int8_size; + err = zvec_doc_get_field_value_copy(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, &int8_result, + &int8_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int8_result != NULL); + TEST_ASSERT(int8_size == sizeof(int8_data)); + TEST_ASSERT(memcmp(int8_result, int8_data, int8_size) == 0); + free(int8_result); + + // VECTOR_BINARY32 type (32-bit aligned binary vector) + uint8_t bin32_data[] = {0xAA, 0x55, 0xAA, 0x55}; + err = zvec_doc_add_field_by_value(doc, "bin32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, bin32_data, + sizeof(bin32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *bin32_result; + size_t bin32_size; + err = zvec_doc_get_field_value_copy(doc, "bin32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, + 
&bin32_result, &bin32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin32_result != NULL); + TEST_ASSERT(bin32_size == sizeof(bin32_data)); + TEST_ASSERT(memcmp(bin32_result, bin32_data, bin32_size) == 0); + free(bin32_result); + + // VECTOR_BINARY64 type (64-bit aligned binary vector) + uint64_t bin64_data[] = {0xAA55AA55AA55AA55ULL, 0x55AA55AA55AA55AAULL}; + err = zvec_doc_add_field_by_value(doc, "bin64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, bin64_data, + sizeof(bin64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *bin64_result; + size_t bin64_size; + err = zvec_doc_get_field_value_copy(doc, "bin64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + &bin64_result, &bin64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin64_result != NULL); + TEST_ASSERT(bin64_size == sizeof(bin64_data)); + TEST_ASSERT(memcmp(bin64_result, bin64_data, bin64_size) == 0); + free(bin64_result); + + // VECTOR_FP64 type (double precision vector) + double fp64_data[] = {1.1, 2.2, 3.3, 4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_data, + sizeof(fp64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *fp64_result; + size_t fp64_size; + err = zvec_doc_get_field_value_copy(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, &fp64_result, + &fp64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp64_result != NULL); + TEST_ASSERT(fp64_size == sizeof(fp64_data)); + TEST_ASSERT(memcmp(fp64_result, fp64_data, fp64_size) == 0); + free(fp64_result); + + // VECTOR_INT16 type + int16_t int16_data[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_data, + sizeof(int16_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *int16_result; + size_t int16_size; + err = zvec_doc_get_field_value_copy(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, + &int16_result, &int16_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int16_result != NULL); + 
TEST_ASSERT(int16_size == sizeof(int16_data)); + TEST_ASSERT(memcmp(int16_result, int16_data, int16_size) == 0); + free(int16_result); + + // SPARSE_VECTOR_FP16 type - format: [nnz(uint32_t)][indices...][values...] + uint32_t sparse_fp16_nnz = 3; + size_t sparse_fp16_size_input = + sizeof(uint32_t) + + sparse_fp16_nnz * (sizeof(uint32_t) + sizeof(uint16_t)); + void *sparse_fp16_input = malloc(sparse_fp16_size_input); + uint32_t *fp16_nnz_ptr = (uint32_t *)sparse_fp16_input; + *fp16_nnz_ptr = sparse_fp16_nnz; + uint32_t *fp16_indices = + (uint32_t *)((char *)sparse_fp16_input + sizeof(uint32_t)); + uint16_t *fp16_values = + (uint16_t *)((char *)sparse_fp16_input + sizeof(uint32_t) + + sparse_fp16_nnz * sizeof(uint32_t)); + fp16_indices[0] = 0; + fp16_indices[1] = 5; + fp16_indices[2] = 10; + fp16_values[0] = 0x3C00; + fp16_values[1] = 0x4000; + fp16_values[2] = 0x4200; // FP16: 1.0, 2.0, 3.0 + err = zvec_doc_add_field_by_value(doc, "sparse_fp16_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + sparse_fp16_input, sparse_fp16_size_input); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp16_input); + + void *sparse_fp16_result; + size_t sparse_fp16_result_size; + err = zvec_doc_get_field_value_copy( + doc, "sparse_fp16_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + &sparse_fp16_result, &sparse_fp16_result_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(sparse_fp16_result != NULL); + // Sparse vector format: [nnz(size_t)][indices...][values...] 
+ size_t retrieved_nnz = *(size_t *)sparse_fp16_result; + TEST_ASSERT(retrieved_nnz == 3); + uint32_t *retrieved_fp16_indices = + (uint32_t *)((char *)sparse_fp16_result + sizeof(size_t)); + uint16_t *retrieved_fp16_vals = + (uint16_t *)((char *)sparse_fp16_result + sizeof(size_t) + + retrieved_nnz * sizeof(uint32_t)); + TEST_ASSERT(retrieved_fp16_indices[0] == 0); + TEST_ASSERT(retrieved_fp16_indices[1] == 5); + TEST_ASSERT(retrieved_fp16_indices[2] == 10); + TEST_ASSERT(retrieved_fp16_vals[0] == 0x3C00); + TEST_ASSERT(retrieved_fp16_vals[1] == 0x4000); + TEST_ASSERT(retrieved_fp16_vals[2] == 0x4200); + free(sparse_fp16_result); + + // SPARSE_VECTOR_FP32 type - format: [nnz(uint32_t)][indices...][values...] + uint32_t sparse_fp32_nnz = 4; + size_t sparse_fp32_size_input = + sizeof(uint32_t) + sparse_fp32_nnz * (sizeof(uint32_t) + sizeof(float)); + void *sparse_fp32_input = malloc(sparse_fp32_size_input); + uint32_t *fp32_nnz_ptr = (uint32_t *)sparse_fp32_input; + *fp32_nnz_ptr = sparse_fp32_nnz; + uint32_t *fp32_indices = + (uint32_t *)((char *)sparse_fp32_input + sizeof(uint32_t)); + float *fp32_values = (float *)((char *)sparse_fp32_input + sizeof(uint32_t) + + sparse_fp32_nnz * sizeof(uint32_t)); + fp32_indices[0] = 2; + fp32_indices[1] = 7; + fp32_indices[2] = 15; + fp32_indices[3] = 20; + fp32_values[0] = 1.5f; + fp32_values[1] = 2.5f; + fp32_values[2] = 3.5f; + fp32_values[3] = 4.5f; + err = zvec_doc_add_field_by_value(doc, "sparse_fp32_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_fp32_input, sparse_fp32_size_input); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp32_input); + + void *sparse_fp32_result; + size_t sparse_fp32_result_size; + err = zvec_doc_get_field_value_copy( + doc, "sparse_fp32_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + &sparse_fp32_result, &sparse_fp32_result_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(sparse_fp32_result != NULL); + retrieved_nnz = *(size_t *)sparse_fp32_result; + TEST_ASSERT(retrieved_nnz == 4); + 
uint32_t *retrieved_fp32_indices = + (uint32_t *)((char *)sparse_fp32_result + sizeof(size_t)); + float *retrieved_fp32_vals = + (float *)((char *)sparse_fp32_result + sizeof(size_t) + + retrieved_nnz * sizeof(uint32_t)); + TEST_ASSERT(retrieved_fp32_indices[0] == 2); + TEST_ASSERT(retrieved_fp32_indices[1] == 7); + TEST_ASSERT(retrieved_fp32_indices[2] == 15); + TEST_ASSERT(retrieved_fp32_indices[3] == 20); + TEST_ASSERT(fabs(retrieved_fp32_vals[0] - 1.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[1] - 2.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[2] - 3.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[3] - 4.5f) < 1e-5f); + free(sparse_fp32_result); + + // ARRAY_BINARY type + // Format: [length(uint32_t)][data][length][data]... + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + err = zvec_doc_add_field_by_value(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, array_bin_data, + sizeof(array_bin_data)); + TEST_ASSERT(err == ZVEC_OK); + void *array_binary_result; + size_t array_binary_size; + err = zvec_doc_get_field_value_copy(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, + &array_binary_result, &array_binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_binary_result != NULL); + // The result is a contiguous buffer of binary data without length prefixes + TEST_ASSERT(array_binary_size == 5); // 1 + 2 + 2 bytes + const uint8_t *result_bytes = (const uint8_t *)array_binary_result; + TEST_ASSERT(result_bytes[0] == 0x01); + TEST_ASSERT(result_bytes[1] == 0x02); + TEST_ASSERT(result_bytes[2] == 0x03); + TEST_ASSERT(result_bytes[3] == 0x04); + TEST_ASSERT(result_bytes[4] == 0x05); + free(array_binary_result); + + + // ARRAY_STRING type + const char *array_str_data[] = {"str1", "str2", "str3"}; + ZVecString *array_zvec_str[3]; + for (int i = 0; i < 3; i++) { + array_zvec_str[i] = 
zvec_string_create(array_str_data[i]); + } + err = zvec_doc_add_field_by_value(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, array_zvec_str, + sizeof(array_zvec_str)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_string_result; + size_t array_string_size; + err = zvec_doc_get_field_value_copy(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, + &array_string_result, &array_string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_string_result != NULL); + free(array_string_result); + for (int i = 0; i < 3; i++) { + zvec_free_string(array_zvec_str[i]); + } + + zvec_free_str(string_field.value.string_value.data); + + // ARRAY_BOOL type + bool array_bool_data[] = {true, false, true, false, true}; + err = zvec_doc_add_field_by_value(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, array_bool_data, + sizeof(array_bool_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_bool_result; + size_t array_bool_size; + err = zvec_doc_get_field_value_copy(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, + &array_bool_result, &array_bool_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_bool_result != NULL); + // Verify the bit-packed bool array + uint8_t *bool_bytes = (uint8_t *)array_bool_result; + TEST_ASSERT((bool_bytes[0] & 0x01) != 0); // index 0: true + TEST_ASSERT((bool_bytes[0] & 0x02) == 0); // index 1: false + TEST_ASSERT((bool_bytes[0] & 0x04) != 0); // index 2: true + TEST_ASSERT((bool_bytes[0] & 0x08) == 0); // index 3: false + TEST_ASSERT((bool_bytes[0] & 0x10) != 0); // index 4: true + free(array_bool_result); + + // ARRAY_INT32 type + int32_t array_int32_data[] = {100, 200, 300}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_int32_result; + size_t array_int32_size; + err = zvec_doc_get_field_value_copy(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + &array_int32_result, 
&array_int32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int32_result != NULL); + TEST_ASSERT(array_int32_size == sizeof(array_int32_data)); + TEST_ASSERT(((int32_t *)array_int32_result)[0] == 100); + TEST_ASSERT(((int32_t *)array_int32_result)[1] == 200); + TEST_ASSERT(((int32_t *)array_int32_result)[2] == 300); + free(array_int32_result); + + // ARRAY_INT64 type + int64_t array_int64_data[] = {-9223372036854775807LL, 0, + 9223372036854775807LL}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_int64_result; + size_t array_int64_size; + err = zvec_doc_get_field_value_copy(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + &array_int64_result, &array_int64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int64_result != NULL); + TEST_ASSERT(array_int64_size == sizeof(array_int64_data)); + TEST_ASSERT(((int64_t *)array_int64_result)[0] == -9223372036854775807LL); + TEST_ASSERT(((int64_t *)array_int64_result)[1] == 0); + TEST_ASSERT(((int64_t *)array_int64_result)[2] == 9223372036854775807LL); + free(array_int64_result); + + // ARRAY_UINT32 type + uint32_t array_uint32_data[] = {0U, 1000000U, 4000000000U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field", ZVEC_DATA_TYPE_ARRAY_UINT32, array_uint32_data, + sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_uint32_result; + size_t array_uint32_size; + err = zvec_doc_get_field_value_copy(doc, "array_uint32_field", + ZVEC_DATA_TYPE_ARRAY_UINT32, + &array_uint32_result, &array_uint32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint32_result != NULL); + TEST_ASSERT(array_uint32_size == sizeof(array_uint32_data)); + TEST_ASSERT(((uint32_t *)array_uint32_result)[0] == 0U); + TEST_ASSERT(((uint32_t *)array_uint32_result)[1] == 1000000U); + TEST_ASSERT(((uint32_t *)array_uint32_result)[2] == 4000000000U); + 
free(array_uint32_result); + + // ARRAY_UINT64 type + uint64_t array_uint64_data[] = {0ULL, 1000000000000ULL, + 18000000000000000000ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field", ZVEC_DATA_TYPE_ARRAY_UINT64, array_uint64_data, + sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_uint64_result; + size_t array_uint64_size; + err = zvec_doc_get_field_value_copy(doc, "array_uint64_field", + ZVEC_DATA_TYPE_ARRAY_UINT64, + &array_uint64_result, &array_uint64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint64_result != NULL); + TEST_ASSERT(array_uint64_size == sizeof(array_uint64_data)); + TEST_ASSERT(((uint64_t *)array_uint64_result)[0] == 0ULL); + TEST_ASSERT(((uint64_t *)array_uint64_result)[1] == 1000000000000ULL); + TEST_ASSERT(((uint64_t *)array_uint64_result)[2] == 18000000000000000000ULL); + free(array_uint64_result); + + // ARRAY_FLOAT type + float array_float_data[] = {1.5f, 2.5f, 3.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_float_result; + size_t array_float_size; + err = zvec_doc_get_field_value_copy(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + &array_float_result, &array_float_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_float_result != NULL); + TEST_ASSERT(array_float_size == sizeof(array_float_data)); + TEST_ASSERT(((float *)array_float_result)[0] == 1.5f); + TEST_ASSERT(((float *)array_float_result)[1] == 2.5f); + TEST_ASSERT(((float *)array_float_result)[2] == 3.5f); + free(array_float_result); + + // ARRAY_DOUBLE type + double array_double_data[] = {1.111111, 2.222222, 3.333333}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field", ZVEC_DATA_TYPE_ARRAY_DOUBLE, array_double_data, + sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_double_result; + size_t array_double_size; + err = 
zvec_doc_get_field_value_copy(doc, "array_double_field", + ZVEC_DATA_TYPE_ARRAY_DOUBLE, + &array_double_result, &array_double_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_double_result != NULL); + TEST_ASSERT(array_double_size == sizeof(array_double_data)); + TEST_ASSERT(fabs(((double *)array_double_result)[0] - 1.111111) < 1e-10); + TEST_ASSERT(fabs(((double *)array_double_result)[1] - 2.222222) < 1e-10); + TEST_ASSERT(fabs(((double *)array_double_result)[2] - 3.333333) < 1e-10); + free(array_double_result); + + + zvec_free_str(binary_field.value.string_value.data); + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_get_field_value_pointer(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + // Add fields for pointer testing + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello, 世界!"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField binary_field; + binary_field.name.data = "binary_field"; + binary_field.name.length = strlen("binary_field"); + binary_field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}; + 
binary_field.value.string_value = + *zvec_bin_create(binary_data, sizeof(binary_data)); + err = zvec_doc_add_field_by_struct(doc, &binary_field); + TEST_ASSERT(err == ZVEC_OK); + + float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; + ZVecDocField fp32_vec_field; + fp32_vec_field.name.data = "fp32_vec_field"; + fp32_vec_field.name.length = strlen("fp32_vec_field"); + fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + fp32_vec_field.value.vector_value.data = test_vector; + fp32_vec_field.value.vector_value.length = 5; + err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field); + TEST_ASSERT(err == ZVEC_OK); + + // Add more fields for comprehensive pointer testing + int64_t int64_val = -9223372036854775807LL; + err = + zvec_doc_add_field_by_value(doc, "int64_field_ptr", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t uint32_val = 4000000000U; + err = zvec_doc_add_field_by_value(doc, "uint32_field_ptr", + ZVEC_DATA_TYPE_UINT32, &uint32_val, + sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t uint64_val = 18000000000000000000ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field_ptr", + ZVEC_DATA_TYPE_UINT64, &uint64_val, + sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + float float_val = 3.14159265f; + err = + zvec_doc_add_field_by_value(doc, "float_field_ptr", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + double double_val = 2.718281828459045; + err = zvec_doc_add_field_by_value(doc, "double_field_ptr", + ZVEC_DATA_TYPE_DOUBLE, &double_val, + sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + uint64_t bin64_vec_data[] = {0xAA55AA55AA55AA55ULL, 0x55AA55AA55AA55AAULL}; + err = zvec_doc_add_field_by_value(doc, "bin64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + bin64_vec_data, sizeof(bin64_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + uint16_t fp16_vec_data[] = {0x3C00, 0x4000, 0x4200, 
0x4400}; + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_vec_data, + sizeof(fp16_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + double fp64_vec_data[] = {1.1, 2.2, 3.3, 4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_vec_data, + sizeof(fp64_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + int8_t int8_vec_data[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_vec_data, + sizeof(int8_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + int16_t int16_vec_data[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_vec_data, + sizeof(int16_vec_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + int32_t array_int32_data[] = {100, 200, 300}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field_ptr", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + int64_t array_int64_data[] = {-9223372036854775807LL, 0, + 9223372036854775807LL}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field_ptr", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + uint32_t array_uint32_data[] = {0U, 1000000U, 4000000000U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT32, + array_uint32_data, sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + uint64_t array_uint64_data[] = {0ULL, 1000000000000ULL, + 18000000000000000000ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT64, + array_uint64_data, sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + float array_float_data[] = 
{1.5f, 2.5f, 3.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field_ptr", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + double array_double_data[] = {1.111111, 2.222222, 3.333333}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field_ptr", ZVEC_DATA_TYPE_ARRAY_DOUBLE, + array_double_data, sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); printf( "=== Testing zvec_doc_get_field_value_pointer with all supported types " "===\n"); - // Test pointer access to basic types + // Test pointer access to BOOL const void *bool_ptr; size_t bool_ptr_size; err = zvec_doc_get_field_value_pointer(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, @@ -1277,6 +1995,7 @@ void test_doc_functions(void) { TEST_ASSERT(bool_ptr_size == sizeof(bool)); TEST_ASSERT(*(const bool *)bool_ptr == true); + // Test pointer access to INT32 const void *int32_ptr; size_t int32_ptr_size; err = zvec_doc_get_field_value_pointer( @@ -1286,7 +2005,7 @@ void test_doc_functions(void) { TEST_ASSERT(int32_ptr_size == sizeof(int32_t)); TEST_ASSERT(*(const int32_t *)int32_ptr == -2147483648); - // Test pointer access to STRING (should return null-terminated C string) + // Test pointer access to STRING const void *string_ptr; size_t string_ptr_size; err = zvec_doc_get_field_value_pointer(doc, "string_field", @@ -1320,15 +2039,238 @@ void test_doc_functions(void) { TEST_ASSERT(fp32_vec_ptr_size == 5 * sizeof(float)); TEST_ASSERT(memcmp(fp32_vec_ptr, test_vector, fp32_vec_ptr_size) == 0); - // Declare dummy variables for error testing - const void *dummy_ptr; - size_t dummy_ptr_size; + // Test pointer access to INT64 + const void *int64_ptr; + size_t int64_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int64_field_ptr", + ZVEC_DATA_TYPE_INT64, &int64_ptr, + &int64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_ptr != NULL); + TEST_ASSERT(int64_ptr_size == sizeof(int64_t)); + 
TEST_ASSERT(*(const int64_t *)int64_ptr == -9223372036854775807LL); + + // Test pointer access to UINT32 + const void *uint32_ptr; + size_t uint32_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "uint32_field_ptr", + ZVEC_DATA_TYPE_UINT32, &uint32_ptr, + &uint32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_ptr != NULL); + TEST_ASSERT(uint32_ptr_size == sizeof(uint32_t)); + TEST_ASSERT(*(const uint32_t *)uint32_ptr == 4000000000U); + + // Test pointer access to UINT64 + const void *uint64_ptr; + size_t uint64_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "uint64_field_ptr", + ZVEC_DATA_TYPE_UINT64, &uint64_ptr, + &uint64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_ptr != NULL); + TEST_ASSERT(uint64_ptr_size == sizeof(uint64_t)); + TEST_ASSERT(*(const uint64_t *)uint64_ptr == 18000000000000000000ULL); + + // Test pointer access to FLOAT + const void *float_ptr; + size_t float_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "float_field_ptr", + ZVEC_DATA_TYPE_FLOAT, &float_ptr, + &float_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(float_ptr != NULL); + TEST_ASSERT(float_ptr_size == sizeof(float)); + TEST_ASSERT(fabs(*(const float *)float_ptr - 3.14159265f) < 1e-6f); + + // Test pointer access to DOUBLE + const void *double_ptr; + size_t double_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "double_field_ptr", + ZVEC_DATA_TYPE_DOUBLE, &double_ptr, + &double_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(double_ptr != NULL); + TEST_ASSERT(double_ptr_size == sizeof(double)); + TEST_ASSERT(fabs(*(const double *)double_ptr - 2.718281828459045) < 1e-15); + + // Test pointer access to VECTOR_BINARY64 + const void *bin64_vec_ptr; + size_t bin64_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "bin64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + &bin64_vec_ptr, &bin64_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin64_vec_ptr != NULL); + 
TEST_ASSERT(bin64_vec_ptr_size == sizeof(bin64_vec_data)); + TEST_ASSERT(memcmp(bin64_vec_ptr, bin64_vec_data, bin64_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_FP16 + const void *fp16_vec_ptr; + size_t fp16_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP16, + &fp16_vec_ptr, &fp16_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp16_vec_ptr != NULL); + TEST_ASSERT(fp16_vec_ptr_size == sizeof(fp16_vec_data)); + TEST_ASSERT(memcmp(fp16_vec_ptr, fp16_vec_data, fp16_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_FP64 + const void *fp64_vec_ptr; + size_t fp64_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp64_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_FP64, + &fp64_vec_ptr, &fp64_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp64_vec_ptr != NULL); + TEST_ASSERT(fp64_vec_ptr_size == sizeof(fp64_vec_data)); + TEST_ASSERT(memcmp(fp64_vec_ptr, fp64_vec_data, fp64_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_INT8 + const void *int8_vec_ptr; + size_t int8_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int8_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT8, + &int8_vec_ptr, &int8_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int8_vec_ptr != NULL); + TEST_ASSERT(int8_vec_ptr_size == sizeof(int8_vec_data)); + TEST_ASSERT(memcmp(int8_vec_ptr, int8_vec_data, int8_vec_ptr_size) == 0); + + // Test pointer access to VECTOR_INT16 + const void *int16_vec_ptr; + size_t int16_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "int16_vec_field_ptr", + ZVEC_DATA_TYPE_VECTOR_INT16, + &int16_vec_ptr, &int16_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int16_vec_ptr != NULL); + TEST_ASSERT(int16_vec_ptr_size == sizeof(int16_vec_data)); + TEST_ASSERT(memcmp(int16_vec_ptr, int16_vec_data, int16_vec_ptr_size) == 0); + + // Test pointer access to ARRAY_INT32 + const void *array_int32_ptr; + size_t 
array_int32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_int32_field_ptr", ZVEC_DATA_TYPE_ARRAY_INT32, + &array_int32_ptr, &array_int32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int32_ptr != NULL); + TEST_ASSERT(array_int32_ptr_size == sizeof(array_int32_data)); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[0] == 100); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[1] == 200); + TEST_ASSERT(((const int32_t *)array_int32_ptr)[2] == 300); + + // Test pointer access to ARRAY_INT64 + const void *array_int64_ptr; + size_t array_int64_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_int64_field_ptr", ZVEC_DATA_TYPE_ARRAY_INT64, + &array_int64_ptr, &array_int64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int64_ptr != NULL); + TEST_ASSERT(array_int64_ptr_size == sizeof(array_int64_data)); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[0] == -9223372036854775807LL); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[1] == 0); + TEST_ASSERT(((const int64_t *)array_int64_ptr)[2] == 9223372036854775807LL); + + // Test pointer access to ARRAY_UINT32 + const void *array_uint32_ptr; + size_t array_uint32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_uint32_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT32, + &array_uint32_ptr, &array_uint32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint32_ptr != NULL); + TEST_ASSERT(array_uint32_ptr_size == sizeof(array_uint32_data)); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[0] == 0U); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[1] == 1000000U); + TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[2] == 4000000000U); + + // Test pointer access to ARRAY_UINT64 + const void *array_uint64_ptr; + size_t array_uint64_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_uint64_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT64, + &array_uint64_ptr, &array_uint64_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + 
TEST_ASSERT(array_uint64_ptr != NULL); + TEST_ASSERT(array_uint64_ptr_size == sizeof(array_uint64_data)); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[0] == 0ULL); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[1] == 1000000000000ULL); + TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[2] == + 18000000000000000000ULL); + + // Test pointer access to ARRAY_FLOAT + const void *array_float_ptr; + size_t array_float_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_float_field_ptr", ZVEC_DATA_TYPE_ARRAY_FLOAT, + &array_float_ptr, &array_float_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_float_ptr != NULL); + TEST_ASSERT(array_float_ptr_size == sizeof(array_float_data)); + TEST_ASSERT(((const float *)array_float_ptr)[0] == 1.5f); + TEST_ASSERT(((const float *)array_float_ptr)[1] == 2.5f); + TEST_ASSERT(((const float *)array_float_ptr)[2] == 3.5f); + + // Test pointer access to ARRAY_DOUBLE + const void *array_double_ptr; + size_t array_double_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "array_double_field_ptr", ZVEC_DATA_TYPE_ARRAY_DOUBLE, + &array_double_ptr, &array_double_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_double_ptr != NULL); + TEST_ASSERT(array_double_ptr_size == sizeof(array_double_data)); + TEST_ASSERT(fabs(((const double *)array_double_ptr)[0] - 1.111111) < 1e-10); + TEST_ASSERT(fabs(((const double *)array_double_ptr)[1] - 2.222222) < 1e-10); + TEST_ASSERT(fabs(((const double *)array_double_ptr)[2] - 3.333333) < 1e-10); + + zvec_free_str(string_field.value.string_value.data); + zvec_free_str(binary_field.value.string_value.data); + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_field_operations(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + // Add some fields + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = 
ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); - // ==================== FIELD OPERATIONS TESTING ==================== + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); - // Test field operations + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + // Test field count size_t field_count = zvec_doc_get_field_count(doc); - TEST_ASSERT(field_count >= 10); // All the fields we've added + TEST_ASSERT(field_count >= 3); // Test field existence checks TEST_ASSERT(zvec_doc_has_field(doc, "bool_field") == true); @@ -1345,7 +2287,7 @@ void test_doc_functions(void) { size_t name_count; err = zvec_doc_get_field_names(doc, &field_names, &name_count); TEST_ASSERT(err == ZVEC_OK); - TEST_ASSERT(name_count >= 10); + TEST_ASSERT(name_count >= 3); TEST_ASSERT(field_names != NULL); // Verify some expected fields are present @@ -1361,8 +2303,32 @@ void test_doc_functions(void) { TEST_ASSERT(found_key_fields == true); zvec_free_str_array(field_names, name_count); + zvec_free_str(string_field.value.string_value.data); + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_error_conditions(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); - // ==================== ERROR CONDITION TESTING ==================== + // Add a field for error testing + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + 
bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + zvec_doc_add_field_by_struct(doc, &bool_field); + + ZVecErrorCode err; + const void *dummy_ptr; + size_t dummy_ptr_size; + int32_t int32_result; + void *string_result; + size_t string_size; printf("=== Testing error conditions ===\n"); @@ -1382,12 +2348,11 @@ void test_doc_functions(void) { TEST_ASSERT(err != ZVEC_OK); // Test wrong data type access - err = - zvec_doc_get_field_value_basic(doc, "string_field", ZVEC_DATA_TYPE_INT32, - &int32_result, sizeof(int32_result)); + err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); TEST_ASSERT(err != ZVEC_OK); - err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_STRING, + err = zvec_doc_get_field_value_copy(doc, "bool_field", ZVEC_DATA_TYPE_STRING, &string_result, &string_size); TEST_ASSERT(err != ZVEC_OK); @@ -1395,7 +2360,35 @@ void test_doc_functions(void) { doc, "bool_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size); TEST_ASSERT(err != ZVEC_OK); - // ==================== DOCUMENT SERIALIZATION TESTING ==================== + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_serialization(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + // Add fields for serialization testing + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Serialization 
Test"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); printf("=== Testing document serialization ===\n"); @@ -1412,6 +2405,7 @@ void test_doc_functions(void) { TEST_ASSERT(deserialized_doc != NULL); // Verify deserialized document has same field count + size_t field_count = zvec_doc_get_field_count(doc); size_t deserialized_field_count = zvec_doc_get_field_count(deserialized_doc); TEST_ASSERT(deserialized_field_count == field_count); @@ -1423,14 +2417,10 @@ void test_doc_functions(void) { TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(deserialized_int32 == -2147483648); - // ==================== CLEANUP ==================== - - zvec_doc_destroy(deserialized_doc); zvec_free_uint8_array(serialized_data); zvec_free_str(string_field.value.string_value.data); - zvec_free_str(binary_field.value.string_value.data); + zvec_doc_destroy(deserialized_doc); zvec_doc_destroy(doc); - zvec_collection_schema_destroy(schema); TEST_END(); } @@ -1994,8 +2984,7 @@ void test_index_creation_and_management(void) { ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); TEST_ASSERT(hnsw_params != NULL); - err = zvec_collection_create_hnsw_index( - collection, &(ZVecString){.data = "dense", .length = 5}, hnsw_params); + err = zvec_collection_create_hnsw_index(collection, "dense", hnsw_params); TEST_ASSERT(err == ZVEC_OK); // Test 2: Create scalar index @@ -2003,14 +2992,12 @@ void test_index_creation_and_management(void) { zvec_index_params_invert_create(true, false); TEST_ASSERT(invert_params != NULL); - err = zvec_collection_create_invert_index( - collection, &(ZVecString){.data = "name", .length = 4}, - invert_params); + err = zvec_collection_create_invert_index(collection, "name", + invert_params); TEST_ASSERT(err == ZVEC_OK); - // Note: Index statistics and drop functionality not yet implemented in C - // API These would require zvec_collection_get_index_stats() and - // zvec_collection_drop_index() + err = 
zvec_collection_drop_index(collection, "name"); + TEST_ASSERT(err == ZVEC_OK); // Test 3: Optimize collection err = zvec_collection_optimize(collection); @@ -2040,6 +3027,8 @@ void test_collection_ddl_operations(void) { ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); TEST_ASSERT(schema != NULL); + size_t field_count = zvec_collection_schema_get_field_count(schema); + if (schema) { ZVecCollection *collection = NULL; ZVecErrorCode err = @@ -2048,25 +3037,47 @@ void test_collection_ddl_operations(void) { TEST_ASSERT(collection != NULL); if (collection) { - // Test 1: Add new field (using schema modification before opening) + // Test 1: Add new column ZVecFieldSchema *new_field = - zvec_field_schema_create("new_field", ZVEC_DATA_TYPE_STRING, true, 0); + zvec_field_schema_create("new_int32", ZVEC_DATA_TYPE_INT32, true, 0); TEST_ASSERT(new_field != NULL); - // Note: Runtime field addition not yet implemented in C API - // This would require zvec_collection_add_field() which is not implemented + err = zvec_collection_add_column(collection, new_field, NULL); + TEST_ASSERT(err == ZVEC_OK); - // Test 2: Get collection schema + // Test 2: Get collection schema and verify field count ZVecCollectionSchema *retrieved_schema = NULL; err = zvec_collection_get_schema(collection, &retrieved_schema); TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(retrieved_schema != NULL); - size_t field_count = + size_t new_field_count = zvec_collection_schema_get_field_count(retrieved_schema); - TEST_ASSERT(field_count > 0); + TEST_ASSERT((field_count + 1) == new_field_count); + + // Test 3: Alter column + ZVecFieldSchema *alter_field = + zvec_field_schema_create("new_float", ZVEC_DATA_TYPE_FLOAT, true, 0); + TEST_ASSERT(alter_field != NULL); + + err = zvec_collection_alter_column(collection, "new_int32", "", + alter_field); + TEST_ASSERT(err == ZVEC_OK); + + // Test 4: Drop column + err = zvec_collection_drop_column(collection, "new_float"); + TEST_ASSERT(err == ZVEC_OK); + + // Test 5: 
Verify field count after drop + err = zvec_collection_get_schema(collection, &retrieved_schema); + TEST_ASSERT(err == ZVEC_OK); + new_field_count = + zvec_collection_schema_get_field_count(retrieved_schema); + TEST_ASSERT(new_field_count == field_count); zvec_collection_schema_destroy(retrieved_schema); + zvec_field_schema_destroy(new_field); + zvec_field_schema_destroy(alter_field); zvec_collection_destroy(collection); } @@ -2110,9 +3121,6 @@ void test_field_ddl_operations(void) { field2, (ZVecIndexParams *)hnsw_params); TEST_ASSERT(err == ZVEC_OK); - // Test field operations - // (Field validation function doesn't exist in current API) - // Cleanup zvec_field_schema_destroy(field1); zvec_field_schema_destroy(field2); @@ -2323,7 +3331,13 @@ int main(void) { // Doc-related tests test_doc_creation(); test_doc_primary_key(); - test_doc_functions(); + test_doc_basic_operations(); + test_doc_get_field_value_basic(); + test_doc_get_field_value_copy(); + test_doc_get_field_value_pointer(); + test_doc_field_operations(); + test_doc_error_conditions(); + test_doc_serialization(); // Index tests test_index_params(); From 686e32fddcc3eaae15a875798d894504ecb8e041 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Fri, 13 Mar 2026 13:26:18 +0800 Subject: [PATCH 13/15] fix some code --- examples/c_api/basic_example.c | 3 +- examples/c_api/collection_schema_example.c | 3 +- examples/c_api/doc_example.c | 2 +- examples/c_api/field_schema_example.c | 3 +- examples/c_api/index_example.c | 6 +- examples/c_api/optimized_example.c | 3 +- src/c_api/c_api.cc | 282 ++--- src/include/zvec/c_api.h | 312 ++---- tests/c_api/c_api_test.c | 1084 +++++++++++++++++++- 9 files changed, 1289 insertions(+), 409 deletions(-) diff --git a/examples/c_api/basic_example.c b/examples/c_api/basic_example.c index 081e631a2..245e929ea 100644 --- a/examples/c_api/basic_example.c +++ b/examples/c_api/basic_example.c @@ -197,7 +197,8 @@ int main() { query.filter = (ZVecString){.data = "", .length = 0}; 
query.include_vector = true; query.include_doc_id = true; - query.output_fields = NULL; + query.output_fields.strings = NULL; + query.output_fields.count = 0; ZVecDoc **results = NULL; size_t result_count = 0; diff --git a/examples/c_api/collection_schema_example.c b/examples/c_api/collection_schema_example.c index af66daa04..40263526c 100644 --- a/examples/c_api/collection_schema_example.c +++ b/examples/c_api/collection_schema_example.c @@ -231,7 +231,8 @@ int main() { query.filter = (ZVecString){.data = "", .length = 0}; query.include_vector = true; query.include_doc_id = true; - query.output_fields = NULL; + query.output_fields.strings = NULL; + query.output_fields.count = 0; ZVecDoc **results = NULL; size_t result_count = 0; diff --git a/examples/c_api/doc_example.c b/examples/c_api/doc_example.c index 81a74506f..155c7a25b 100644 --- a/examples/c_api/doc_example.c +++ b/examples/c_api/doc_example.c @@ -411,7 +411,7 @@ int main() { .filter = (ZVecString){.data = "", .length = 0}, .include_vector = true, .include_doc_id = true, - .output_fields = NULL}; + .output_fields = {.strings = NULL, .count = 0}}; ZVecDoc **query_results = NULL; size_t result_count = 0; diff --git a/examples/c_api/field_schema_example.c b/examples/c_api/field_schema_example.c index 2c1bd9d79..41bdd1a9e 100644 --- a/examples/c_api/field_schema_example.c +++ b/examples/c_api/field_schema_example.c @@ -256,7 +256,8 @@ int main() { query.filter = (ZVecString){.data = "", .length = 0}; query.include_vector = false; query.include_doc_id = true; - query.output_fields = NULL; + query.output_fields.strings = NULL; + query.output_fields.count = 0; ZVecDoc **results = NULL; size_t result_count = 0; diff --git a/examples/c_api/index_example.c b/examples/c_api/index_example.c index 9e1639308..14795ca5b 100644 --- a/examples/c_api/index_example.c +++ b/examples/c_api/index_example.c @@ -266,7 +266,8 @@ int main() { hnsw_query.filter = (ZVecString){.data = "", .length = 0}; hnsw_query.include_vector = 
false; hnsw_query.include_doc_id = true; - hnsw_query.output_fields = NULL; + hnsw_query.output_fields.strings = NULL; + hnsw_query.output_fields.count = 0; ZVecDoc **hnsw_results = NULL; size_t hnsw_result_count = 0; @@ -287,7 +288,8 @@ int main() { flat_query.filter = (ZVecString){.data = "", .length = 0}; flat_query.include_vector = false; flat_query.include_doc_id = true; - flat_query.output_fields = NULL; + flat_query.output_fields.strings = NULL; + flat_query.output_fields.count = 0; ZVecDoc **flat_results = NULL; size_t flat_result_count = 0; diff --git a/examples/c_api/optimized_example.c b/examples/c_api/optimized_example.c index dca683050..33ed24d61 100644 --- a/examples/c_api/optimized_example.c +++ b/examples/c_api/optimized_example.c @@ -240,7 +240,8 @@ int main() { query.filter = (ZVecString){.data = "", .length = 0}; query.include_vector = false; query.include_doc_id = true; - query.output_fields = NULL; + query.output_fields.strings = NULL; + query.output_fields.count = 0; const int QUERY_COUNT = 100; start_time = clock(); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 9ede0d99c..1d44094f2 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -744,83 +744,121 @@ static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, // Memory Management interface implementation // ============================================================================= -void *zvec_malloc(size_t size) { - if (size == 0) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Cannot allocate zero bytes", __FILE__, __LINE__, - __FUNCTION__); - return nullptr; +void zvec_free_string(ZVecString *str) { + if (str) { + if (str->data) { + delete[] str->data; + } + delete str; } +} - try { - return malloc(size); - } catch (const std::bad_alloc &e) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("Memory allocation failed: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return nullptr; +ZVecStringArray 
*zvec_string_array_create(size_t count) { + ZVecStringArray *array = (ZVecStringArray *)malloc(sizeof(ZVecStringArray)); + array->count = count; + array->strings = (ZVecString *)malloc(sizeof(ZVecString) * count); + memset(array->strings, 0, sizeof(ZVecString) * count); + return array; +} + +void zvec_string_array_add(ZVecStringArray *array, size_t idx, + const char *str) { + if (idx >= array->count) return; + size_t len = strlen(str); + array->strings[idx].data = (char *)malloc(len + 1); + memcpy(array->strings[idx].data, str, len + 1); + array->strings[idx].length = len; + array->strings[idx].capacity = len + 1; +} + +void zvec_string_array_destroy(ZVecStringArray *array) { + if (!array) return; + for (size_t i = 0; i < array->count; i++) { + free((void *)array->strings[i].data); } + free(array->strings); + free(array); } -void *zvec_realloc(void *ptr, size_t size) { - if (size == 0 && ptr == nullptr) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Cannot reallocate null pointer to zero size", - __FILE__, __LINE__, __FUNCTION__); - return nullptr; + +void zvec_free_str(char *str) { + if (str) { + free(str); } +} - try { - return realloc(ptr, size); - } catch (const std::bad_alloc &e) { - set_last_error_details( - ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("Memory reallocation failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); +// Byte array helper functions +ZVecMutableByteArray *zvec_byte_array_create(size_t capacity) { + ZVecMutableByteArray *array = + (ZVecMutableByteArray *)malloc(sizeof(ZVecMutableByteArray)); + if (!array) return nullptr; + + array->data = (uint8_t *)malloc(capacity); + if (!array->data) { + free(array); return nullptr; } + + array->length = 0; + array->capacity = capacity; + memset(array->data, 0, capacity); + return array; } -void zvec_free(void *ptr) { - if (ptr) { - free(ptr); +void zvec_byte_array_destroy(ZVecMutableByteArray *array) { + if (!array) return; + if (array->data) { + free(array->data); } + 
free(array); } -void zvec_free_string(ZVecString *str) { - if (str) { - if (str->data) { - delete[] str->data; - } - delete str; +// Float array helper functions +ZVecFloatArray *zvec_float_array_create(size_t count) { + ZVecFloatArray *array = (ZVecFloatArray *)malloc(sizeof(ZVecFloatArray)); + if (!array) return nullptr; + + array->data = (const float *)malloc(sizeof(float) * count); + if (!array->data) { + free(array); + return nullptr; } + + array->length = count; + memset((void *)array->data, 0, sizeof(float) * count); + return array; } -void zvec_free_string_array(ZVecStringArray *array) { - if (array) { - if (array->strings) { - for (size_t i = 0; i < array->count; ++i) { - zvec_free_string(&array->strings[i]); - } - delete[] array->strings; - } - delete array; +void zvec_float_array_destroy(ZVecFloatArray *array) { + if (!array) return; + if (array->data) { + free((void *)array->data); } + free(array); } -void zvec_free_byte_array(ZVecMutableByteArray *array) { - if (array) { - if (array->data) { - delete[] array->data; - } - delete array; +// Int64 array helper functions +ZVecInt64Array *zvec_int64_array_create(size_t count) { + ZVecInt64Array *array = (ZVecInt64Array *)malloc(sizeof(ZVecInt64Array)); + if (!array) return nullptr; + + array->data = (const int64_t *)malloc(sizeof(int64_t) * count); + if (!array->data) { + free(array); + return nullptr; } + + array->length = count; + memset((void *)array->data, 0, sizeof(int64_t) * count); + return array; } -void zvec_free_str(char *str) { - if (str) { - free(str); +void zvec_int64_array_destroy(ZVecInt64Array *array) { + if (!array) return; + if (array->data) { + free((void *)array->data); } + free(array); } void zvec_free_float_array(float *array) { @@ -863,15 +901,6 @@ void zvec_free_uint8_array(uint8_t *array) { } } -void zvec_free_field_schema_array(ZVecFieldSchema **array, size_t count) { - if (!array) return; - - for (size_t i = 0; i < count; ++i) { - zvec_free_field_schema(array[i]); - } - 
free(array); -} - void zvec_free_field_schema(ZVecFieldSchema *field_schema) { if (field_schema) { if (field_schema->index_params) { @@ -1903,71 +1932,6 @@ const char *zvec_metric_type_to_string(ZVecMetricType metric_type) { } } -ZVecErrorCode zvec_get_system_info(ZVecString **info_json) { - if (!info_json) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - "Info JSON pointer cannot be null", __FILE__, - __LINE__, __FUNCTION__); - return ZVEC_ERROR_INVALID_ARGUMENT; - } - - try { - std::ostringstream oss; - oss << "{"; - oss << "\"version\":\"" << ZVEC_VERSION_STRING << "\","; - oss << "\"platform\":\"" - << -#ifdef _WIN32 - "Windows" -#elif __APPLE__ - "macOS" -#elif __linux__ - "Linux" -#else - "Unknown" -#endif - << "\","; - oss << "\"architecture\":\"" - << -#ifdef __x86_64__ - "x86_64" -#elif __aarch64__ - "ARM64" -#elif __arm__ - "ARM" -#else - "Unknown" -#endif - << "\","; - oss << "\"compiler\":\"" - << -#ifdef __GNUC__ - "GCC " << __GNUC__ << "." << __GNUC_MINOR__ -#elif _MSC_VER - "MSVC " << _MSC_VER -#elif __clang__ - "Clang " << __clang_major__ << "." 
<< __clang_minor__ -#else - "Unknown" -#endif - << "\""; - oss << "}"; - - *info_json = zvec_string_create(oss.str().c_str()); - if (!*info_json) { - return ZVEC_ERROR_RESOURCE_EXHAUSTED; - } - - return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error_details( - ZVEC_ERROR_INTERNAL_ERROR, - std::string("Failed to get system info: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); - return ZVEC_ERROR_INTERNAL_ERROR; - } -} - bool check_is_vector_field(const ZVecFieldSchema &zvec_field) { bool is_vector_field = (zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || @@ -4140,54 +4104,6 @@ ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { } } -ZVecErrorCode zvec_collection_get_path(const ZVecCollection *collection, - char **path) { - if (!collection || !path) { - set_last_error("Invalid arguments: collection and path cannot be null"); - return ZVEC_ERROR_INVALID_ARGUMENT; - } - - try { - auto &coll = *reinterpret_cast *>( - collection); - auto result = coll->Path(); - - ZVecErrorCode error_code = handle_expected_result(result); - if (error_code == ZVEC_OK) { - *path = copy_string(result.value()); - } - - return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } -} - -ZVecErrorCode zvec_collection_get_name(const ZVecCollection *collection, - char **name) { - if (!collection || !name) { - set_last_error("Invalid arguments: collection and name cannot be null"); - return ZVEC_ERROR_INVALID_ARGUMENT; - } - - try { - auto &coll = *reinterpret_cast *>( - collection); - auto result = coll->Schema(); - - ZVecErrorCode error_code = handle_expected_result(result); - if (error_code == ZVEC_OK) { - *name = copy_string(result.value().name()); - } - - return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } -} - ZVecErrorCode 
zvec_collection_get_schema(const ZVecCollection *collection, ZVecCollectionSchema **schema) { if (!collection || !schema) { @@ -5343,12 +5259,12 @@ void convert_common_query_params(zvec::VectorQuery &internal_query, } // Output fields conversion - if (query->output_fields && query->output_fields->count > 0) { + if (query->output_fields.count > 0) { internal_query.output_fields_ = std::vector(); - for (size_t i = 0; i < query->output_fields->count; ++i) { + for (size_t i = 0; i < query->output_fields.count; ++i) { internal_query.output_fields_->emplace_back( - query->output_fields->strings[i].data, - query->output_fields->strings[i].length); + query->output_fields.strings[i].data, + query->output_fields.strings[i].length); } } } @@ -5435,14 +5351,14 @@ void convert_groupby_query_params(zvec::GroupByVectorQuery &internal_query, query->query_sparse_values.length); } - if (query->output_fields && query->output_fields->count > 0) { + if (query->output_fields.count > 0) { if (!internal_query.output_fields_.has_value()) { internal_query.output_fields_ = std::vector(); } - for (size_t i = 0; i < query->output_fields->count; ++i) { + for (size_t i = 0; i < query->output_fields.count; ++i) { internal_query.output_fields_->push_back( - std::string(query->output_fields->strings[i].data, - query->output_fields->strings[i].length)); + std::string(query->output_fields.strings[i].data, + query->output_fields.strings[i].length)); } } diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 1c9d54835..5d2e61769 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -301,6 +301,93 @@ ZVEC_EXPORT size_t ZVEC_CALL zvec_string_length(const ZVecString *str); ZVEC_EXPORT int ZVEC_CALL zvec_string_compare(const ZVecString *str1, const ZVecString *str2); +/** + * @brief Free string memory + * @param str String pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_string(ZVecString *str); + +/** + * @brief Free string memory + * @param str String pointer 
to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_str(char *str); + + +// ============================================================================= +// Array Memory management functions +// ============================================================================= + +/** + * @brief Create a new string array + * @param count Initial number of strings to allocate space for + * @return Pointer to the newly created string array, or NULL on failure + */ +ZVEC_EXPORT ZVecStringArray *ZVEC_CALL zvec_string_array_create(size_t count); + +/** + * @brief Add a string to the string array at specified index + * @param array String array pointer + * @param idx Index position where the string should be added + * @param str Null-terminated C string to add + */ +ZVEC_EXPORT void ZVEC_CALL zvec_string_array_add(ZVecStringArray *array, + size_t idx, const char *str); + +/** + * @brief Destroy string array and free all associated memory + * @param array String array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_string_array_destroy(ZVecStringArray *array); + +/** + * @brief Create a new mutable byte array + * @param capacity Initial capacity in bytes + * @return Pointer to the newly created byte array, or NULL on failure + */ +ZVEC_EXPORT ZVecMutableByteArray *ZVEC_CALL +zvec_byte_array_create(size_t capacity); + + +/** + * @brief Destroy byte array and free all associated memory + * @param array Byte array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_byte_array_destroy(ZVecMutableByteArray *array); + +/** + * @brief Create a new float array + * @param count Number of floats to allocate space for + * @return Pointer to the newly created float array, or NULL on failure + */ +ZVEC_EXPORT ZVecFloatArray *ZVEC_CALL zvec_float_array_create(size_t count); + +/** + * @brief Destroy float array and free all associated memory + * @param array Float array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_float_array_destroy(ZVecFloatArray *array); + +/** + 
* @brief Create a new int64 array + * @param count Number of int64 values to allocate space for + * @return Pointer to the newly created int64 array, or NULL on failure + */ +ZVEC_EXPORT ZVecInt64Array *ZVEC_CALL zvec_int64_array_create(size_t count); + +/** + * @brief Destroy int64 array and free all associated memory + * @param array Int64 array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_int64_array_destroy(ZVecInt64Array *array); + +/** + * @brief Release uint8_t array memory + * + * @param array uint8_t array pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); + // ============================================================================= // Configuration and Options Structures @@ -922,7 +1009,7 @@ typedef struct { ZVecString filter; /**< Filter expression */ bool include_vector; /**< Whether to include vector data */ bool include_doc_id; /**< Whether to include document ID */ - ZVecStringArray *output_fields; /**< Output field list (NULL means all) */ + ZVecStringArray output_fields; /**< Output field list (NULL means all) */ ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL means using default parameters) */ } ZVecVectorQuery; @@ -939,7 +1026,7 @@ typedef struct { ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ ZVecString filter; /**< Filter expression */ bool include_vector; /**< Whether to include vector data */ - ZVecStringArray *output_fields; /**< Output field list */ + ZVecStringArray output_fields; /**< Output field list */ ZVecString group_by_field_name; /**< Group by field name */ uint32_t group_count; /**< Number of groups */ uint32_t group_topk; /**< Number of results to return per group */ @@ -1373,27 +1460,6 @@ zvec_collection_destroy(ZVecCollection *collection); ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_flush(ZVecCollection *collection); -/** - * @brief Get collection path - * @param collection Collection handle - * @param[out] path 
Returned path string (needs to be freed by calling - * zvec_free_string) - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_collection_get_path(const ZVecCollection *collection, ZVecString **path); - - -/** - * @brief Get collection name - * @param collection Collection handle - * @param[out] name Returned collection name (needs to be freed by calling - * zvec_free_string) - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_collection_get_name(const ZVecCollection *collection, ZVecString **name); - /** * @brief Get collection schema * @param collection Collection handle @@ -1442,39 +1508,6 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_stats( ZVEC_EXPORT void ZVEC_CALL zvec_collection_stats_destroy(ZVecCollectionStats *stats); - -/** - * @brief Free field schema array memory - * - * @param array Field schema array pointer - * @param count Array element count - */ -ZVEC_EXPORT void ZVEC_CALL zvec_free_field_schema_array(ZVecFieldSchema **array, - size_t count); - -/** - * @brief Check if collection has specified field - * @param collection Collection handle - * @param field_name Field name - * @param[out] exists Whether exists - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_has_field( - const ZVecCollection *collection, const char *field_name, bool *exists); - -/** - * @brief Get field information - * @param collection Collection handle - * @param field_name Field name - * @param[out] field_schema - * Returned field schema pointer (needs to be freed by calling - * zvec_field_schema_destroy) - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_field_info( - const ZVecCollection *collection, const char *field_name, - ZVecFieldSchema **field_schema); - /** * @brief Free field schema memory * @@ -1728,7 +1761,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query( * @param[out] results 
Returned document array (needs to be freed by calling * zvec_docs_free) * @param[out] group_by_values Returned group by field values array (needs to be - * freed by calling zvec_free_string_array) + * freed by calling zvec_string_array_destroy) * @param[out] result_count Number of returned results * @return ZVecErrorCode Error code */ @@ -1737,7 +1770,7 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_group( ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count); /** - * @brief Get documents by primary keys + * @brief Fetch documents by primary keys * @param collection Collection handle * @param primary_keys Primary key array * @param count Number of primary keys @@ -1746,24 +1779,10 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_group( * @param[out] found_count Number of found documents * @return ZVecErrorCode Error code */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_by_primary_keys( +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_fetch( ZVecCollection *collection, const char *const *primary_keys, size_t count, ZVecDoc ***documents, size_t *found_count); -/** - * @brief Query documents by filter condition - * @param collection Collection handle - * @param filter_expression Filter expression - * @param limit Result limit - * @param offset Offset - * @param[out] documents Returned document array - * @param[out] result_count Number of returned results - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_filter( - const ZVecCollection *collection, const char *filter_expression, - size_t limit, size_t offset, ZVecDoc ***documents, size_t *result_count); - // ============================================================================= // Document Related Structures // ============================================================================= @@ -2159,77 +2178,6 @@ zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str); ZVEC_EXPORT void 
ZVEC_CALL zvec_docs_free(ZVecDoc **docs, size_t count); -// ============================================================================= -// Query Parameter Constructor Functions -// ============================================================================= - -/** - * @brief Create vector query parameters - * @param field_name Query field name - * @param query_data Query vector data - * @param query_length Query vector length - * @param top_k Number of results to return - * @return ZVecVectorQuery* Pointer to the newly created query parameters - */ -ZVEC_EXPORT ZVecVectorQuery *ZVEC_CALL -zvec_vector_query_create(const char *field_name, const float *query_data, - size_t query_length, int top_k); - -/** - * @brief Destroy vector query parameters - * @param query Query parameters pointer - */ -ZVEC_EXPORT void ZVEC_CALL zvec_vector_query_destroy(ZVecVectorQuery *query); - -/** - * @brief Set query filter condition - * @param query Query parameters pointer - * @param filter_expression Filter expression - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_filter( - ZVecVectorQuery *query, const char *filter_expression); - -/** - * @brief Set output fields - * @param query Query parameters pointer - * @param field_names Field name array - * @param count Field count - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_output_fields( - ZVecVectorQuery *query, const char *const *field_names, size_t count); - -/** - * @brief Set timeout - * @param query Query parameters pointer - * @param timeout_ms Timeout in milliseconds - */ -ZVEC_EXPORT void ZVEC_CALL zvec_vector_query_set_timeout(ZVecVectorQuery *query, - int timeout_ms); - -/** - * @brief Create grouped vector query parameters - * @param field_name Query field name - * @param query_data Query vector data - * @param query_length Query vector length - * @param group_by_field Group by field name - * @param group_count 
Number of groups - * @param group_top_k Number of results to return per group - * @return ZVecGroupByVectorQuery* Pointer to the newly created query parameters - */ -ZVEC_EXPORT ZVecGroupByVectorQuery *ZVEC_CALL zvec_grouped_vector_query_create( - const char *field_name, const float *query_data, size_t query_length, - const char *group_by_field, uint32_t group_count, uint32_t group_top_k); - -/** - * @brief Destroy grouped vector query parameters - * @param query Query parameters pointer - */ -ZVEC_EXPORT void ZVEC_CALL -zvec_grouped_vector_query_destroy(ZVecGroupByVectorQuery *query); - - // ============================================================================= // Utility Functions // ============================================================================= @@ -2265,70 +2213,6 @@ zvec_index_type_to_string(ZVecIndexType index_type); */ const char *zvec_metric_type_to_string(ZVecMetricType metric_type); -/** - * @brief Get system information - * @param[out] info_json System information JSON string (needs to be freed by - * calling zvec_free_string) - * @return ZVecErrorCode Error code - */ -ZVecErrorCode zvec_get_system_info(ZVecString **info_json); - -// ============================================================================= -// Memory Management Interface -// ============================================================================= - -/** - * @brief Allocate memory - * @param size Number of bytes to allocate - * @return void* Allocated memory pointer, returns NULL on failure - */ -ZVEC_EXPORT void *ZVEC_CALL zvec_malloc(size_t size); - -/** - * @brief Reallocate memory - * @param ptr Original memory pointer - * @param size New number of bytes - * @return void* Reallocation memory pointer, returns NULL on failure - */ -ZVEC_EXPORT void *ZVEC_CALL zvec_realloc(void *ptr, size_t size); - -/** - * @brief Free memory - * @param ptr Memory pointer to free - */ -ZVEC_EXPORT void ZVEC_CALL zvec_free(void *ptr); - -/** - * @brief Free string memory - * 
@param str String pointer to free - */ -ZVEC_EXPORT void ZVEC_CALL zvec_free_string(ZVecString *str); - -/** - * @brief Free string array memory - * @param array String array pointer to free - */ -ZVEC_EXPORT void ZVEC_CALL zvec_free_string_array(ZVecStringArray *array); - -/** - * @brief Free byte array memory - * @param array Byte array pointer to free - */ -ZVEC_EXPORT void ZVEC_CALL zvec_free_byte_array(ZVecMutableByteArray *array); - -/** - * @brief Free string memory - * @param str String pointer to free - */ -ZVEC_EXPORT void ZVEC_CALL zvec_free_str(char *str); - -/** - * @brief Release uint8_t array memory - * - * @param array uint8_t array pointer - */ -ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); - // ============================================================================= // Helper Functions @@ -2395,6 +2279,18 @@ ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); .use_soar = soar, .n_probe = nprobe \ } +/** + * @brief Simplified string initialization macro + * @param str String content + * + * Usage example: + * ZVecString name = ZVEC_STRING_LITERAL("my_collection"); + */ +#define ZVEC_STRING_LITERAL(str) \ + (ZVecString) { \ + .data = str, .length = strlen(str) \ + } + /** * @brief Simplified string view initialization macro * @param str String content diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index 15bb92a20..8e97baa6f 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -1048,6 +1048,611 @@ void test_doc_primary_key(void) { TEST_END(); } +// Test for zvec_doc_add_field_by_value - covers all data types +void test_doc_add_field_by_value(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + if (!doc) { + TEST_END(); + return; + } + + // Scalar types + // BINARY + const char *binary_data = "binary"; + ZVecErrorCode err = + zvec_doc_add_field_by_value(doc, "binary_field", ZVEC_DATA_TYPE_BINARY, + binary_data, strlen(binary_data)); + 
TEST_ASSERT(err == ZVEC_OK); + + // STRING + const char *string_data = "hello"; + err = zvec_doc_add_field_by_value(doc, "string_field", ZVEC_DATA_TYPE_STRING, + string_data, strlen(string_data)); + TEST_ASSERT(err == ZVEC_OK); + + // BOOL + bool bool_val = true; + err = zvec_doc_add_field_by_value(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_val, sizeof(bool_val)); + TEST_ASSERT(err == ZVEC_OK); + + // INT32 + int32_t int32_val = -12345; + err = zvec_doc_add_field_by_value(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_val, sizeof(int32_val)); + TEST_ASSERT(err == ZVEC_OK); + + // INT64 + int64_t int64_val = -9876543210LL; + err = zvec_doc_add_field_by_value(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + // UINT32 + uint32_t uint32_val = 4294967295U; + err = zvec_doc_add_field_by_value(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32, + &uint32_val, sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + // UINT64 + uint64_t uint64_val = 18446744073709551615ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64, + &uint64_val, sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + // FLOAT + float float_val = 3.14159f; + err = zvec_doc_add_field_by_value(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + // DOUBLE + double double_val = 3.14159265358979; + err = zvec_doc_add_field_by_value(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_val, sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + // Vector types + // VECTOR_BINARY32 + uint32_t binary32_vec[] = {0xFFFFFFFF, 0x00000000, 0xAAAAAAAA, 0x55555555}; + err = zvec_doc_add_field_by_value(doc, "binary32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, + binary32_vec, sizeof(binary32_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + uint64_t binary64_vec[] = {0xFFFFFFFFFFFFFFFFULL, 0x0000000000000000ULL}; + err = 
zvec_doc_add_field_by_value(doc, "binary64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + binary64_vec, sizeof(binary64_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + uint16_t fp16_vec[] = {0x3C00, 0x4000, 0xC000, 0x8000}; + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_vec, + sizeof(fp16_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP32 + float fp32_vec[] = {1.0f, -2.0f, 3.5f, -4.5f}; + err = zvec_doc_add_field_by_value(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, fp32_vec, + sizeof(fp32_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + double fp64_vec[] = {1.1, -2.2, 3.3, -4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_vec, + sizeof(fp64_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT4 (packed - each byte contains 2 values) + int8_t int4_vec[] = {0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0}; + err = zvec_doc_add_field_by_value(doc, "int4_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT4, int4_vec, + sizeof(int4_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + int8_t int8_vec[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_vec, + sizeof(int8_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + int16_t int16_vec[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_vec, + sizeof(int16_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // Sparse vector types + // SPARSE_VECTOR_FP16 - format: [nnz(size_t)][indices...][values...] 
+ size_t sparse_fp16_nnz = 3; + uint32_t sparse_fp16_indices[] = {0, 5, 10}; + uint16_t sparse_fp16_values[] = {0x3C00, 0x4000, 0xC000}; + size_t sparse_fp16_size = sizeof(sparse_fp16_nnz) + + sizeof(sparse_fp16_indices) + + sizeof(sparse_fp16_values); + char *sparse_fp16_buffer = (char *)malloc(sparse_fp16_size); + memcpy(sparse_fp16_buffer, &sparse_fp16_nnz, sizeof(sparse_fp16_nnz)); + memcpy(sparse_fp16_buffer + sizeof(sparse_fp16_nnz), sparse_fp16_indices, + sizeof(sparse_fp16_indices)); + memcpy(sparse_fp16_buffer + sizeof(sparse_fp16_nnz) + + sizeof(sparse_fp16_indices), + sparse_fp16_values, sizeof(sparse_fp16_values)); + err = zvec_doc_add_field_by_value(doc, "sparse_fp16_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + sparse_fp16_buffer, sparse_fp16_size); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp16_buffer); + + // SPARSE_VECTOR_FP32 + size_t sparse_fp32_nnz = 3; + uint32_t sparse_fp32_indices[] = {2, 7, 15}; + float sparse_fp32_values[] = {1.5f, -2.5f, 3.5f}; + size_t sparse_fp32_size = sizeof(sparse_fp32_nnz) + + sizeof(sparse_fp32_indices) + + sizeof(sparse_fp32_values); + char *sparse_fp32_buffer = (char *)malloc(sparse_fp32_size); + memcpy(sparse_fp32_buffer, &sparse_fp32_nnz, sizeof(sparse_fp32_nnz)); + memcpy(sparse_fp32_buffer + sizeof(sparse_fp32_nnz), sparse_fp32_indices, + sizeof(sparse_fp32_indices)); + memcpy(sparse_fp32_buffer + sizeof(sparse_fp32_nnz) + + sizeof(sparse_fp32_indices), + sparse_fp32_values, sizeof(sparse_fp32_values)); + err = zvec_doc_add_field_by_value(doc, "sparse_fp32_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_fp32_buffer, sparse_fp32_size); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp32_buffer); + + // Array types + // ARRAY_BINARY - format: [length(uint32_t)][data][length][data]... 
+ uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + err = zvec_doc_add_field_by_value(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, array_bin_data, + sizeof(array_bin_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_STRING - array of ZVecString pointers built from C strings + const char *array_str_data[] = {"str1", "str2", "str3"}; + ZVecString *array_zvec_str[3]; + for (int i = 0; i < 3; i++) { + array_zvec_str[i] = zvec_string_create(array_str_data[i]); + } + err = zvec_doc_add_field_by_value(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, array_zvec_str, + sizeof(array_zvec_str)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_BOOL + bool array_bool_data[] = {true, false, true, false}; + err = zvec_doc_add_field_by_value(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, array_bool_data, + sizeof(array_bool_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + int32_t array_int32_data[] = {-100, -50, 0, 50, 100}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + int64_t array_int64_data[] = {-1000000, -500000, 0, 500000, 1000000}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + uint32_t array_uint32_data[] = {0, 100, 1000, 10000, 4294967295U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field", ZVEC_DATA_TYPE_ARRAY_UINT32, array_uint32_data, + sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + uint64_t array_uint64_data[] = {0, 100, 1000, 10000, 18446744073709551615ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field", ZVEC_DATA_TYPE_ARRAY_UINT64, array_uint64_data, +
sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + float array_float_data[] = {-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + double array_double_data[] = {-1.1, -0.1, 0.0, 0.1, 1.1}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field", ZVEC_DATA_TYPE_ARRAY_DOUBLE, array_double_data, + sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + // Verify we can retrieve some of the values + void *result = NULL; + size_t result_size = 0; + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(int32_t)); + if (result) { + TEST_ASSERT(*(int32_t *)result == -12345); + free(result); + } + + err = zvec_doc_get_field_value_copy(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(float)); + if (result) { + TEST_ASSERT(fabs(*(float *)result - 3.14159f) < 0.0001f); + free(result); + } + + zvec_doc_destroy(doc); + TEST_END(); +} + +// Test for zvec_doc_add_field_by_struct - covers all data types +void test_doc_add_field_by_struct(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + if (!doc) { + TEST_END(); + return; + } + + ZVecErrorCode err; + ZVecDocField field; + + // Scalar types + // BINARY + memset(&field, 0, sizeof(field)); + field.name.data = "binary_field"; + field.name.length = strlen("binary_field"); + field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x01, 0x02, 0x03, 0x04}; + field.value.binary_value.data = binary_data; + field.value.binary_value.length = sizeof(binary_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // STRING + memset(&field, 0, sizeof(field)); + 
field.name.data = "string_field"; + field.name.length = strlen("string_field"); + field.data_type = ZVEC_DATA_TYPE_STRING; + const char *string_data = "hello world"; + field.value.string_value.data = (char *)string_data; + field.value.string_value.length = strlen(string_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // BOOL + memset(&field, 0, sizeof(field)); + field.name.data = "bool_field"; + field.name.length = strlen("bool_field"); + field.data_type = ZVEC_DATA_TYPE_BOOL; + field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // INT32 + memset(&field, 0, sizeof(field)); + field.name.data = "int32_field"; + field.name.length = strlen("int32_field"); + field.data_type = ZVEC_DATA_TYPE_INT32; + field.value.int32_value = -12345; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // INT64 + memset(&field, 0, sizeof(field)); + field.name.data = "int64_field"; + field.name.length = strlen("int64_field"); + field.data_type = ZVEC_DATA_TYPE_INT64; + field.value.int64_value = -9876543210LL; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // UINT32 + memset(&field, 0, sizeof(field)); + field.name.data = "uint32_field"; + field.name.length = strlen("uint32_field"); + field.data_type = ZVEC_DATA_TYPE_UINT32; + field.value.uint32_value = 4294967295U; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // UINT64 + memset(&field, 0, sizeof(field)); + field.name.data = "uint64_field"; + field.name.length = strlen("uint64_field"); + field.data_type = ZVEC_DATA_TYPE_UINT64; + field.value.uint64_value = 18446744073709551615ULL; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // FLOAT + memset(&field, 0, sizeof(field)); + field.name.data = "float_field"; + field.name.length = strlen("float_field"); + field.data_type = 
ZVEC_DATA_TYPE_FLOAT; + field.value.float_value = 3.14159f; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // DOUBLE + memset(&field, 0, sizeof(field)); + field.name.data = "double_field"; + field.name.length = strlen("double_field"); + field.data_type = ZVEC_DATA_TYPE_DOUBLE; + field.value.double_value = 3.14159265358979; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY32 + memset(&field, 0, sizeof(field)); + field.name.data = "binary32_vec_field"; + field.name.length = strlen("binary32_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_BINARY32; + uint32_t binary32_vec[] = {0xFFFFFFFF, 0x00000000, 0xAAAAAAAA, 0x55555555}; + field.value.vector_value.data = (const float *)binary32_vec; + field.value.vector_value.length = sizeof(binary32_vec) / sizeof(uint32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + memset(&field, 0, sizeof(field)); + field.name.data = "binary64_vec_field"; + field.name.length = strlen("binary64_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_BINARY64; + uint64_t binary64_vec[] = {0xFFFFFFFFFFFFFFFFULL, 0x0000000000000000ULL}; + field.value.vector_value.data = (const float *)binary64_vec; + field.value.vector_value.length = sizeof(binary64_vec) / sizeof(uint64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + memset(&field, 0, sizeof(field)); + field.name.data = "fp16_vec_field"; + field.name.length = strlen("fp16_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP16; + uint16_t fp16_vec[] = {0x3C00, 0x4000, 0xC000, 0x8000}; + field.value.vector_value.data = (const float *)fp16_vec; + field.value.vector_value.length = sizeof(fp16_vec) / sizeof(uint16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP32 + memset(&field, 0, sizeof(field)); + field.name.data = 
"fp32_vec_field"; + field.name.length = strlen("fp32_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + float fp32_vec[] = {1.0f, -2.0f, 3.5f, -4.5f}; + field.value.vector_value.data = fp32_vec; + field.value.vector_value.length = sizeof(fp32_vec) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + memset(&field, 0, sizeof(field)); + field.name.data = "fp64_vec_field"; + field.name.length = strlen("fp64_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP64; + double fp64_vec[] = {1.1, -2.2, 3.3, -4.4}; + field.value.vector_value.data = (const float *)fp64_vec; + field.value.vector_value.length = sizeof(fp64_vec) / sizeof(double); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT4 + memset(&field, 0, sizeof(field)); + field.name.data = "int4_vec_field"; + field.name.length = strlen("int4_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT4; + int8_t int4_vec[] = {0x12, 0x34, 0x56, 0x78}; + field.value.vector_value.data = (const float *)int4_vec; + field.value.vector_value.length = + sizeof(int4_vec) * 2; // Each byte contains 2 values + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + memset(&field, 0, sizeof(field)); + field.name.data = "int8_vec_field"; + field.name.length = strlen("int8_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT8; + int8_t int8_vec[] = {-128, -1, 0, 1, 127}; + field.value.vector_value.data = (const float *)int8_vec; + field.value.vector_value.length = sizeof(int8_vec) / sizeof(int8_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + memset(&field, 0, sizeof(field)); + field.name.data = "int16_vec_field"; + field.name.length = strlen("int16_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT16; + int16_t int16_vec[] = {-32768, -1, 0, 1, 32767}; + field.value.vector_value.data = 
(const float *)int16_vec; + field.value.vector_value.length = sizeof(int16_vec) / sizeof(int16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Sparse vector types + // SPARSE_VECTOR_FP16 + memset(&field, 0, sizeof(field)); + field.name.data = "sparse_fp16_field"; + field.name.length = strlen("sparse_fp16_field"); + field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16; + uint16_t sparse_fp16_values[] = {0x3C00, 0x4000, 0xC000}; + field.value.vector_value.data = (const float *)sparse_fp16_values; + field.value.vector_value.length = + sizeof(sparse_fp16_values) / sizeof(uint16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // SPARSE_VECTOR_FP32 + memset(&field, 0, sizeof(field)); + field.name.data = "sparse_fp32_field"; + field.name.length = strlen("sparse_fp32_field"); + field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; + float sparse_fp32_values[] = {1.5f, -2.5f, 3.5f}; + field.value.vector_value.data = sparse_fp32_values; + field.value.vector_value.length = sizeof(sparse_fp32_values) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Array types + // ARRAY_BINARY + memset(&field, 0, sizeof(field)); + field.name.data = "array_binary_field"; + field.name.length = strlen("array_binary_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_BINARY; + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + field.value.binary_value.data = array_bin_data; + field.value.binary_value.length = sizeof(array_bin_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_STRING + memset(&field, 0, sizeof(field)); + field.name.data = "array_string_field"; + field.name.length = strlen("array_string_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_STRING; + const char 
array_string_data[] = "str1\0str2\0str3\0"; + field.value.string_value.data = (char *)array_string_data; + field.value.string_value.length = sizeof(array_string_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_BOOL + memset(&field, 0, sizeof(field)); + field.name.data = "array_bool_field"; + field.name.length = strlen("array_bool_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_BOOL; + bool array_bool_data[] = {true, false, true, false}; + field.value.binary_value.data = (const uint8_t *)array_bool_data; + field.value.binary_value.length = sizeof(array_bool_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + memset(&field, 0, sizeof(field)); + field.name.data = "array_int32_field"; + field.name.length = strlen("array_int32_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_INT32; + int32_t array_int32_data[] = {-100, -50, 0, 50, 100}; + field.value.vector_value.data = (const float *)array_int32_data; + field.value.vector_value.length = sizeof(array_int32_data) / sizeof(int32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + memset(&field, 0, sizeof(field)); + field.name.data = "array_int64_field"; + field.name.length = strlen("array_int64_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_INT64; + int64_t array_int64_data[] = {-1000000, -500000, 0, 500000, 1000000}; + field.value.vector_value.data = (const float *)array_int64_data; + field.value.vector_value.length = sizeof(array_int64_data) / sizeof(int64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + memset(&field, 0, sizeof(field)); + field.name.data = "array_uint32_field"; + field.name.length = strlen("array_uint32_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_UINT32; + uint32_t array_uint32_data[] = {0, 100, 1000, 10000, 4294967295U}; + field.value.vector_value.data = (const float 
*)array_uint32_data; + field.value.vector_value.length = + sizeof(array_uint32_data) / sizeof(uint32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + memset(&field, 0, sizeof(field)); + field.name.data = "array_uint64_field"; + field.name.length = strlen("array_uint64_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_UINT64; + uint64_t array_uint64_data[] = {0, 100, 1000, 10000, 18446744073709551615ULL}; + field.value.vector_value.data = (const float *)array_uint64_data; + field.value.vector_value.length = + sizeof(array_uint64_data) / sizeof(uint64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + memset(&field, 0, sizeof(field)); + field.name.data = "array_float_field"; + field.name.length = strlen("array_float_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_FLOAT; + float array_float_data[] = {-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}; + field.value.vector_value.data = array_float_data; + field.value.vector_value.length = sizeof(array_float_data) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + memset(&field, 0, sizeof(field)); + field.name.data = "array_double_field"; + field.name.length = strlen("array_double_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_DOUBLE; + double array_double_data[] = {-1.1, -0.1, 0.0, 0.1, 1.1}; + field.value.vector_value.data = (const float *)array_double_data; + field.value.vector_value.length = sizeof(array_double_data) / sizeof(double); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Verify we can retrieve some of the values + void *result = NULL; + size_t result_size = 0; + + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(int32_t)); + if (result) { + TEST_ASSERT(*(int32_t *)result == -12345); + 
free(result); + } + + err = zvec_doc_get_field_value_copy(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(float)); + if (result) { + TEST_ASSERT(fabs(*(float *)result - 3.14159f) < 0.0001f); + free(result); + } + + zvec_doc_destroy(doc); + TEST_END(); +} + void test_doc_basic_operations(void); void test_doc_get_field_value_basic(void); void test_doc_get_field_value_copy(void); @@ -1055,6 +1660,8 @@ void test_doc_get_field_value_pointer(void); void test_doc_field_operations(void); void test_doc_error_conditions(void); void test_doc_serialization(void); +void test_doc_add_field_by_value(void); +void test_doc_add_field_by_struct(void); void test_doc_functions(void) { test_doc_basic_operations(); @@ -2595,17 +3202,6 @@ void test_utility_functions(void) { void test_memory_management_functions(void) { TEST_START(); - // Test basic memory allocation - void *ptr = zvec_malloc(1024); - TEST_ASSERT(ptr != NULL); - - // Test memory reallocation - void *new_ptr = zvec_realloc(ptr, 2048); - TEST_ASSERT(new_ptr != NULL); - - // Test memory deallocation - zvec_free(new_ptr); - // Test string allocation and deallocation ZVecString *str = zvec_string_create("Test String"); TEST_ASSERT(str != NULL); @@ -3288,6 +3884,461 @@ void test_performance_benchmarks(void) { TEST_END(); } +// ============================================================================= +// Additional tests for uncovered API functions +// ============================================================================= + +void test_zvec_shutdown(void) { + TEST_START(); + + // Test shutdown + ZVecErrorCode err = zvec_shutdown(); + TEST_ASSERT(err == ZVEC_OK); + + // Re-initialize for other tests + ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + zvec_config_data_destroy(config); + + TEST_END(); +} + +void 
test_index_params_creation_functions(void) { + TEST_START(); + + // Test zvec_index_params_init_default + ZVecIndexParams params; + zvec_index_params_init_default(&params, ZVEC_INDEX_TYPE_HNSW, + ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_HNSW); + + // Test zvec_index_params_vector_create + ZVecVectorIndexParams *vector_params = zvec_index_params_vector_create( + ZVEC_INDEX_TYPE_HNSW, ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_FP16); + TEST_ASSERT(vector_params != NULL); + TEST_ASSERT(vector_params->base.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(vector_params->metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(vector_params->quantize_type == ZVEC_QUANTIZE_TYPE_FP16); + if (vector_params) { + zvec_index_params_vector_destroy(vector_params); + } + + // Test zvec_index_params_ivf_create + ZVecIVFIndexParams *ivf_params = zvec_index_params_ivf_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_INT8, 100, 10, true, 5); + TEST_ASSERT(ivf_params != NULL); + TEST_ASSERT(ivf_params->base.base.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params->base.metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(ivf_params->n_list == 100); + TEST_ASSERT(ivf_params->n_iters == 10); + TEST_ASSERT(ivf_params->use_soar == true); + TEST_ASSERT(ivf_params->n_probe == 5); + if (ivf_params) { + zvec_index_params_ivf_destroy(ivf_params); + } + + // Test zvec_index_params_vector_destroy + ZVecVectorIndexParams *vector_params2 = zvec_index_params_vector_create( + ZVEC_INDEX_TYPE_FLAT, ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(vector_params2 != NULL); + zvec_index_params_vector_destroy(vector_params2); + + TEST_END(); +} + +void test_collection_advanced_index_functions(void) { + TEST_START(); + + const char *temp_dir = "/tmp/zvec_test_advanced_index"; + zvec_test_delete_dir(temp_dir); + + // Create schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("test_collection"); + TEST_ASSERT(schema != NULL); + + if
(schema) { + // Add fields + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_collection_schema_add_field(schema, id_field); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + options.max_doc_count_per_segment = 1000; + ZVecCollection *collection = NULL; + + ZVecErrorCode err = zvec_collection_create_and_open(temp_dir, schema, + &options, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test zvec_collection_create_flat_index + ZVecFlatIndexParams *flat_params = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(flat_params != NULL); + err = zvec_collection_create_flat_index(collection, "vec", flat_params); + TEST_ASSERT(err == ZVEC_OK); + zvec_index_params_flat_destroy(flat_params); + + // Test zvec_collection_create_ivf_index + ZVecIVFIndexParams *ivf_params = zvec_index_params_ivf_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_INT8, 100, 10, true, 5); + TEST_ASSERT(ivf_params != NULL); + err = zvec_collection_drop_index(collection, + "vec"); // Drop previous index first + TEST_ASSERT(err == ZVEC_OK); + err = zvec_collection_create_ivf_index(collection, "vec", ivf_params); + TEST_ASSERT(err == ZVEC_OK); + zvec_index_params_ivf_destroy(ivf_params); + + // Test zvec_collection_create_index_with_params + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_FP16, 16, 100, 50); + TEST_ASSERT(hnsw_params != NULL); + err = zvec_collection_drop_index(collection, "vec"); + TEST_ASSERT(err == ZVEC_OK); + err = zvec_collection_create_index_with_params(collection, "vec", + hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + zvec_index_params_hnsw_destroy(hnsw_params); + + // Test zvec_field_schema_set_ivf_index + ZVecFieldSchema 
*new_vec_field = zvec_field_schema_create( + "vec2", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + TEST_ASSERT(new_vec_field != NULL); + ZVecIVFIndexParams *ivf_params2 = zvec_index_params_ivf_create( + ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED, 50, 5, false, 3); + TEST_ASSERT(ivf_params2 != NULL); + zvec_field_schema_set_ivf_index(new_vec_field, ivf_params2); + TEST_ASSERT(new_vec_field->index_params != NULL); + zvec_index_params_ivf_destroy(ivf_params2); + zvec_field_schema_destroy(new_vec_field); + + zvec_collection_destroy(collection); + } + zvec_collection_schema_destroy(schema); + } + + zvec_test_delete_dir(temp_dir); + TEST_END(); +} + +void test_collection_query_functions(void) { + TEST_START(); + + const char *temp_dir = "/tmp/zvec_test_query_funcs"; + zvec_test_delete_dir(temp_dir); + + // Create schema and collection + ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + + zvec_collection_schema_add_field(schema, name_field); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Insert test documents + ZVecDoc *doc1 = zvec_doc_create(); + zvec_doc_set_pk(doc1, "doc1"); + float vec1[4] = {1.0f, 0.0f, 0.0f, 0.0f}; + zvec_doc_add_field_by_value(doc1, "vec", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, + sizeof(vec1)); + zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING, + "document1", 9); + + ZVecDoc *doc2 = zvec_doc_create(); + 
zvec_doc_set_pk(doc2, "doc2"); + float vec2[4] = {0.0f, 1.0f, 0.0f, 0.0f}; + zvec_doc_add_field_by_value(doc2, "vec", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, + sizeof(vec2)); + zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, + "document2", 9); + + ZVecDoc *docs[] = {doc1, doc2}; + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // Test zvec_collection_fetch + const char *pks[] = {"doc1", "doc2"}; + ZVecDoc **results = NULL; + size_t found_count = 0; + err = zvec_collection_fetch(collection, pks, 2, &results, &found_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(found_count == 2); + zvec_docs_free(results, found_count); + + // Test zvec_collection_query_by_group + ZVecGroupByVectorQuery group_query = {0}; + group_query.field_name = ZVEC_STRING_LITERAL("vec"); + float query_vec[4] = {0.5f, 0.5f, 0.0f, 0.0f}; + group_query.query_vector.data = (uint8_t *)query_vec; + group_query.query_vector.length = sizeof(query_vec); + group_query.group_by_field_name = ZVEC_STRING_LITERAL("name"); + group_query.group_count = 2; + group_query.group_topk = 1; + group_query.include_vector = false; + + ZVecStringArray output_fields = {0}; + output_fields.count = 1; + output_fields.strings = + (ZVecString *)malloc(sizeof(ZVecString) * output_fields.count); + output_fields.strings[0] = ZVEC_STRING_LITERAL("name"); + group_query.output_fields = output_fields; + + ZVecDoc **group_results = NULL; + ZVecString **group_values = NULL; + size_t group_result_count = 0; + err = + zvec_collection_query_by_group(collection, &group_query, &group_results, + &group_values, &group_result_count); + TEST_ASSERT(err == ZVEC_OK); + if (group_results) { + zvec_docs_free(group_results, group_result_count); + } + if (group_values) { + for (size_t i = 0; i < group_result_count; i++) { + 
zvec_free_string(group_values[i]); + } + free(group_values); + } + + free(output_fields.strings); + + // Test zvec_collection_get_options + ZVecCollectionOptions *options = NULL; + err = zvec_collection_get_options(collection, &options); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(options != NULL); + free(options); + + zvec_collection_destroy(collection); + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + } + + zvec_index_params_hnsw_destroy(hnsw_params); + zvec_collection_schema_destroy(schema); + zvec_test_delete_dir(temp_dir); + + TEST_END(); +} + +void test_doc_advanced_functions(void) { + TEST_START(); + + // Test zvec_doc_clear + ZVecDoc *doc = zvec_doc_create(); + zvec_doc_set_pk(doc, "test_pk"); + zvec_doc_add_field_by_value(doc, "field1", ZVEC_DATA_TYPE_INT32, + &(int32_t){100}, sizeof(int32_t)); + TEST_ASSERT(zvec_doc_get_field_count(doc) > 0); + zvec_doc_clear(doc); + TEST_ASSERT(zvec_doc_get_field_count(doc) == 0); + + // Test zvec_doc_get_pk_copy + zvec_doc_set_pk(doc, "test_pk_copy"); + const char *pk_copy = zvec_doc_get_pk_copy(doc); + TEST_ASSERT(pk_copy != NULL); + TEST_ASSERT(strcmp(pk_copy, "test_pk_copy") == 0); + free((void *)pk_copy); + + // Test zvec_doc_is_empty + ZVecDoc *empty_doc = zvec_doc_create(); + TEST_ASSERT(zvec_doc_is_empty(empty_doc) == true); + zvec_doc_add_field_by_value(empty_doc, "test", ZVEC_DATA_TYPE_INT32, + &(int32_t){1}, sizeof(int32_t)); + TEST_ASSERT(zvec_doc_is_empty(empty_doc) == false); + zvec_doc_destroy(empty_doc); + + // Test zvec_doc_memory_usage + ZVecDoc *mem_doc = zvec_doc_create(); + zvec_doc_set_pk(mem_doc, "memory_test"); + char large_data[1024]; + memset(large_data, 'A', sizeof(large_data)); + zvec_doc_add_field_by_value(mem_doc, "large_field", ZVEC_DATA_TYPE_STRING, + large_data, sizeof(large_data)); + size_t mem_usage = zvec_doc_memory_usage(mem_doc); + TEST_ASSERT(mem_usage > 0); + zvec_doc_destroy(mem_doc); + + // Test zvec_doc_merge + ZVecDoc *doc1 = zvec_doc_create(); + zvec_doc_set_pk(doc1, 
"merge_test"); + zvec_doc_add_field_by_value(doc1, "field1", ZVEC_DATA_TYPE_INT32, + &(int32_t){100}, sizeof(int32_t)); + + ZVecDoc *doc2 = zvec_doc_create(); + zvec_doc_add_field_by_value(doc2, "field2", ZVEC_DATA_TYPE_STRING, "merged", + 6); + + zvec_doc_merge(doc1, doc2); + TEST_ASSERT(zvec_doc_has_field(doc1, "field1") == true); + TEST_ASSERT(zvec_doc_has_field(doc1, "field2") == true); + + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + + // Test zvec_doc_validate + ZVecCollectionSchema *schema = zvec_collection_schema_create("validate_test"); + ZVecFieldSchema *val_field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, val_field); + + ZVecDoc *val_doc = zvec_doc_create(); + zvec_doc_set_pk(val_doc, "test_pk"); + zvec_doc_add_field_by_value(val_doc, "test_field", ZVEC_DATA_TYPE_INT32, + &(int32_t){42}, sizeof(int32_t)); + + char *error_msg = NULL; + ZVecErrorCode err = zvec_doc_validate(val_doc, schema, false, &error_msg); + TEST_ASSERT(err == ZVEC_OK); + if (error_msg) { + zvec_free_str(error_msg); + } + + zvec_doc_destroy(val_doc); + zvec_collection_schema_destroy(schema); + zvec_doc_destroy(doc); + + // Test zvec_doc_to_detail_string + ZVecDoc *detail_doc = zvec_doc_create(); + zvec_doc_set_pk(detail_doc, "detail_test"); + zvec_doc_add_field_by_value(detail_doc, "int_field", ZVEC_DATA_TYPE_INT32, + &(int32_t){12345}, sizeof(int32_t)); + zvec_doc_add_field_by_value(detail_doc, "str_field", ZVEC_DATA_TYPE_STRING, + "hello", 5); + + char *detail_str = NULL; + err = zvec_doc_to_detail_string(detail_doc, &detail_str); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(detail_str != NULL); + printf(" Document detail: %s\n", detail_str); + zvec_free_str(detail_str); + + zvec_doc_destroy(detail_doc); + + TEST_END(); +} + +void test_array_memory_functions(void) { + TEST_START(); + + // Test ZVecStringArray + ZVecStringArray *str_array = zvec_string_array_create(3); + TEST_ASSERT(str_array != NULL); 
+ if (str_array) { + TEST_ASSERT(str_array->count == 3); + TEST_ASSERT(str_array->strings != NULL); + + // Add strings at specific indices + zvec_string_array_add(str_array, 0, "string1"); + zvec_string_array_add(str_array, 1, "string2"); + zvec_string_array_add(str_array, 2, "string3"); + + // Verify strings were added + TEST_ASSERT(strcmp(str_array->strings[0].data, "string1") == 0); + TEST_ASSERT(strcmp(str_array->strings[1].data, "string2") == 0); + TEST_ASSERT(strcmp(str_array->strings[2].data, "string3") == 0); + zvec_string_array_destroy(str_array); + } + + // Test ZVecMutableByteArray + ZVecMutableByteArray *byte_array = zvec_byte_array_create(1024); + TEST_ASSERT(byte_array != NULL); + if (byte_array) { + TEST_ASSERT(byte_array->capacity == 1024); + TEST_ASSERT(byte_array->length == 0); + TEST_ASSERT(byte_array->data != NULL); + + // Write some data + byte_array->data[0] = 0x01; + byte_array->data[1] = 0x02; + byte_array->data[2] = 0x03; + byte_array->length = 3; + + TEST_ASSERT(byte_array->length == 3); + TEST_ASSERT(byte_array->data[0] == 0x01); + TEST_ASSERT(byte_array->data[1] == 0x02); + TEST_ASSERT(byte_array->data[2] == 0x03); + + zvec_byte_array_destroy(byte_array); + } + + // Test ZVecFloatArray + ZVecFloatArray *float_array = zvec_float_array_create(10); + TEST_ASSERT(float_array != NULL); + if (float_array) { + TEST_ASSERT(float_array->length == 10); + TEST_ASSERT(float_array->data != NULL); + + // Note: Data is initialized to 0 by zvec_float_array_create + // The const qualifier indicates this is typically used for read-only access + // For testing, we verify the allocation succeeded and length is correct + TEST_ASSERT(float_array->data[0] == 0.0f); + TEST_ASSERT(float_array->data[9] == 0.0f); + + zvec_float_array_destroy(float_array); + } + + // Test ZVecInt64Array + ZVecInt64Array *int64_array = zvec_int64_array_create(5); + TEST_ASSERT(int64_array != NULL); + if (int64_array) { + TEST_ASSERT(int64_array->length == 5); + 
TEST_ASSERT(int64_array->data != NULL); + + // Note: Data is initialized to 0 by zvec_int64_array_create + // The const qualifier indicates this is typically used for read-only access + TEST_ASSERT(int64_array->data[0] == 0); + TEST_ASSERT(int64_array->data[4] == 0); + + zvec_int64_array_destroy(int64_array); + } + + // Test edge case: create with zero size + ZVecMutableByteArray *zero_array = zvec_byte_array_create(0); + TEST_ASSERT(zero_array != NULL); + if (zero_array) { + zvec_byte_array_destroy(zero_array); + } + + TEST_END(); +} + +void test_index_params_destruction(void) { + TEST_START(); + + // Test zvec_index_params_invert_destroy + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + TEST_ASSERT(invert_params != NULL); + zvec_index_params_invert_destroy(invert_params); + + TEST_END(); +} + // ============================================================================= // Main function // ============================================================================= @@ -3338,6 +4389,8 @@ int main(void) { test_doc_field_operations(); test_doc_error_conditions(); test_doc_serialization(); + test_doc_add_field_by_value(); + test_doc_add_field_by_struct(); // Index tests test_index_params(); @@ -3357,6 +4410,15 @@ int main(void) { // Memory management tests test_memory_management_functions(); + // Additional API coverage tests + test_zvec_shutdown(); + test_index_params_creation_functions(); + test_collection_advanced_index_functions(); + test_collection_query_functions(); + test_doc_advanced_functions(); + test_array_memory_functions(); + test_index_params_destruction(); + printf("\n=== Comprehensive Test Summary ===\n"); printf("Total tests: %d\n", test_count); printf("Passed: %d\n", passed_count); From c29924f1449af7a5e77ecb6b3688bfe4baa551db Mon Sep 17 00:00:00 2001 From: lc285652 Date: Fri, 13 Mar 2026 14:30:51 +0800 Subject: [PATCH 14/15] fix some code --- examples/c_api/basic_example.c | 4 +- 
examples/c_api/collection_schema_example.c | 2 +- examples/c_api/doc_example.c | 2 +- examples/c_api/field_schema_example.c | 2 +- examples/c_api/index_example.c | 2 +- examples/c_api/optimized_example.c | 4 +- src/c_api/c_api.cc | 18 +------ src/include/zvec/c_api.h | 57 ++-------------------- tests/c_api/c_api_test.c | 24 ++++----- 9 files changed, 27 insertions(+), 88 deletions(-) diff --git a/examples/c_api/basic_example.c b/examples/c_api/basic_example.c index 245e929ea..e4efbdfd6 100644 --- a/examples/c_api/basic_example.c +++ b/examples/c_api/basic_example.c @@ -26,7 +26,7 @@ static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { zvec_get_last_error(&error_msg); fprintf(stderr, "Error in %s: %d - %s\n", context, error, error_msg ? error_msg : "Unknown error"); - zvec_free_str(error_msg); + free(error_msg); } return error; } @@ -209,7 +209,7 @@ int main() { zvec_get_last_error(&error_msg); printf("[ERROR] Query failed: %s\n", error_msg ? error_msg : "Unknown error"); - zvec_free_str(error_msg); + free(error_msg); goto cleanup; } diff --git a/examples/c_api/collection_schema_example.c b/examples/c_api/collection_schema_example.c index 40263526c..d69ca9898 100644 --- a/examples/c_api/collection_schema_example.c +++ b/examples/c_api/collection_schema_example.c @@ -26,7 +26,7 @@ static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { zvec_get_last_error(&error_msg); fprintf(stderr, "Error in %s: %d - %s\n", context, error, error_msg ? error_msg : "Unknown error"); - zvec_free_str(error_msg); + free(error_msg); } return error; } diff --git a/examples/c_api/doc_example.c b/examples/c_api/doc_example.c index 155c7a25b..b0e06624e 100644 --- a/examples/c_api/doc_example.c +++ b/examples/c_api/doc_example.c @@ -28,7 +28,7 @@ static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { zvec_get_last_error(&error_msg); fprintf(stderr, "Error in %s: %d - %s\n", context, error, error_msg ? 
error_msg : "Unknown error"); - zvec_free_str(error_msg); + free(error_msg); } return error; } diff --git a/examples/c_api/field_schema_example.c b/examples/c_api/field_schema_example.c index 41bdd1a9e..8db81d8d6 100644 --- a/examples/c_api/field_schema_example.c +++ b/examples/c_api/field_schema_example.c @@ -26,7 +26,7 @@ static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { zvec_get_last_error(&error_msg); fprintf(stderr, "Error in %s: %d - %s\n", context, error, error_msg ? error_msg : "Unknown error"); - zvec_free_str(error_msg); + free(error_msg); } return error; } diff --git a/examples/c_api/index_example.c b/examples/c_api/index_example.c index 14795ca5b..f4362ac0c 100644 --- a/examples/c_api/index_example.c +++ b/examples/c_api/index_example.c @@ -26,7 +26,7 @@ static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { zvec_get_last_error(&error_msg); fprintf(stderr, "Error in %s: %d - %s\n", context, error, error_msg ? error_msg : "Unknown error"); - zvec_free_str(error_msg); + free(error_msg); } return error; } diff --git a/examples/c_api/optimized_example.c b/examples/c_api/optimized_example.c index 33ed24d61..3441af6e3 100644 --- a/examples/c_api/optimized_example.c +++ b/examples/c_api/optimized_example.c @@ -27,7 +27,7 @@ static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { zvec_get_last_error(&error_msg); fprintf(stderr, "Error in %s: %d - %s\n", context, error, error_msg ? error_msg : "Unknown error"); - zvec_free_str(error_msg); + free(error_msg); } return error; } @@ -256,7 +256,7 @@ int main() { zvec_get_last_error(&error_msg); printf("Query %d failed: %s\n", q, error_msg ? 
error_msg : "Unknown error"); - zvec_free_str(error_msg); + free(error_msg); continue; } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 1d44094f2..480e7d412 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -420,8 +420,8 @@ void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { if (config) { - if (config->dir.data) zvec_free_str(config->dir.data); - if (config->basename.data) zvec_free_str(config->basename.data); + if (config->dir.data) free(config->dir.data); + if (config->basename.data) free(config->basename.data); delete config; } } @@ -781,12 +781,6 @@ void zvec_string_array_destroy(ZVecStringArray *array) { } -void zvec_free_str(char *str) { - if (str) { - free(str); - } -} - // Byte array helper functions ZVecMutableByteArray *zvec_byte_array_create(size_t capacity) { ZVecMutableByteArray *array = @@ -910,7 +904,6 @@ void zvec_free_field_schema(ZVecFieldSchema *field_schema) { } } - // ============================================================================= // Index parameters management interface implementation // ============================================================================= @@ -1295,7 +1288,6 @@ static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { field_schema->name = nullptr; } - // ============================================================================= // CollectionOptions management interface implementation // ============================================================================= @@ -1715,7 +1707,6 @@ uint64_t zvec_collection_schema_get_max_doc_count_per_segment( return schema->max_doc_count_per_segment; } - ZVecErrorCode zvec_collection_schema_validate( const ZVecCollectionSchema *schema, ZVecString **error_msg) { if (!schema) { @@ -1799,7 +1790,6 @@ void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { } } - // ============================================================================= // 
Helper functions // ============================================================================= @@ -2012,7 +2002,6 @@ void zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id) { } } - void zvec_doc_set_score(ZVecDoc *doc, float score) { if (!doc) return; @@ -5109,7 +5098,6 @@ ZVecErrorCode zvec_collection_update(ZVecCollection *collection, } } - ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, size_t *success_count, @@ -5219,12 +5207,10 @@ ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, } } - // ============================================================================= // Data query interface implementation // ============================================================================= - // Helper function to convert common query parameters void convert_common_query_params(zvec::VectorQuery &internal_query, const ZVecVectorQuery *query) { diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 5d2e61769..c5ffbb2bc 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -18,6 +18,7 @@ #include #include #include +#include // ============================================================================= // API Export Control @@ -152,7 +153,7 @@ zvec_get_last_error_details(ZVecErrorDetails *error_details); /** * @brief Get last error message * @param[out] error_msg Returned error message string (needs to be freed by - * calling zvec_free) + * calling free) * @return ZVecErrorCode Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_get_last_error(char **error_msg); @@ -307,12 +308,6 @@ ZVEC_EXPORT int ZVEC_CALL zvec_string_compare(const ZVecString *str1, */ ZVEC_EXPORT void ZVEC_CALL zvec_free_string(ZVecString *str); -/** - * @brief Free string memory - * @param str String pointer to free - */ -ZVEC_EXPORT void ZVEC_CALL zvec_free_str(char *str); - // ============================================================================= // Array Memory 
management functions @@ -557,12 +552,6 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_query_thread_count( ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_optimize_thread_count( ZVecConfigData *config, uint32_t thread_count); -/** - * @brief Destroy log configuration - * @param config Log configuration structure pointer - */ -void zvec_config_log_destroy(ZVecLogConfig *config); - // ============================================================================= // Initialization and Cleanup Interface // ============================================================================= @@ -1096,7 +1085,7 @@ zvec_query_params_union_create(ZVecIndexType index_type); /** * @brief Destroy base query parameters - * @param params HNSW query parameters pointer + * @param params query parameters pointer */ ZVEC_EXPORT void ZVEC_CALL zvec_query_params_destroy(ZVecQueryParams *params); @@ -1607,25 +1596,6 @@ zvec_collection_drop_index(ZVecCollection *collection, const char *field_name); ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_optimize(ZVecCollection *collection); -/** - * @brief Compact collection (reclaim space) - * @param collection Collection handle - * @return ZVecErrorCode Error code */ - -/** - * @brief Get detailed information of the last error - * @param[out] error_details Pointer to error details structure - * @return ZVecErrorCode Error code - */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL -zvec_get_last_error_details(ZVecErrorDetails *error_details); - -/** - * @brief Clear error status - */ -ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); - - // ============================================================================= // Column Management Interface (DDL) // ============================================================================= @@ -2284,9 +2254,9 @@ const char *zvec_metric_type_to_string(ZVecMetricType metric_type); * @param str String content * * Usage example: - * ZVecString name = ZVEC_STRING_LITERAL("my_collection"); + * 
ZVecString name = ZVEC_STRING("my_collection"); */ -#define ZVEC_STRING_LITERAL(str) \ +#define ZVEC_STRING(str) \ (ZVecString) { \ .data = str, .length = strlen(str) \ } @@ -2329,23 +2299,6 @@ const char *zvec_metric_type_to_string(ZVecMetricType metric_type); .data = data_ptr, .length = len \ } - -/** - * @brief Simplified inverted index parameters initialization macro - * @param range_opt Whether to enable range optimization - * @param wildcard Whether to enable wildcard expansion - * - * Usage example: - * ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); - */ -#define ZVEC_INVERT_PARAMS(range_opt, wildcard) \ - (ZVecInvertIndexParams) { \ - .base.index_type = ZVEC_INDEX_TYPE_INVERT, \ - .enable_range_optimization = range_opt, \ - .enable_extended_wildcard = wildcard \ - } - - /** * @brief Simplified collection options initialization macro (using default * values) diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index 8e97baa6f..e8256d802 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -97,7 +97,7 @@ void test_error_handling_functions(void) { TEST_ASSERT(err == ZVEC_OK); if (error_msg) { - zvec_free_str(error_msg); + free(error_msg); } // Test error clearing @@ -2271,7 +2271,7 @@ void test_doc_get_field_value_copy(void) { zvec_free_string(array_zvec_str[i]); } - zvec_free_str(string_field.value.string_value.data); + free(string_field.value.string_value.data); // ARRAY_BOOL type bool array_bool_data[] = {true, false, true, false, true}; @@ -2419,7 +2419,7 @@ void test_doc_get_field_value_copy(void) { free(array_double_result); - zvec_free_str(binary_field.value.string_value.data); + free(binary_field.value.string_value.data); zvec_doc_destroy(doc); TEST_END(); @@ -2835,8 +2835,8 @@ void test_doc_get_field_value_pointer(void) { TEST_ASSERT(fabs(((const double *)array_double_ptr)[1] - 2.222222) < 1e-10); TEST_ASSERT(fabs(((const double *)array_double_ptr)[2] - 3.333333) < 1e-10); - 
zvec_free_str(string_field.value.string_value.data); - zvec_free_str(binary_field.value.string_value.data); + free(string_field.value.string_value.data); + free(binary_field.value.string_value.data); zvec_doc_destroy(doc); TEST_END(); @@ -2910,7 +2910,7 @@ void test_doc_field_operations(void) { TEST_ASSERT(found_key_fields == true); zvec_free_str_array(field_names, name_count); - zvec_free_str(string_field.value.string_value.data); + free(string_field.value.string_value.data); zvec_doc_destroy(doc); TEST_END(); @@ -3025,7 +3025,7 @@ void test_doc_serialization(void) { TEST_ASSERT(deserialized_int32 == -2147483648); zvec_free_uint8_array(serialized_data); - zvec_free_str(string_field.value.string_value.data); + free(string_field.value.string_value.data); zvec_doc_destroy(deserialized_doc); zvec_doc_destroy(doc); @@ -4091,11 +4091,11 @@ void test_collection_query_functions(void) { // Test zvec_collection_query_by_group ZVecGroupByVectorQuery group_query = {0}; - group_query.field_name = ZVEC_STRING_LITERAL("vec"); + group_query.field_name = ZVEC_STRING("vec"); float query_vec[4] = {0.5f, 0.5f, 0.0f, 0.0f}; group_query.query_vector.data = (uint8_t *)query_vec; group_query.query_vector.length = sizeof(query_vec); - group_query.group_by_field_name = ZVEC_STRING_LITERAL("name"); + group_query.group_by_field_name = ZVEC_STRING("name"); group_query.group_count = 2; group_query.group_topk = 1; group_query.include_vector = false; @@ -4104,7 +4104,7 @@ void test_collection_query_functions(void) { output_fields.count = 1; output_fields.strings = (ZVecString *)malloc(sizeof(ZVecString) * output_fields.count); - output_fields.strings[0] = ZVEC_STRING_LITERAL("name"); + output_fields.strings[0] = ZVEC_STRING("name"); group_query.output_fields = output_fields; ZVecDoc **group_results = NULL; @@ -4215,7 +4215,7 @@ void test_doc_advanced_functions(void) { ZVecErrorCode err = zvec_doc_validate(val_doc, schema, false, &error_msg); TEST_ASSERT(err == ZVEC_OK); if (error_msg) { - 
zvec_free_str(error_msg); + free(error_msg); } zvec_doc_destroy(val_doc); @@ -4235,7 +4235,7 @@ void test_doc_advanced_functions(void) { TEST_ASSERT(err == ZVEC_OK); TEST_ASSERT(detail_str != NULL); printf(" Document detail: %s\n", detail_str); - zvec_free_str(detail_str); + free(detail_str); zvec_doc_destroy(detail_doc); From b2857eeaf330a5ad5dbe52a2fb1ea3717b7240c3 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Fri, 13 Mar 2026 18:57:03 +0800 Subject: [PATCH 15/15] refact some code --- src/c_api/c_api.cc | 1639 +++++++++++++++++++------------------- src/include/zvec/c_api.h | 33 +- tests/c_api/c_api_test.c | 79 +- 3 files changed, 849 insertions(+), 902 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 480e7d412..fb8b6c495 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -31,6 +33,150 @@ #include #include +// ============================================================================= +// RAII Helpers and Error Handling Macros +// ============================================================================= + +namespace { + +// RAII guard for malloc-allocated memory +template <typename T> +struct MallocGuard { + T* ptr; + explicit MallocGuard(T* p = nullptr) : ptr(p) {} + ~MallocGuard() { if (ptr) std::free(ptr); } + MallocGuard(const MallocGuard&) = delete; + MallocGuard& operator=(const MallocGuard&) = delete; + MallocGuard(MallocGuard&& other) noexcept : ptr(other.ptr) { other.ptr = nullptr; } + MallocGuard& operator=(MallocGuard&& other) noexcept { + if (this != &other) { + if (ptr) std::free(ptr); + ptr = other.ptr; + other.ptr = nullptr; + } + return *this; + } + T* get() const { return ptr; } + T* release() { T* p = ptr; ptr = nullptr; return p; } + T** ptr_ptr() { return &ptr; } +}; + +// RAII guard for C++ objects allocated with new +template <typename T> +struct DeleteGuard { + T* ptr; + explicit DeleteGuard(T* p = nullptr) : ptr(p) {} + 
~DeleteGuard() { delete ptr; } + DeleteGuard(const DeleteGuard&) = delete; + DeleteGuard& operator=(const DeleteGuard&) = delete; + DeleteGuard(DeleteGuard&& other) noexcept : ptr(other.ptr) { other.ptr = nullptr; } + T* get() const { return ptr; } + T* release() { T* p = ptr; ptr = nullptr; return p; } +}; + +// RAII guard for array allocated with new[] +template <typename T> +struct DeleteArrayGuard { + T* ptr; + explicit DeleteArrayGuard(T* p = nullptr) : ptr(p) {} + ~DeleteArrayGuard() { delete[] ptr; } + DeleteArrayGuard(const DeleteArrayGuard&) = delete; + DeleteArrayGuard& operator=(const DeleteArrayGuard&) = delete; + DeleteArrayGuard(DeleteArrayGuard&& other) noexcept : ptr(other.ptr) { other.ptr = nullptr; } + T* get() const { return ptr; } + T* release() { T* p = ptr; ptr = nullptr; return p; } +}; + +} // namespace + +// Error checking macros - these preserve __LINE__ accuracy +#define ZVEC_CHECK_NOTNULL(ptr, error_code, msg) \ + if (!(ptr)) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +#define ZVEC_CHECK_NOTNULL_ERRCODE(ptr, error_code, msg) \ + if (!(ptr)) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return (error_code); \ + } + +#define ZVEC_CHECK_COND(cond, error_code, msg) \ + if (cond) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +#define ZVEC_CHECK_COND_ERRCODE(cond, error_code, msg) \ + if (cond) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return (error_code); \ + } + +// For void functions (no return value): +#define ZVEC_TRY_BEGIN_VOID \ + try { +#define ZVEC_CATCH_END_VOID \ + } \ + catch (const std::exception &e) { \ + set_last_error(std::string("Exception: ") + e.what()); \ + } + +// For functions returning pointer - complete try-catch wrapper +// Usage: ZVEC_TRY_RETURN_NULL("error msg", code...) 
+// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_NULL(msg, ...) \ + try { { __VA_ARGS__ } } \ + catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), \ + __FILE__, __LINE__, __FUNCTION__); \ + return nullptr; \ + } \ + catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), \ + __FILE__, __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +// For functions returning ErrorCode +// Usage: ZVEC_TRY_RETURN_ERROR("error msg", code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_ERROR(msg, ...) \ + try { { __VA_ARGS__ } } \ + catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), \ + __FILE__, __LINE__, __FUNCTION__); \ + return ZVEC_ERROR_RESOURCE_EXHAUSTED; \ + } \ + catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), \ + __FILE__, __LINE__, __FUNCTION__); \ + return ZVEC_ERROR_INTERNAL_ERROR; \ + } + +// For functions returning scalar values (int, float, size_t, etc.) +// Usage: ZVEC_TRY_RETURN_SCALAR("error msg", error_value, code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_SCALAR(msg, error_val, ...) 
\ + try { { __VA_ARGS__ } } \ + catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), \ + __FILE__, __LINE__, __FUNCTION__); \ + return (error_val); \ + } \ + catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), \ + __FILE__, __LINE__, __FUNCTION__); \ + return (error_val); \ + } + // Global status flags static std::atomic g_initialized{false}; static std::mutex g_init_mutex; @@ -74,7 +220,7 @@ const char *zvec_get_version(void) { std::lock_guard lock(g_version_mutex); if (g_version_info.empty()) { - try { + ZVEC_TRY_BEGIN_VOID std::string version = ZVEC_VERSION_STRING; // Try to get Git information @@ -93,10 +239,7 @@ const char *zvec_get_version(void) { std::string(__TIME__) + ")"; g_version_info = version; - } catch (const std::exception &e) { - // If getting version information fails, fall back to basic version - g_version_info = ZVEC_VERSION_STRING; - } + ZVEC_CATCH_END_VOID } return g_version_info.c_str(); @@ -142,29 +285,30 @@ ZVecString *zvec_string_create(const char *str) { __FUNCTION__); return nullptr; } - ZVecString *zstr = nullptr; - char *data_buffer = nullptr; - try { - size_t len = strlen(str); - zstr = new ZVecString(); - data_buffer = static_cast<char *>(malloc(len + 1)); - strcpy(data_buffer, str); - zstr->data = data_buffer; - zstr->length = len; - zstr->capacity = len + 1; - return zstr; - } catch (const std::exception &e) { - if (data_buffer) { - free(data_buffer); - } - if (zstr) { - delete zstr; - } - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, - std::string("String creation failed: ") + e.what(), + + size_t len = strlen(str); + ZVecString *zstr = static_cast<ZVecString *>(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + char *data_buffer = 
static_cast(malloc(len + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data", __FILE__, __LINE__, __FUNCTION__); return nullptr; } + + memcpy(data_buffer, str, len + 1); + zstr->data = data_buffer; + zstr->length = len; + zstr->capacity = len + 1; + return zstr; } ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { @@ -175,29 +319,30 @@ ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { return nullptr; } - try { - auto zstr = new ZVecString(); - - zstr->data = new char[view->length + 1]; - memcpy(const_cast(zstr->data), view->data, view->length); - const_cast(zstr->data)[view->length] = '\0'; - zstr->length = view->length; - zstr->capacity = view->length + 1; - - return zstr; - } catch (const std::bad_alloc &e) { - set_last_error_details( - ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("String creation from view failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", + __FILE__, __LINE__, __FUNCTION__); return nullptr; - } catch (const std::exception &e) { - set_last_error_details( - ZVEC_ERROR_INTERNAL_ERROR, - std::string("String creation from view failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); + } + + char *data_buffer = static_cast(malloc(view->length + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + + memcpy(data_buffer, view->data, view->length); + data_buffer[view->length] = '\0'; + zstr->data = data_buffer; + zstr->length = view->length; + zstr->capacity = view->length + 1; + + return zstr; } ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { @@ -208,29 +353,30 @@ 
ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { return nullptr; } - try { - auto zstr = new ZVecString(); - - zstr->data = new char[length + 1]; - memcpy(const_cast(zstr->data), data, length); - const_cast(zstr->data)[length] = '\0'; // Null terminate for safety - zstr->length = length; - zstr->capacity = length + 1; - - return zstr; - } catch (const std::bad_alloc &e) { - set_last_error_details( - ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("Binary string creation failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", + __FILE__, __LINE__, __FUNCTION__); return nullptr; - } catch (const std::exception &e) { - set_last_error_details( - ZVEC_ERROR_INTERNAL_ERROR, - std::string("Binary string creation failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); + } + + char *data_buffer = static_cast(malloc(length + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for binary data", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + + memcpy(data_buffer, data, length); + data_buffer[length] = '\0'; + zstr->data = data_buffer; + zstr->length = length; + zstr->capacity = length + 1; + + return zstr; } ZVecString *zvec_string_copy(const ZVecString *str) { @@ -284,21 +430,21 @@ int zvec_string_compare(const ZVecString *str1, const ZVecString *str2) { return strcmp(str1->data, str2->data); } - // ============================================================================= // Configuration-related functions implementation // ============================================================================= ZVecConsoleLogConfig *zvec_config_console_log_create(ZVecLogLevel level) { - try { - auto config = new ZVecConsoleLogConfig(); - config->level = level; - return config; - } catch (const 
std::exception &e) { - set_last_error(std::string("Failed to create console log config: ") + - e.what()); + ZVecConsoleLogConfig *config = static_cast( + malloc(sizeof(ZVecConsoleLogConfig))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConsoleLogConfig", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + config->level = level; + return config; } ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, @@ -306,136 +452,104 @@ ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, const char *basename, uint32_t file_size, uint32_t overdue_days) { - try { - auto config = new ZVecFileLogConfig(); - config->level = level; - config->dir = *(zvec_string_create(dir)); - config->basename = *(zvec_string_create(basename)); - config->file_size = file_size; - config->overdue_days = overdue_days; - return config; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create file log config: ") + - e.what()); + if (!dir || !basename) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Directory or basename cannot be null", __FILE__, + __LINE__, __FUNCTION__); return nullptr; } -} - -ZVecLogConfig *zvec_config_log_create(ZVecLogType type, void *config_data) { - try { - auto log_config = new ZVecLogConfig(); - log_config->type = type; - - switch (type) { - case ZVEC_LOG_TYPE_CONSOLE: { - if (config_data) { - auto console_config = - reinterpret_cast(config_data); - log_config->config.console_config = *console_config; - } else { - log_config->config.console_config.level = ZVEC_LOG_LEVEL_WARN; - } - break; - } - case ZVEC_LOG_TYPE_FILE: { - if (config_data) { - auto file_config = reinterpret_cast(config_data); - log_config->config.file_config = *file_config; - } else { - log_config->config.file_config.level = ZVEC_LOG_LEVEL_WARN; - log_config->config.file_config.dir = *zvec_string_create("./log"); - log_config->config.file_config.basename = 
*zvec_string_create("zvec"); - log_config->config.file_config.file_size = 100; - log_config->config.file_config.overdue_days = 7; - } - break; - } - default: - set_last_error("Invalid log type"); - delete log_config; - return nullptr; - } - return log_config; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create log config: ") + e.what()); + ZVecFileLogConfig *config = static_cast( + malloc(sizeof(ZVecFileLogConfig))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFileLogConfig", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } -} -ZVecConfigData *zvec_config_data_create(void) { - ZVecConfigData *config = nullptr; - ZVecConsoleLogConfig *log_config = nullptr; - ZVecLogConfig *final_log_config = nullptr; - - try { - config = new ZVecConfigData(); + config->level = level; + ZVecString *dir_str = zvec_string_create(dir); + ZVecString *basename_str = zvec_string_create(basename); - log_config = zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); - if (!log_config) { - throw std::runtime_error("Failed to create console log config"); - } - - final_log_config = - zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, log_config); - if (!final_log_config) { - throw std::runtime_error("Failed to create log config"); - } + if (!dir_str || !basename_str) { + if (dir_str) zvec_free_string(dir_str); + if (basename_str) zvec_free_string(basename_str); + free(config); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create strings for file log config", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } - config->log_config = final_log_config; + config->dir = *dir_str; + config->basename = *basename_str; + config->file_size = file_size; + config->overdue_days = overdue_days; - // Set default values from C++ ConfigData - zvec::GlobalConfig::ConfigData config_data; - config->memory_limit_bytes = config_data.memory_limit_bytes; - config->query_thread_count = 
config_data.query_thread_count; - config->invert_to_forward_scan_ratio = - config_data.invert_to_forward_scan_ratio; - config->brute_force_by_keys_ratio = config_data.brute_force_by_keys_ratio; - config->optimize_thread_count = config_data.optimize_thread_count; + // Free the temporary string wrappers (data is copied by value) + free(dir_str); + free(basename_str); - zvec_config_console_log_destroy(log_config); - return config; + return config; +} - } catch (const std::exception &e) { - if (final_log_config) { - zvec_config_log_destroy(final_log_config); - } - if (log_config) { - zvec_config_console_log_destroy(log_config); - } - if (config) { - delete config; - } +ZVecConfigData *zvec_config_data_create(void) { + ZVecConfigData *config = static_cast( + malloc(sizeof(ZVecConfigData))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConfigData", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } - set_last_error(std::string("Failed to create config data: ") + e.what()); + ZVecConsoleLogConfig *log_config = zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); + if (!log_config) { + free(config); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create console log config", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + config->log_config = log_config; + config->log_type = ZVEC_LOG_TYPE_CONSOLE; + + // Set default values from C++ ConfigData + zvec::GlobalConfig::ConfigData config_data; + config->memory_limit_bytes = config_data.memory_limit_bytes; + config->query_thread_count = config_data.query_thread_count; + config->invert_to_forward_scan_ratio = + config_data.invert_to_forward_scan_ratio; + config->brute_force_by_keys_ratio = config_data.brute_force_by_keys_ratio; + config->optimize_thread_count = config_data.optimize_thread_count; + + return config; } void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { if (config) { - delete config; + free(config); 
} } void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { if (config) { - if (config->dir.data) free(config->dir.data); - if (config->basename.data) free(config->basename.data); - delete config; - } -} - -void zvec_config_log_destroy(ZVecLogConfig *config) { - if (config) { - delete config; + if (config->dir.data) free((void *)config->dir.data); + if (config->basename.data) free((void *)config->basename.data); + free(config); } } void zvec_config_data_destroy(ZVecConfigData *config) { - if (config) { - delete config; + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy((ZVecConsoleLogConfig*)config->log_config); + } else { + zvec_config_file_log_destroy((ZVecFileLogConfig*)config->log_config); + } } + free(config); } ZVecErrorCode zvec_config_data_set_memory_limit(ZVecConfigData *config, @@ -450,12 +564,22 @@ ZVecErrorCode zvec_config_data_set_memory_limit(ZVecConfigData *config, } ZVecErrorCode zvec_config_data_set_log_config(ZVecConfigData *config, - ZVecLogConfig *log_config) { - if (!config) { + ZVecLogType log_type, + void *log_config) { + if (!config || !log_config) { set_last_error("Config data pointer is null"); return ZVEC_ERROR_INVALID_ARGUMENT; } + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy((ZVecConsoleLogConfig*)config->log_config); + } else { + zvec_config_file_log_destroy((ZVecFileLogConfig*)config->log_config); + } + } + + config->log_type = log_type; config->log_config = log_config; return ZVEC_OK; } @@ -496,7 +620,7 @@ ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { return ZVEC_ERROR_ALREADY_EXISTS; } - try { + ZVEC_TRY_RETURN_ERROR("Initialization failed", // Convert to C++ configuration object if (config) { zvec::GlobalConfig::ConfigData cpp_config{}; @@ -511,22 +635,24 @@ ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { if (config->log_config) { std::shared_ptr log_config; - switch 
(config->log_config->type) { + switch (config->log_type) { case ZVEC_LOG_TYPE_CONSOLE: { + ZVecConsoleLogConfig *console_config = (ZVecConsoleLogConfig*)config->log_config; auto console_level = static_cast( - config->log_config->config.console_config.level); + console_config->level); log_config = std::make_shared( console_level); break; } case ZVEC_LOG_TYPE_FILE: { + ZVecFileLogConfig *file_config = (ZVecFileLogConfig*)config->log_config; auto file_level = static_cast( - config->log_config->config.file_config.level); - std::string dir(config->log_config->config.file_config.dir.data, - config->log_config->config.file_config.dir.length); + file_config->level); + std::string dir(file_config->dir.data, + file_config->dir.length); std::string basename( - config->log_config->config.file_config.basename.data, - config->log_config->config.file_config.basename.length); + file_config->basename.data, + file_config->basename.length); log_config = std::make_shared( file_level, dir, basename); break; @@ -553,12 +679,7 @@ ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { } g_initialized.store(true); return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, - std::string("Initialization failed: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_shutdown(void) { @@ -570,15 +691,10 @@ ZVecErrorCode zvec_shutdown(void) { return ZVEC_ERROR_FAILED_PRECONDITION; } - try { + ZVEC_TRY_RETURN_ERROR("Shutdown failed", g_initialized.store(false); return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, - std::string("Shutdown failed: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_is_initialized(bool *initialized) { @@ -747,9 +863,9 @@ static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, void zvec_free_string(ZVecString 
*str) { if (str) { if (str->data) { - delete[] str->data; + free((void *)str->data); } - delete str; + free(str); } } @@ -900,7 +1016,7 @@ void zvec_free_field_schema(ZVecFieldSchema *field_schema) { if (field_schema->index_params) { zvec_index_params_destroy(field_schema->index_params); } - delete field_schema; + free(field_schema); } } @@ -1014,120 +1130,125 @@ void zvec_index_params_init_default(ZVecIndexParams *params, void zvec_index_params_destroy(ZVecIndexParams *params) { if (params) { - delete params; + free(params); } } ZVecInvertIndexParams *zvec_index_params_invert_create(bool enable_range_opt, bool enable_wildcard) { - try { - auto params = new ZVecInvertIndexParams(); - zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); - params->enable_range_optimization = enable_range_opt; - params->enable_extended_wildcard = enable_wildcard; - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create invert index params: ") + - e.what()); + ZVecInvertIndexParams *params = static_cast( + malloc(sizeof(ZVecInvertIndexParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecInvertIndexParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); + params->enable_range_optimization = enable_range_opt; + params->enable_extended_wildcard = enable_wildcard; + return params; } ZVecVectorIndexParams *zvec_index_params_vector_create( ZVecIndexType index_type, ZVecMetricType metric_type, ZVecQuantizeType quantize_type) { - try { - auto params = new ZVecVectorIndexParams(); - zvec_index_params_base_init(¶ms->base, index_type); - params->metric_type = metric_type; - params->quantize_type = quantize_type; - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create vector index params: ") + - e.what()); + ZVecVectorIndexParams *params = static_cast( + 
malloc(sizeof(ZVecVectorIndexParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecVectorIndexParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + zvec_index_params_base_init(¶ms->base, index_type); + params->metric_type = metric_type; + params->quantize_type = quantize_type; + return params; } ZVecHnswIndexParams *zvec_index_params_hnsw_create( ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, int ef_construction, int ef_search) { - try { - auto params = new ZVecHnswIndexParams(); - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, - metric_type, quantize_type); - params->m = m; - params->ef_construction = ef_construction; - params->ef_search = ef_search; - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create HNSW index params: ") + - e.what()); + ZVecHnswIndexParams *params = static_cast( + malloc(sizeof(ZVecHnswIndexParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecHnswIndexParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, + metric_type, quantize_type); + params->m = m; + params->ef_construction = ef_construction; + params->ef_search = ef_search; + return params; } ZVecFlatIndexParams *zvec_index_params_flat_create( ZVecMetricType metric_type, ZVecQuantizeType quantize_type) { - try { - auto params = new ZVecFlatIndexParams(); - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, - metric_type, quantize_type); - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create Flat index params: ") + - e.what()); + ZVecFlatIndexParams *params = static_cast( + malloc(sizeof(ZVecFlatIndexParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for 
ZVecFlatIndexParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, + metric_type, quantize_type); + return params; } ZVecIVFIndexParams *zvec_index_params_ivf_create(ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int n_list, int n_iters, bool use_soar, int n_probe) { - try { - auto params = new ZVecIVFIndexParams(); - zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, - metric_type, quantize_type); - params->n_list = n_list; - params->n_iters = n_iters; - params->use_soar = use_soar; - params->n_probe = n_probe; - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create IVF index params: ") + - e.what()); + ZVecIVFIndexParams *params = static_cast( + malloc(sizeof(ZVecIVFIndexParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIVFIndexParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, + metric_type, quantize_type); + params->n_list = n_list; + params->n_iters = n_iters; + params->use_soar = use_soar; + params->n_probe = n_probe; + return params; } void zvec_index_params_invert_destroy(ZVecInvertIndexParams *params) { if (params) { - delete params; + free(params); } } void zvec_index_params_vector_destroy(ZVecVectorIndexParams *params) { if (params) { - delete params; + free(params); } } void zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params) { if (params) { - delete params; + free(params); } } void zvec_index_params_flat_destroy(ZVecFlatIndexParams *params) { if (params) { - delete params; + free(params); } } void zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params) { if (params) { - delete params; + free(params); } } @@ -1145,34 +1266,30 @@ ZVecFieldSchema *zvec_field_schema_create(const char *name, return nullptr; } - try { - auto schema = new ZVecFieldSchema(); - - 
schema->name = zvec_string_create(name); - if (!schema->name) { - delete schema; - return nullptr; - } - - schema->data_type = data_type; - schema->nullable = nullable; - schema->dimension = dimension; - schema->index_params = nullptr; - - return schema; - } catch (const std::bad_alloc &e) { - set_last_error_details( - ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("Field schema creation failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); + ZVecFieldSchema *schema = static_cast( + malloc(sizeof(ZVecFieldSchema))); + if (!schema) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFieldSchema", + __FILE__, __LINE__, __FUNCTION__); return nullptr; - } catch (const std::exception &e) { - set_last_error_details( - ZVEC_ERROR_INTERNAL_ERROR, - std::string("Field schema creation failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + free(schema); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for field name", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + + schema->data_type = data_type; + schema->nullable = nullable; + schema->dimension = dimension; + schema->index_params = nullptr; + + return schema; } void zvec_field_schema_destroy(ZVecFieldSchema *schema) { @@ -1182,7 +1299,7 @@ void zvec_field_schema_destroy(ZVecFieldSchema *schema) { zvec_index_params_destroy(schema->index_params); schema->index_params = nullptr; } - delete schema; + free(schema); } } @@ -1198,40 +1315,40 @@ ZVecErrorCode zvec_field_schema_set_index_params( if (!index_params) { if (schema->index_params) { zvec_index_params_destroy(schema->index_params); - delete schema->index_params; + free(schema->index_params); schema->index_params = nullptr; } return ZVEC_OK; } - try { + if (!schema->index_params) { + schema->index_params = static_cast( + malloc(sizeof(ZVecIndexParams))); if (!schema->index_params) { - schema->index_params = 
new ZVecIndexParams(); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; } + } - *schema->index_params = *index_params; + *schema->index_params = *index_params; - return ZVEC_OK; - } catch (const std::bad_alloc &e) { - set_last_error_details( - ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("Failed to set index params: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); - return ZVEC_ERROR_RESOURCE_EXHAUSTED; - } catch (const std::exception &e) { - set_last_error_details( - ZVEC_ERROR_INTERNAL_ERROR, - std::string("Failed to set index params: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); - return ZVEC_ERROR_INTERNAL_ERROR; - } + return ZVEC_OK; } void zvec_field_schema_set_invert_index( ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params) { if (field_schema && invert_params) { if (!field_schema->index_params) { - field_schema->index_params = new ZVecIndexParams(); + field_schema->index_params = static_cast( + malloc(sizeof(ZVecIndexParams))); + if (!field_schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return; + } } field_schema->index_params->index_type = ZVEC_INDEX_TYPE_INVERT; @@ -1243,7 +1360,14 @@ void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, const ZVecHnswIndexParams *hnsw_params) { if (field_schema && hnsw_params) { if (!field_schema->index_params) { - field_schema->index_params = new ZVecIndexParams(); + field_schema->index_params = static_cast( + malloc(sizeof(ZVecIndexParams))); + if (!field_schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return; + } } field_schema->index_params->index_type = ZVEC_INDEX_TYPE_HNSW; @@ -1255,7 +1379,14 @@ 
void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, const ZVecFlatIndexParams *flat_params) { if (field_schema && flat_params) { if (!field_schema->index_params) { - field_schema->index_params = new ZVecIndexParams(); + field_schema->index_params = static_cast( + malloc(sizeof(ZVecIndexParams))); + if (!field_schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return; + } } field_schema->index_params->index_type = ZVEC_INDEX_TYPE_FLAT; @@ -1267,7 +1398,14 @@ void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, const ZVecIVFIndexParams *ivf_params) { if (field_schema && ivf_params) { if (!field_schema->index_params) { - field_schema->index_params = new ZVecIndexParams(); + field_schema->index_params = static_cast( + malloc(sizeof(ZVecIndexParams))); + if (!field_schema->index_params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIndexParams", + __FILE__, __LINE__, __FUNCTION__); + return; + } } field_schema->index_params->index_type = ZVEC_INDEX_TYPE_IVF; @@ -1280,7 +1418,7 @@ static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { if (field_schema->index_params) { zvec_index_params_destroy(field_schema->index_params); - delete field_schema->index_params; + free(field_schema->index_params); field_schema->index_params = nullptr; } @@ -1318,34 +1456,30 @@ ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { return nullptr; } - try { - auto schema = new ZVecCollectionSchema(); - - schema->name = zvec_string_create(name); - if (!schema->name) { - delete schema; - return nullptr; - } - - schema->fields = nullptr; - schema->field_count = 0; - schema->field_capacity = 0; - schema->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; - - return schema; - } catch (const std::bad_alloc &e) { - set_last_error_details( - 
ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("Collection schema creation failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); + ZVecCollectionSchema *schema = static_cast( + malloc(sizeof(ZVecCollectionSchema))); + if (!schema) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecCollectionSchema", + __FILE__, __LINE__, __FUNCTION__); return nullptr; - } catch (const std::exception &e) { - set_last_error_details( - ZVEC_ERROR_INTERNAL_ERROR, - std::string("Collection schema creation failed: ") + e.what(), __FILE__, - __LINE__, __FUNCTION__); + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + free(schema); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for collection name", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + + schema->fields = nullptr; + schema->field_count = 0; + schema->field_capacity = 0; + schema->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return schema; } void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { @@ -1356,10 +1490,10 @@ void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { for (size_t i = 0; i < schema->field_count; ++i) { zvec_field_schema_destroy(schema->fields[i]); } - delete[] schema->fields; + free(schema->fields); } - delete schema; + free(schema); } } @@ -1379,47 +1513,42 @@ ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { - for (size_t i = 0; i < schema->field_count; ++i) { - if (schema->fields[i]->name && field->name && - zvec_string_compare(schema->fields[i]->name, field->name) == 0) { - set_last_error_details( - ZVEC_ERROR_ALREADY_EXISTS, - std::string("Field '") + field->name->data + "' already exists", - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_ALREADY_EXISTS; - } + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && field->name && + 
zvec_string_compare(schema->fields[i]->name, field->name) == 0) { + set_last_error_details( + ZVEC_ERROR_ALREADY_EXISTS, + std::string("Field '") + field->name->data + "' already exists", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_ALREADY_EXISTS; } + } - if (schema->field_count >= schema->field_capacity) { - size_t new_capacity = - schema->field_capacity == 0 ? 8 : schema->field_capacity * 2; - auto new_fields = new ZVecFieldSchema *[new_capacity]; - - for (size_t i = 0; i < schema->field_count; ++i) { - new_fields[i] = schema->fields[i]; - } - - delete[] schema->fields; - schema->fields = new_fields; - schema->field_capacity = new_capacity; + if (schema->field_count >= schema->field_capacity) { + size_t new_capacity = + schema->field_capacity == 0 ? 8 : schema->field_capacity * 2; + ZVecFieldSchema **new_fields = static_cast( + malloc(new_capacity * sizeof(ZVecFieldSchema *))); + if (!new_fields) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; } - schema->fields[schema->field_count] = field; - schema->field_count++; + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } - return ZVEC_OK; - } catch (const std::bad_alloc &e) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("Failed to add field: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_RESOURCE_EXHAUSTED; - } catch (const std::exception &e) { - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, - std::string("Failed to add field: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_INTERNAL_ERROR; + free(schema->fields); + schema->fields = new_fields; + schema->field_capacity = new_capacity; } + + schema->fields[schema->field_count] = field; + schema->field_count++; + + return ZVEC_OK; } ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, @@ 
-1443,70 +1572,88 @@ ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, return ZVEC_OK; } - try { - for (size_t i = 0; i < field_count; ++i) { - const ZVecFieldSchema &field = fields[i]; - if (!field.name || !field.name->data || field.name->length == 0) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - std::string("Field at index ") + - std::to_string(i) + " has invalid name", - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_INVALID_ARGUMENT; - } + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &field = fields[i]; + if (!field.name || !field.name->data || field.name->length == 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + + std::to_string(i) + " has invalid name", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; } + } - size_t total_needed = schema->field_count + field_count; - if (total_needed > schema->field_capacity) { - size_t new_capacity = schema->field_capacity; - while (new_capacity < total_needed) { - new_capacity = new_capacity == 0 ? 8 : new_capacity * 2; - } - - auto new_fields = new ZVecFieldSchema *[new_capacity]; + size_t total_needed = schema->field_count + field_count; + if (total_needed > schema->field_capacity) { + size_t new_capacity = schema->field_capacity; + while (new_capacity < total_needed) { + new_capacity = new_capacity == 0 ? 
8 : new_capacity * 2; + } - for (size_t i = 0; i < schema->field_count; ++i) { - new_fields[i] = schema->fields[i]; - } + ZVecFieldSchema **new_fields = static_cast( + malloc(new_capacity * sizeof(ZVecFieldSchema *))); + if (!new_fields) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } - delete[] schema->fields; - schema->fields = new_fields; - schema->field_capacity = new_capacity; + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; } - for (size_t i = 0; i < field_count; ++i) { - const ZVecFieldSchema &src_field = fields[i]; + free(schema->fields); + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } - ZVecFieldSchema *new_field = new ZVecFieldSchema(); + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &src_field = fields[i]; - new_field->name = zvec_string_copy(src_field.name); + ZVecFieldSchema *new_field = static_cast( + malloc(sizeof(ZVecFieldSchema))); + if (!new_field) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for new field", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } - new_field->data_type = src_field.data_type; - new_field->nullable = src_field.nullable; - new_field->dimension = src_field.dimension; + new_field->name = zvec_string_copy(src_field.name); + if (!new_field->name) { + free(new_field); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to copy field name", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } - if (src_field.index_params) { - new_field->index_params = new ZVecIndexParams(); - *(new_field->index_params) = *(src_field.index_params); - } else { - new_field->index_params = nullptr; + new_field->data_type = src_field.data_type; + new_field->nullable = src_field.nullable; + new_field->dimension = 
src_field.dimension; + + if (src_field.index_params) { + new_field->index_params = static_cast( + malloc(sizeof(ZVecIndexParams))); + if (!new_field->index_params) { + zvec_free_string(new_field->name); + free(new_field); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for index params", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; } - - schema->fields[schema->field_count] = new_field; - schema->field_count++; + *(new_field->index_params) = *(src_field.index_params); + } else { + new_field->index_params = nullptr; } - return ZVEC_OK; - } catch (const std::bad_alloc &e) { - set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, - std::string("Failed to add fields: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_RESOURCE_EXHAUSTED; - } catch (const std::exception &e) { - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, - std::string("Failed to add fields: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_INTERNAL_ERROR; + schema->fields[schema->field_count] = new_field; + schema->field_count++; } + + return ZVEC_OK; } ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, @@ -1525,31 +1672,24 @@ ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { - for (size_t i = 0; i < schema->field_count; ++i) { - if (schema->fields[i]->name && - strcmp(schema->fields[i]->name->data, field_name) == 0) { - zvec_field_schema_destroy(schema->fields[i]); - - for (size_t j = i; j < schema->field_count - 1; ++j) { - schema->fields[j] = schema->fields[j + 1]; - } + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + zvec_field_schema_destroy(schema->fields[i]); - schema->field_count--; - return ZVEC_OK; + for (size_t j = i; j < schema->field_count - 1; ++j) { + schema->fields[j] = 
schema->fields[j + 1]; } - } - set_last_error_details(ZVEC_ERROR_NOT_FOUND, - std::string("Field '") + field_name + "' not found", - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_NOT_FOUND; - } catch (const std::exception &e) { - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, - std::string("Failed to remove field: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_INTERNAL_ERROR; + schema->field_count--; + return ZVEC_OK; + } } + + set_last_error_details(ZVEC_ERROR_NOT_FOUND, + std::string("Field '") + field_name + "' not found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; } ZVecErrorCode zvec_collection_schema_remove_fields( @@ -1574,16 +1714,15 @@ ZVecErrorCode zvec_collection_schema_remove_fields( return ZVEC_OK; } - try { - for (size_t i = 0; i < field_count; ++i) { - if (!field_names[i]) { - set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, - std::string("Field name at index ") + - std::to_string(i) + " is null", - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_INVALID_ARGUMENT; - } + for (size_t i = 0; i < field_count; ++i) { + if (!field_names[i]) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field name at index ") + + std::to_string(i) + " is null", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; } + } std::vector remove_indices; std::vector not_found_fields; @@ -1606,6 +1745,7 @@ ZVecErrorCode zvec_collection_schema_remove_fields( } } + if (!not_found_fields.empty()) { std::string error_msg = "Fields not found: "; for (size_t i = 0; i < not_found_fields.size(); ++i) { @@ -1633,12 +1773,6 @@ ZVecErrorCode zvec_collection_schema_remove_fields( } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, - std::string("Failed to remove fields: ") + e.what(), - __FILE__, __LINE__, __FUNCTION__); - return ZVEC_ERROR_INTERNAL_ERROR; - } } ZVecFieldSchema *zvec_collection_schema_find_field( @@ 
-1768,7 +1902,7 @@ ZVecErrorCode zvec_collection_schema_validate( void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { if (!schema) return; - try { + ZVEC_TRY_BEGIN_VOID if (schema->name) { zvec_free_string(schema->name); } @@ -1783,11 +1917,7 @@ void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { } schema->max_doc_count_per_segment = 0; - } catch (const std::exception &e) { - fprintf(stderr, - "Warning: Exception in zvec_collection_schema_cleanup: %s\n", - e.what()); - } + ZVEC_CATCH_END_VOID } // ============================================================================= @@ -1942,15 +2072,10 @@ bool check_is_vector_field(const ZVecFieldSchema &zvec_field) { // ============================================================================= ZVecDoc *zvec_doc_create(void) { - try { - auto doc_ptr = - new std::shared_ptr(std::make_shared()); + ZVEC_TRY_RETURN_NULL("Failed to create document", { + auto doc_ptr = new std::shared_ptr(std::make_shared()); return reinterpret_cast(doc_ptr); - - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create document: ") + e.what()); - return nullptr; - } + }) } void zvec_doc_destroy(ZVecDoc *doc) { @@ -1960,14 +2085,12 @@ void zvec_doc_destroy(ZVecDoc *doc) { } void zvec_doc_clear(ZVecDoc *doc) { - if (doc) { - try { - auto doc_ptr = reinterpret_cast *>(doc); - (*doc_ptr)->clear(); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to cleanup document: ") + e.what()); - } - } + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->clear(); + ZVEC_CATCH_END_VOID } void zvec_docs_free(ZVecDoc **docs, size_t count) { @@ -1983,45 +2106,37 @@ void zvec_docs_free(ZVecDoc **docs, size_t count) { void zvec_doc_set_pk(ZVecDoc *doc, const char *pk) { if (!doc || !pk) return; - try { + ZVEC_TRY_BEGIN_VOID auto doc_ptr = reinterpret_cast *>(doc); (*doc_ptr)->set_pk(std::string(pk)); - } catch (const std::exception &e) 
{ - set_last_error(std::string("Failed to set document PK: ") + e.what()); - } + ZVEC_CATCH_END_VOID } void zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id) { if (!doc) return; - try { + ZVEC_TRY_BEGIN_VOID auto doc_ptr = reinterpret_cast *>(doc); (*doc_ptr)->set_doc_id(doc_id); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to set document id: ") + e.what()); - } + ZVEC_CATCH_END_VOID } void zvec_doc_set_score(ZVecDoc *doc, float score) { if (!doc) return; - try { + ZVEC_TRY_BEGIN_VOID auto doc_ptr = reinterpret_cast *>(doc); (*doc_ptr)->set_score(score); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to set document score: ") + e.what()); - } + ZVEC_CATCH_END_VOID } void zvec_doc_set_operator(ZVecDoc *doc, ZVecDocOperator op) { if (!doc) return; - try { + ZVEC_TRY_BEGIN_VOID auto doc_ptr = reinterpret_cast *>(doc); (*doc_ptr)->set_operator(static_cast(op)); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to set document operator: ") + e.what()); - } + ZVEC_CATCH_END_VOID } // ============================================================================= @@ -2299,7 +2414,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to add field", auto doc_ptr = reinterpret_cast *>(doc); std::string name(field_name); ZVecErrorCode error_code = ZVEC_OK; @@ -2591,10 +2706,7 @@ ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to add field: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, @@ -2604,7 +2716,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to add field", auto doc_ptr = reinterpret_cast *>(doc); 
std::string name(field->name.data, field->name.length); @@ -2860,10 +2972,7 @@ ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to add field: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } const char *zvec_doc_get_pk_pointer(const ZVecDoc *doc) { @@ -2886,49 +2995,37 @@ const char *zvec_doc_get_pk_copy(const ZVecDoc *doc) { uint64_t zvec_doc_get_doc_id(const ZVecDoc *doc) { if (!doc) return 0; - try { + ZVEC_TRY_RETURN_SCALAR("Failed to get document ID", 0, auto doc_ptr = reinterpret_cast *>(doc); return (*doc_ptr)->doc_id(); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to get document ID: ") + e.what()); - return 0; - } + ) } float zvec_doc_get_score(const ZVecDoc *doc) { if (!doc) return 0.0f; - try { + ZVEC_TRY_RETURN_SCALAR("Failed to get document score", 0.0f, auto doc_ptr = reinterpret_cast *>(doc); return (*doc_ptr)->score(); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to get document score: ") + e.what()); - return 0.0f; - } + ) } ZVecDocOperator zvec_doc_get_operator(const ZVecDoc *doc) { if (!doc) return ZVEC_DOC_OP_INSERT; // default - try { + ZVEC_TRY_RETURN_SCALAR("Failed to get document operator", ZVEC_DOC_OP_INSERT, auto doc_ptr = reinterpret_cast *>(doc); zvec::Operator op = (*doc_ptr)->get_operator(); return static_cast(op); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to get document operator: ") + e.what()); - return ZVEC_DOC_OP_INSERT; - } + ) } size_t zvec_doc_get_field_count(const ZVecDoc *doc) { if (!doc) return 0; - try { + ZVEC_TRY_RETURN_SCALAR("Failed to get field count", 0, auto doc_ptr = reinterpret_cast *>(doc); return (*doc_ptr)->field_names().size(); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to get field count: ") + e.what()); - return 0; - } + ) } ZVecErrorCode zvec_doc_get_field_value_basic(const 
ZVecDoc *doc, @@ -2941,7 +3038,7 @@ ZVecErrorCode zvec_doc_get_field_value_basic(const ZVecDoc *doc, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to get field value", auto doc_ptr = reinterpret_cast *>(doc); // Check if field exists @@ -3022,10 +3119,7 @@ ZVecErrorCode zvec_doc_get_field_value_basic(const ZVecDoc *doc, } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_doc_get_field_value_copy(const ZVecDoc *doc, @@ -3037,7 +3131,7 @@ ZVecErrorCode zvec_doc_get_field_value_copy(const ZVecDoc *doc, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to get field value copy", auto doc_ptr = reinterpret_cast *>(doc); // Check if field exists @@ -3494,10 +3588,7 @@ ZVecErrorCode zvec_doc_get_field_value_copy(const ZVecDoc *doc, } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, @@ -3510,7 +3601,7 @@ ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to get field value pointer", auto doc_ptr = reinterpret_cast *>(doc); // Check if field exists @@ -3680,10 +3771,7 @@ ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } bool zvec_doc_is_empty(const ZVecDoc *doc) { @@ -3692,14 +3780,10 @@ bool zvec_doc_is_empty(const ZVecDoc *doc) { return true; } - try { + ZVEC_TRY_RETURN_SCALAR("Failed to check if document is empty", true, auto doc_ptr = reinterpret_cast *>(doc); return (*doc_ptr)->is_empty(); - } catch (const std::exception &e) 
{ - set_last_error(std::string("Failed to check if document is empty: ") + - e.what()); - return true; - } + ) } ZVecErrorCode zvec_doc_remove_field(ZVecDoc *doc, const char *field_name) { @@ -3708,14 +3792,11 @@ ZVecErrorCode zvec_doc_remove_field(ZVecDoc *doc, const char *field_name) { return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to remove field", auto doc_ptr = reinterpret_cast *>(doc); (*doc_ptr)->remove(std::string(field_name)); return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to remove field: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } @@ -3725,13 +3806,10 @@ bool zvec_doc_has_field(const ZVecDoc *doc, const char *field_name) { return false; } - try { + ZVEC_TRY_RETURN_SCALAR("Failed to check field existence", false, auto doc_ptr = reinterpret_cast *>(doc); return (*doc_ptr)->has(std::string(field_name)); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to check field existence: ") + e.what()); - return false; - } + ) } bool zvec_doc_has_field_value(const ZVecDoc *doc, const char *field_name) { @@ -3740,14 +3818,10 @@ bool zvec_doc_has_field_value(const ZVecDoc *doc, const char *field_name) { return false; } - try { + ZVEC_TRY_RETURN_SCALAR("Failed to check field value existence", false, auto doc_ptr = reinterpret_cast *>(doc); return (*doc_ptr)->has_value(std::string(field_name)); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to check field value existence: ") + - e.what()); - return false; - } + ) } bool zvec_doc_is_field_null(const ZVecDoc *doc, const char *field_name) { @@ -3756,14 +3830,10 @@ bool zvec_doc_is_field_null(const ZVecDoc *doc, const char *field_name) { return false; } - try { + ZVEC_TRY_RETURN_SCALAR("Failed to check if field is null", false, auto doc_ptr = reinterpret_cast *>(doc); return (*doc_ptr)->is_null(std::string(field_name)); - } catch (const std::exception &e) { - 
set_last_error(std::string("Failed to check if field is null: ") + - e.what()); - return false; - } + ) } ZVecErrorCode zvec_doc_get_field_names(const ZVecDoc *doc, char ***field_names, @@ -3773,7 +3843,7 @@ ZVecErrorCode zvec_doc_get_field_names(const ZVecDoc *doc, char ***field_names, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to get field names", auto doc_ptr = reinterpret_cast *>(doc); std::vector names = (*doc_ptr)->field_names(); @@ -3803,10 +3873,7 @@ ZVecErrorCode zvec_doc_get_field_names(const ZVecDoc *doc, char ***field_names, } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to get field names: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_doc_serialize(const ZVecDoc *doc, uint8_t **data, @@ -3816,7 +3883,7 @@ ZVecErrorCode zvec_doc_serialize(const ZVecDoc *doc, uint8_t **data, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to serialize document", auto doc_ptr = reinterpret_cast *>(doc); std::vector serialized_data = (*doc_ptr)->serialize(); @@ -3834,10 +3901,7 @@ ZVecErrorCode zvec_doc_serialize(const ZVecDoc *doc, uint8_t **data, memcpy(*data, serialized_data.data(), *size); return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to serialize document: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_doc_deserialize(const uint8_t *data, size_t size, @@ -3847,7 +3911,7 @@ ZVecErrorCode zvec_doc_deserialize(const uint8_t *data, size_t size, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to deserialize document", auto deserialized_doc = zvec::Doc::deserialize(data, size); if (!deserialized_doc) { set_last_error("Failed to deserialize document"); @@ -3857,10 +3921,7 @@ ZVecErrorCode zvec_doc_deserialize(const uint8_t *data, size_t size, auto doc_ptr = new std::shared_ptr(deserialized_doc); *doc = 
reinterpret_cast(doc_ptr); return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to deserialize document: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } void zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other) { @@ -3869,14 +3930,12 @@ void zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other) { return; } - try { + ZVEC_TRY_BEGIN_VOID auto doc_ptr = reinterpret_cast *>(doc); auto other_ptr = reinterpret_cast *>(other); (*doc_ptr)->merge(**other_ptr); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to merge documents: ") + e.what()); - } + ZVEC_CATCH_END_VOID } size_t zvec_doc_memory_usage(const ZVecDoc *doc) { @@ -3885,14 +3944,10 @@ size_t zvec_doc_memory_usage(const ZVecDoc *doc) { return 0; } - try { + ZVEC_TRY_RETURN_SCALAR("Failed to get document memory usage", 0, auto doc_ptr = reinterpret_cast *>(doc); return (*doc_ptr)->memory_usage(); - } catch (const std::exception &e) { - set_last_error(std::string("Failed to get document memory usage: ") + - e.what()); - return 0; - } + ) } ZVecErrorCode zvec_doc_validate(const ZVecDoc *doc, @@ -3903,7 +3958,7 @@ ZVecErrorCode zvec_doc_validate(const ZVecDoc *doc, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to validate document", std::shared_ptr schema_ptr = nullptr; auto status = convert_zvec_collection_schema_to_internal(schema, schema_ptr); @@ -3927,10 +3982,7 @@ ZVecErrorCode zvec_doc_validate(const ZVecDoc *doc, *error_msg = nullptr; } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to validate document: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { @@ -3939,7 +3991,7 @@ ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to get document detail string", auto doc_ptr 
= reinterpret_cast *>(doc); std::string detail = (*doc_ptr)->to_detail_string(); *detail_str = copy_string(detail); @@ -3950,11 +4002,7 @@ ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to get document detail string: ") + - e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } // ============================================================================= @@ -3964,7 +4012,7 @@ ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { ZVecErrorCode zvec_collection_create_and_open( const char *path, const ZVecCollectionSchema *schema, const ZVecCollectionOptions *options, ZVecCollection **collection) { - try { + ZVEC_TRY_RETURN_ERROR("Exception in zvec_collection_create_and_open_with_schema", if (!path || !schema || !collection) { set_last_error("Path, schema, or collection cannot be null"); return ZVEC_ERROR_INVALID_ARGUMENT; @@ -3995,13 +4043,7 @@ ZVecErrorCode zvec_collection_create_and_open( } return error_code; - } catch (const std::exception &e) { - set_last_error( - std::string( - "Exception in zvec_collection_create_and_open_with_schema: ") + - e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_open(const char *path, @@ -4012,7 +4054,7 @@ ZVecErrorCode zvec_collection_open(const char *path, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", zvec::CollectionOptions collection_options; if (options) { collection_options.enable_mmap_ = options->enable_mmap; @@ -4029,10 +4071,7 @@ ZVecErrorCode zvec_collection_open(const char *path, } return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_close(ZVecCollection *collection) { @@ -4041,13 +4080,10 @@ ZVecErrorCode 
zvec_collection_close(ZVecCollection *collection) { return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", delete reinterpret_cast *>(collection); return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_destroy(ZVecCollection *collection) { @@ -4056,7 +4092,7 @@ ZVecErrorCode zvec_collection_destroy(ZVecCollection *collection) { return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto &coll = *reinterpret_cast *>(collection); zvec::Status status = coll->Destroy(); @@ -4065,10 +4101,7 @@ ZVecErrorCode zvec_collection_destroy(ZVecCollection *collection) { } return status_to_error_code(status); - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { @@ -4077,7 +4110,7 @@ ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto &coll = *reinterpret_cast *>(collection); zvec::Status status = coll->Flush(); @@ -4087,10 +4120,7 @@ ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { } return status_to_error_code(status); - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, @@ -4100,7 +4130,7 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto &coll = *reinterpret_cast *>( collection); auto result = coll->Schema(); @@ -4110,7 +4140,8 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection 
*collection, const auto &cpp_schema = result.value(); // Create new schema structure - ZVecCollectionSchema *c_schema = new ZVecCollectionSchema(); + ZVecCollectionSchema *c_schema = static_cast( + malloc(sizeof(ZVecCollectionSchema))); if (!c_schema) { set_last_error("Failed to allocate memory for schema"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; @@ -4127,7 +4158,7 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, // Set collection name c_schema->name = zvec_string_create(cpp_schema.name().c_str()); if (!c_schema->name) { - delete c_schema; + free(c_schema); set_last_error("Failed to allocate memory for collection name"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; } @@ -4139,7 +4170,8 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, if (c_schema->field_count > 0) { // Allocate array of field pointers - c_schema->fields = new ZVecFieldSchema *[c_schema->field_count]; + c_schema->fields = static_cast( + malloc(c_schema->field_count * sizeof(ZVecFieldSchema *))); if (!c_schema->fields) { zvec_collection_schema_destroy(c_schema); set_last_error("Failed to allocate memory for fields"); @@ -4155,7 +4187,11 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, for (const auto &cpp_field : cpp_fields) { try { // Create new field schema - c_schema->fields[i] = new ZVecFieldSchema(); + c_schema->fields[i] = static_cast( + malloc(sizeof(ZVecFieldSchema))); + if (!c_schema->fields[i]) { + throw std::bad_alloc(); + } // Copy field name using zvec_string_create c_schema->fields[i]->name = @@ -4187,7 +4223,8 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, std::dynamic_pointer_cast( index_params); if (hnsw_params) { - auto c_hnsw_params = new ZVecHnswIndexParams(); + auto c_hnsw_params = static_cast( + malloc(sizeof(ZVecHnswIndexParams))); if (!c_hnsw_params) { throw std::bad_alloc(); } @@ -4220,7 +4257,8 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection 
*collection, std::dynamic_pointer_cast( index_params); if (ivf_params) { - auto c_ivf_params = new ZVecIVFIndexParams(); + auto c_ivf_params = static_cast( + malloc(sizeof(ZVecIVFIndexParams))); if (!c_ivf_params) { throw std::bad_alloc(); } @@ -4253,7 +4291,8 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, std::dynamic_pointer_cast( index_params); if (flat_params) { - auto c_flat_params = new ZVecFlatIndexParams(); + auto c_flat_params = static_cast( + malloc(sizeof(ZVecFlatIndexParams))); if (!c_flat_params) { throw std::bad_alloc(); } @@ -4283,7 +4322,8 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, std::dynamic_pointer_cast( index_params); if (invert_params) { - auto c_invert_params = new ZVecInvertIndexParams(); + auto c_invert_params = static_cast( + malloc(sizeof(ZVecInvertIndexParams))); if (!c_invert_params) { throw std::bad_alloc(); } @@ -4324,9 +4364,9 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, zvec_field_schema_destroy(c_schema->fields[j]); } } - delete[] c_schema->fields; + free(c_schema->fields); zvec_free_string(c_schema->name); - delete c_schema; + free(c_schema); set_last_error("Failed to allocate memory for field"); return ZVEC_ERROR_RESOURCE_EXHAUSTED; } @@ -4339,10 +4379,7 @@ ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, } return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, @@ -4352,7 +4389,7 @@ ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to get collection options", auto collection_ptr = reinterpret_cast *>(collection); auto result = (*collection_ptr)->Options(); @@ -4363,8 +4400,13 @@ ZVecErrorCode 
zvec_collection_get_options(const ZVecCollection *collection, return ZVEC_ERROR_INTERNAL_ERROR; } - // 创建并初始化选项结构体 - *options = new ZVecCollectionOptions(); + // Create and initialize options structure + *options = static_cast( + malloc(sizeof(ZVecCollectionOptions))); + if (!*options) { + set_last_error("Failed to allocate memory for options"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } (*options)->enable_mmap = result.value().enable_mmap_; (*options)->max_buffer_size = result.value().max_buffer_size_; @@ -4372,11 +4414,7 @@ ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, (*options)->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to get collection options: ") + - e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, @@ -4386,7 +4424,7 @@ ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Failed to get detailed collection stats", auto collection_ptr = reinterpret_cast *>(collection); auto result = (*collection_ptr)->Stats(); @@ -4397,7 +4435,13 @@ ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, return ZVEC_ERROR_INTERNAL_ERROR; } - *stats = new ZVecCollectionStats(); + *stats = static_cast( + malloc(sizeof(ZVecCollectionStats))); + if (!*stats) { + set_last_error("Failed to allocate memory for stats"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + ZVecErrorCode error_code = handle_expected_result(result); if (error_code == ZVEC_OK) { (*stats)->doc_count = result.value().doc_count; @@ -4416,15 +4460,24 @@ ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, } } else { (*stats)->index_completeness = nullptr; - *(*stats)->index_names = nullptr; + (*stats)->index_names = nullptr; } return error_code; - } catch (const 
std::exception &e) { - set_last_error(std::string("Failed to get detailed collection stats: ") + - e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; + ) +} + +ZVecCollectionStats *zvec_collection_stats_create(void) { + ZVecCollectionStats *stats = static_cast( + malloc(sizeof(ZVecCollectionStats))); + if (!stats) { + return nullptr; } + stats->doc_count = 0; + stats->index_count = 0; + stats->index_completeness = nullptr; + stats->index_names = nullptr; + return stats; } void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { @@ -4440,7 +4493,7 @@ void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { free(stats->index_completeness); } - delete stats; + free(stats); } } @@ -4449,132 +4502,138 @@ void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { // ============================================================================= ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { - try { - auto params = new ZVecQueryParams(); - params->index_type = index_type; - params->radius = 0.0f; - params->is_linear = false; - params->is_using_refiner = false; - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create query params: ") + e.what()); + ZVecQueryParams *params = static_cast( + malloc(sizeof(ZVecQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecQueryParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + params->index_type = index_type; + params->radius = 0.0f; + params->is_linear = false; + params->is_using_refiner = false; + return params; } ZVecHnswQueryParams *zvec_query_params_hnsw_create(ZVecIndexType index_type, int ef, float radius, bool is_linear, bool is_using_refiner) { - try { - auto params = new ZVecHnswQueryParams(); - params->base.index_type = index_type; - params->base.radius = radius; - params->base.is_linear = is_linear; - params->base.is_using_refiner = is_using_refiner; - 
params->ef = ef; - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create HNSW query params: ") + - e.what()); + ZVecHnswQueryParams *params = static_cast( + malloc(sizeof(ZVecHnswQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecHnswQueryParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + params->base.index_type = index_type; + params->base.radius = radius; + params->base.is_linear = is_linear; + params->base.is_using_refiner = is_using_refiner; + params->ef = ef; + return params; } ZVecIVFQueryParams *zvec_query_params_ivf_create(ZVecIndexType index_type, int nprobe, bool is_using_refiner, float scale_factor) { - try { - auto params = new ZVecIVFQueryParams(); - params->base.index_type = index_type; - params->base.is_using_refiner = is_using_refiner; - params->nprobe = nprobe; - params->scale_factor = scale_factor; - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create IVF query params: ") + - e.what()); + ZVecIVFQueryParams *params = static_cast( + malloc(sizeof(ZVecIVFQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIVFQueryParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->nprobe = nprobe; + params->scale_factor = scale_factor; + return params; } ZVecFlatQueryParams *zvec_query_params_flat_create(ZVecIndexType index_type, bool is_using_refiner, float scale_factor) { - try { - auto params = new ZVecFlatQueryParams(); - params->base.index_type = index_type; - params->base.is_using_refiner = is_using_refiner; - params->scale_factor = scale_factor; - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create Flat query params: ") + - e.what()); + 
ZVecFlatQueryParams *params = static_cast( + malloc(sizeof(ZVecFlatQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFlatQueryParams", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->scale_factor = scale_factor; + return params; } ZVecQueryParamsUnion *zvec_query_params_union_create(ZVecIndexType index_type) { - try { - auto params = new ZVecQueryParamsUnion(); - params->index_type = index_type; - - switch (index_type) { - case ZVEC_INDEX_TYPE_HNSW: - params->params.hnsw_params.base.index_type = index_type; - params->params.hnsw_params.ef = - zvec::core_interface::kDefaultHnswEfSearch; - break; - case ZVEC_INDEX_TYPE_IVF: - params->params.ivf_params.base.index_type = index_type; - params->params.ivf_params.nprobe = 10; - params->params.ivf_params.scale_factor = 10.0f; - break; - case ZVEC_INDEX_TYPE_FLAT: - params->params.flat_params.base.index_type = index_type; - params->params.flat_params.scale_factor = 10.0f; - break; - default: - params->params.base_params.index_type = index_type; - break; - } - - return params; - } catch (const std::exception &e) { - set_last_error(std::string("Failed to create query params union: ") + - e.what()); + ZVecQueryParamsUnion *params = static_cast( + malloc(sizeof(ZVecQueryParamsUnion))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecQueryParamsUnion", + __FILE__, __LINE__, __FUNCTION__); return nullptr; } + params->index_type = index_type; + + switch (index_type) { + case ZVEC_INDEX_TYPE_HNSW: + params->params.hnsw_params.base.index_type = index_type; + params->params.hnsw_params.ef = + zvec::core_interface::kDefaultHnswEfSearch; + break; + case ZVEC_INDEX_TYPE_IVF: + params->params.ivf_params.base.index_type = index_type; + params->params.ivf_params.nprobe = 10; + 
params->params.ivf_params.scale_factor = 10.0f; + break; + case ZVEC_INDEX_TYPE_FLAT: + params->params.flat_params.base.index_type = index_type; + params->params.flat_params.scale_factor = 10.0f; + break; + default: + params->params.base_params.index_type = index_type; + break; + } + + return params; } void zvec_query_params_destroy(ZVecQueryParams *params) { if (params) { - delete params; + free(params); } } void zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { if (params) { - delete params; + free(params); } } void zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params) { if (params) { - delete params; + free(params); } } void zvec_query_params_flat_destroy(ZVecFlatQueryParams *params) { if (params) { - delete params; + free(params); } } void zvec_query_params_union_destroy(ZVecQueryParamsUnion *params) { if (params) { - delete params; + free(params); } } @@ -4670,7 +4729,7 @@ ZVecErrorCode zvec_collection_create_index( return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception in zvec_collection_create_index", auto coll_ptr = reinterpret_cast *>(collection); std::string field_name_str(column_name); @@ -4724,11 +4783,7 @@ ZVecErrorCode zvec_collection_create_index( return ZVEC_ERROR_INVALID_ARGUMENT; } } - } catch (const std::exception &e) { - set_last_error(std::string("Exception in zvec_collection_create_index: ") + - e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_create_index_with_params( @@ -4746,7 +4801,7 @@ ZVecErrorCode zvec_collection_create_index_with_params( const ZVecBaseIndexParams *base_params = static_cast(index_params); - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", switch (base_params->index_type) { case ZVEC_INDEX_TYPE_INVERT: { const ZVecInvertIndexParams *invert_params = @@ -4797,10 +4852,7 @@ ZVecErrorCode zvec_collection_create_index_with_params( return ZVEC_ERROR_INVALID_ARGUMENT; } } - } catch (const std::exception &e) { - set_last_error(e.what()); - return 
ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_create_hnsw_index( @@ -4859,7 +4911,7 @@ ZVecErrorCode zvec_collection_drop_index(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); zvec::Status status = (*coll_ptr)->DropIndex(column_name); @@ -4868,10 +4920,7 @@ ZVecErrorCode zvec_collection_drop_index(ZVecCollection *collection, } return status_to_error_code(status); - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_optimize(ZVecCollection *collection) { @@ -4880,7 +4929,7 @@ ZVecErrorCode zvec_collection_optimize(ZVecCollection *collection) { return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); zvec::Status status = (*coll_ptr)->Optimize(); @@ -4889,10 +4938,7 @@ ZVecErrorCode zvec_collection_optimize(ZVecCollection *collection) { } return status_to_error_code(status); - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } @@ -4909,7 +4955,7 @@ ZVecErrorCode zvec_collection_add_column(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); @@ -4940,10 +4986,7 @@ ZVecErrorCode zvec_collection_add_column(ZVecCollection *collection, } return status_to_error_code(status); - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_drop_column(ZVecCollection *collection, @@ -4954,7 +4997,7 @@ ZVecErrorCode zvec_collection_drop_column(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } 
- try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); zvec::Status status = (*coll_ptr)->DropColumn(column_name); @@ -4964,10 +5007,7 @@ ZVecErrorCode zvec_collection_drop_column(ZVecCollection *collection, } return status_to_error_code(status); - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_alter_column(ZVecCollection *collection, @@ -4980,7 +5020,7 @@ ZVecErrorCode zvec_collection_alter_column(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); std::string rename = new_name ? new_name : ""; @@ -5000,10 +5040,7 @@ ZVecErrorCode zvec_collection_alter_column(ZVecCollection *collection, } return status_to_error_code(status); - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } // ============================================================================= @@ -5022,7 +5059,7 @@ ZVecErrorCode zvec_collection_insert(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception in zvec_collection_insert_docs", auto coll_ptr = reinterpret_cast *>(collection); @@ -5048,13 +5085,7 @@ ZVecErrorCode zvec_collection_insert(ZVecCollection *collection, } return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception in zvec_collection_insert_docs: ") + - e.what()); - *success_count = 0; - *error_count = doc_count; - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_update(ZVecCollection *collection, @@ -5069,7 +5100,7 @@ ZVecErrorCode zvec_collection_update(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto 
coll_ptr = reinterpret_cast *>(collection); @@ -5092,10 +5123,7 @@ ZVecErrorCode zvec_collection_update(ZVecCollection *collection, } return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, @@ -5110,7 +5138,7 @@ ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); @@ -5133,10 +5161,7 @@ ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, } return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_delete(ZVecCollection *collection, @@ -5150,7 +5175,7 @@ ZVecErrorCode zvec_collection_delete(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); @@ -5178,10 +5203,7 @@ ZVecErrorCode zvec_collection_delete(ZVecCollection *collection, } return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, @@ -5191,7 +5213,7 @@ ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); @@ -5201,10 +5223,7 @@ ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, return status_to_error_code(status); } return ZVEC_OK; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - return 
ZVEC_ERROR_INTERNAL_ERROR; - } + ) } // ============================================================================= @@ -5567,7 +5586,7 @@ ZVecErrorCode zvec_collection_query(const ZVecCollection *collection, return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); @@ -5588,12 +5607,7 @@ ZVecErrorCode zvec_collection_query(const ZVecCollection *collection, } return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - *results = nullptr; - *result_count = 0; - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_query_by_group( @@ -5607,7 +5621,7 @@ ZVecErrorCode zvec_collection_query_by_group( return ZVEC_ERROR_INVALID_ARGUMENT; } - try { + ZVEC_TRY_RETURN_ERROR("Exception occurred", auto coll_ptr = reinterpret_cast *>(collection); @@ -5628,13 +5642,7 @@ ZVecErrorCode zvec_collection_query_by_group( } return error_code; - } catch (const std::exception &e) { - set_last_error(std::string("Exception occurred: ") + e.what()); - *results = nullptr; - *group_by_values = nullptr; - *result_count = 0; - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } ZVecErrorCode zvec_collection_fetch(ZVecCollection *collection, @@ -5654,7 +5662,7 @@ ZVecErrorCode zvec_collection_fetch(ZVecCollection *collection, return ZVEC_OK; } - try { + ZVEC_TRY_RETURN_ERROR("Exception in zvec_collection_fetch", auto coll_ptr = reinterpret_cast *>(collection); @@ -5679,12 +5687,5 @@ ZVecErrorCode zvec_collection_fetch(ZVecCollection *collection, const auto &doc_map = result.value(); return convert_fetched_document_results(doc_map, results, doc_count); - - } catch (const std::exception &e) { - set_last_error(std::string("Exception in zvec_collection_fetch: ") + - e.what()); - *results = nullptr; - *doc_count = 0; - return ZVEC_ERROR_INTERNAL_ERROR; - } + ) } diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 
c5ffbb2bc..d29c75bf6 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -422,17 +422,6 @@ typedef struct { uint32_t overdue_days; /**< Log expiration days */ } ZVecFileLogConfig; -/** - * @brief Log configuration union - */ -typedef struct { - ZVecLogType type; /**< Log type */ - union { - ZVecConsoleLogConfig console_config; /**< Console log configuration */ - ZVecFileLogConfig file_config; /**< File log configuration */ - } config; -} ZVecLogConfig; - /** * @brief ZVec configuration data structure (corresponds to zvec::ConfigData) */ @@ -440,8 +429,8 @@ typedef struct { uint64_t memory_limit_bytes; /**< Memory limit in bytes */ // log - ZVecLogConfig *log_config; /**< Log configuration (optional, NULL means using - default configuration) */ + ZVecLogType log_type; + void *log_config; /**< Log configuration (ZVecConsoleLogConfig or ZVecFileLogConfig) */ // query uint32_t query_thread_count; /**< Query thread count */ @@ -475,15 +464,6 @@ ZVEC_EXPORT ZVecFileLogConfig *ZVEC_CALL zvec_config_file_log_create( ZVecLogLevel level, const char *dir, const char *basename, uint32_t file_size, uint32_t overdue_days); -/** - * @brief Create log configuration - * @param type Log type - * @param config_data Configuration data (specific to log type) - * @return ZVecLogConfig* Pointer to the newly created log configuration - */ -ZVEC_EXPORT ZVecLogConfig *ZVEC_CALL zvec_config_log_create(ZVecLogType type, - void *config_data); - /** * @brief Destroy console log configuration * @param config Console log configuration pointer @@ -498,11 +478,6 @@ zvec_config_console_log_destroy(ZVecConsoleLogConfig *config); ZVEC_EXPORT void ZVEC_CALL zvec_config_file_log_destroy(ZVecFileLogConfig *config); -/** - * @brief Destroy log configuration - * @param config Log configuration pointer - */ -ZVEC_EXPORT void ZVEC_CALL zvec_config_log_destroy(ZVecLogConfig *config); /** * @brief Create configuration data @@ -528,11 +503,11 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL 
zvec_config_data_set_memory_limit( /** * @brief Set log configuration in configuration data * @param config Configuration data pointer - * @param log_config Log configuration pointer + * @param log_config Log configuration pointer (ownership is transferred to config, do not free separately) * @return ZVecErrorCode Error code */ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_log_config( - ZVecConfigData *config, ZVecLogConfig *log_config); + ZVecConfigData *config, ZVecLogType log_type, void *log_config); /** * @brief Set query thread count in configuration data diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c index e8256d802..28a322778 100644 --- a/tests/c_api/c_api_test.c +++ b/tests/c_api/c_api_test.c @@ -152,15 +152,7 @@ void test_zvec_config() { // Test 4: Log config creation with console type ZVecConsoleLogConfig *temp_console = zvec_config_console_log_create(ZVEC_LOG_LEVEL_ERROR); - ZVecLogConfig *log_config_console = - zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, temp_console); - TEST_ASSERT(log_config_console != NULL); - if (log_config_console) { - TEST_ASSERT(log_config_console->type == ZVEC_LOG_TYPE_CONSOLE); - TEST_ASSERT(log_config_console->config.console_config.level == - ZVEC_LOG_LEVEL_ERROR); - zvec_config_log_destroy(log_config_console); - } + TEST_ASSERT(temp_console != NULL); if (temp_console) { zvec_config_console_log_destroy(temp_console); } @@ -168,41 +160,22 @@ void test_zvec_config() { // Test 5: Log config creation with file type ZVecFileLogConfig *temp_file = zvec_config_file_log_create( ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); - ZVecLogConfig *log_config_file = - zvec_config_log_create(ZVEC_LOG_TYPE_FILE, temp_file); - TEST_ASSERT(log_config_file != NULL); - if (log_config_file) { - TEST_ASSERT(log_config_file->type == ZVEC_LOG_TYPE_FILE); - TEST_ASSERT(log_config_file->config.file_config.level == - ZVEC_LOG_LEVEL_DEBUG); - TEST_ASSERT( - strcmp(log_config_file->config.file_config.dir.data, "./logs") == 
0); - TEST_ASSERT( - strcmp(log_config_file->config.file_config.basename.data, "app") == 0); - zvec_config_log_destroy(log_config_file); - } - if (temp_file) { - zvec_config_file_log_destroy(temp_file); - } - - // Test 6: Log config with NULL config data (should use defaults) - ZVecLogConfig *log_config_default = - zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, NULL); - TEST_ASSERT(log_config_default != NULL); - if (log_config_default) { - TEST_ASSERT(log_config_default->type == ZVEC_LOG_TYPE_CONSOLE); - TEST_ASSERT(log_config_default->config.console_config.level == - ZVEC_LOG_LEVEL_WARN); - zvec_config_log_destroy(log_config_default); - } - - // Test 7: Config data creation and basic operations + TEST_ASSERT(temp_file != NULL); + TEST_ASSERT(temp_file->level == ZVEC_LOG_LEVEL_DEBUG); + TEST_ASSERT(strcmp(temp_file->dir.data, "./logs") == 0); + TEST_ASSERT(strcmp(temp_file->basename.data, "app") == 0); + TEST_ASSERT(temp_file->file_size == 50); + TEST_ASSERT(temp_file->overdue_days == 30); + + zvec_config_file_log_destroy(temp_file); + + // Test 6: Config data creation and basic operations ZVecConfigData *config_data = zvec_config_data_create(); TEST_ASSERT(config_data != NULL); if (config_data) { // Test initial values TEST_ASSERT(config_data->log_config != NULL); - TEST_ASSERT(config_data->log_config->type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_CONSOLE); // Test memory limit setting ZVecErrorCode err = @@ -220,27 +193,25 @@ void test_zvec_config() { TEST_ASSERT(config_data->optimize_thread_count == 4); // Test log config replacement - ZVecConsoleLogConfig *new_console = - zvec_config_console_log_create(ZVEC_LOG_LEVEL_DEBUG); - ZVecLogConfig *new_log_config = - zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, new_console); - if (new_log_config) { - err = zvec_config_data_set_log_config(config_data, new_log_config); - TEST_ASSERT(err == ZVEC_OK); - TEST_ASSERT(config_data->log_config == new_log_config); - } + 
TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(config_data->log_config != NULL); + ZVecFileLogConfig *new_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + TEST_ASSERT(new_file != NULL); + zvec_config_data_set_log_config(config_data, ZVEC_LOG_TYPE_FILE, new_file); + TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_FILE); + TEST_ASSERT(config_data->log_config != NULL); + zvec_config_data_destroy(config_data); - if (new_console) zvec_config_console_log_destroy(new_console); - if (new_log_config) zvec_config_log_destroy(new_log_config); } - // Test 8: Edge cases and error conditions + // Test 7: Edge cases and error conditions // Test NULL pointer handling ZVecErrorCode err = zvec_config_data_set_memory_limit(NULL, 1024); TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); - err = zvec_config_data_set_log_config(NULL, NULL); + err = zvec_config_data_set_log_config(NULL, ZVEC_LOG_TYPE_CONSOLE, NULL); TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); err = zvec_config_data_set_query_thread_count(NULL, 1); @@ -274,7 +245,7 @@ void test_zvec_config() { zvec_config_data_destroy(boundary_config); } - // Test 9: Memory leak prevention - double destroy safety + // Test 8: Memory leak prevention - double destroy safety ZVecConfigData *double_destroy_test = zvec_config_data_create(); if (double_destroy_test) { zvec_config_data_destroy(double_destroy_test); @@ -290,7 +261,7 @@ void test_zvec_initialize() { TEST_ASSERT(config != NULL); if (config) { TEST_ASSERT(config->log_config != NULL); - TEST_ASSERT(config->log_config->type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(config->log_type == ZVEC_LOG_TYPE_CONSOLE); } ZVecErrorCode err = zvec_initialize(config); TEST_ASSERT(err == ZVEC_OK);