From 0c7552a825c10b39ef49ccd140e4acd8b1e60247 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Mon, 4 May 2026 15:04:11 +0200 Subject: [PATCH 1/2] Fix tagged JSON parsing, expose buildModelNode so downstream JSON importers (mapget) can use it as well. --- include/simfil/model/json.h | 12 ++++ src/model/json.cpp | 140 ++++++++++++++++++++++++++++-------- test/complex.cpp | 21 +++++- 3 files changed, 141 insertions(+), 32 deletions(-) diff --git a/include/simfil/model/json.h b/include/simfil/model/json.h index 8e3ee5a2..4f8e6db5 100644 --- a/include/simfil/model/json.h +++ b/include/simfil/model/json.h @@ -9,13 +9,25 @@ #include "model.h" #include "simfil/error.h" +#include #include namespace simfil::json { +/** + * Build a ModelNode subtree from JSON using simfil's tagged JSON conventions + * such as `_bytes` and `_multimap`. + */ +auto buildModelNode(const nlohmann::json& input, ModelPool& model) -> tl::expected; + +/** Parse JSON from a stream and append the resulting root node to `model`. */ auto parse(std::istream& input, ModelPoolPtr const& model) -> tl::expected; + +/** Parse a JSON string and append the resulting root node to `model`. */ auto parse(const std::string& input, ModelPoolPtr const& model) -> tl::expected; + +/** Parse a JSON string into a freshly created ModelPool containing one root. */ auto parse(const std::string& input) -> tl::expected; } diff --git a/src/model/json.cpp b/src/model/json.cpp index 199fc0c5..b26b6135 100644 --- a/src/model/json.cpp +++ b/src/model/json.cpp @@ -3,6 +3,10 @@ #include "simfil/model/json.h" #include "simfil/model/model.h" +#include +#include +#include + #include #include "../expected.h" @@ -11,85 +15,159 @@ namespace simfil::json { using json = nlohmann::json; -static auto build(const json& j, ModelPool & model) -> tl::expected +namespace +{ +/** Resolve the canonical simfil null node instance. */ +auto nullNode(ModelPool& model) -> ModelNode::Ptr +{ + return model.resolve( + ModelNodeAddress{Model::Null, 1}, + ScalarValueType{}); +} + +/** Decode one object field through the shared recursive JSON builder. */ +auto buildObjectField( + model_ptr& object, + std::string const& key, + const json& value, + ModelPool& model) -> tl::expected +{ + auto child = buildModelNode(value, model); + TRY_EXPECTED(child); + auto result = object->addField(key, *child); + TRY_EXPECTED(result); + return {}; +} + +/** Expand `_multimap`-tagged objects into repeated simfil object fields. */ +auto buildMultimapObject( + const json& input, + model_ptr& object, + ModelPool& model) -> tl::expected +{ + for (auto&& [key, value] : input.items()) { + if (key == "_multimap") { + continue; + } + + if (!value.is_array()) { + return tl::unexpected( + Error::ParserError, + "Invalid multimap object: expected array values for every field"); + } + + for (const auto& item : value) { + auto result = buildObjectField(object, key, item, model); + TRY_EXPECTED(result); + } + } + + return {}; +} +} + +/** Build a simfil node tree from JSON while honoring simfil's tagged encodings. */ +auto buildModelNode(const json& input, ModelPool& model) -> tl::expected { - switch (j.type()) { + switch (input.type()) { case json::value_t::null: - return {}; + return nullNode(model); case json::value_t::boolean: - return model.newSmallValue(j.get()); + return model.newSmallValue(input.get()); case json::value_t::number_float: - return model.newValue(j.get()); + return model.newValue(input.get()); case json::value_t::number_unsigned: - return model.newValue((int64_t)j.get()); + if (input.get() > static_cast(std::numeric_limits::max())) { + // simfil stores JSON integers as signed int64_t, so reject lossy unsigned values. + return tl::unexpected( + Error::ParserError, + "Unsigned integer does not fit into simfil's signed JSON number storage"); + } + return model.newValue(static_cast(input.get())); case json::value_t::number_integer: - return model.newValue(j.get()); + return model.newValue(input.get()); case json::value_t::string: - if (auto stringId = model.strings()->emplace(j.get()); stringId) - return model.newValue((StringId)*stringId); - else - return tl::unexpected(stringId.error()); + return model.newValue(input.get()); default: break; } - if (j.is_object()) { - if (auto it = j.find("_bytes"); it != j.end() && it->is_boolean() && it->get()) { - auto hex = j.find("hex"); - if (hex == j.end() || !hex->is_string()) - return tl::unexpected(Error::ParserError, "Invalid tagged bytes object: expected string field 'hex'"); + if (input.is_object()) { + if (auto it = input.find("_bytes"); it != input.end() && it->is_boolean() && it->get()) { + // `_bytes` is the tagged JSON form emitted by ModelNode::toJson() for byte arrays. + auto hex = input.find("hex"); + if (hex == input.end() || !hex->is_string()) { + return tl::unexpected( + Error::ParserError, + "Invalid tagged bytes object: expected string field 'hex'"); + } auto decoded = ByteArray::fromHex(hex->get()); - if (!decoded) - return tl::unexpected(Error::ParserError, "Invalid tagged bytes object: hex decode failed"); + if (!decoded) { + return tl::unexpected( + Error::ParserError, + "Invalid tagged bytes object: hex decode failed"); + } return model.newValue(std::move(*decoded)); } - auto object = model.newObject(j.size(), true); - for (auto&& [key, value] : j.items()) { - auto child = build(value, model); - TRY_EXPECTED(child); - object->addField(key, *child); + auto object = model.newObject(input.size(), true); + auto multimap = input.find("_multimap"); + if (multimap != input.end() && multimap->is_boolean() && multimap->get()) { + // Repeated object keys are reconstructed only for explicitly tagged multimaps. + auto result = buildMultimapObject(input, object, model); + TRY_EXPECTED(result); + return object; + } + + for (auto&& [key, value] : input.items()) { + auto result = buildObjectField(object, key, value, model); + TRY_EXPECTED(result); } return object; } - if (j.is_array()) { - auto array = model.newArray(j.size(), true); - for (const auto& value : j) { - auto child = build(value, model); + if (input.is_array()) { + auto array = model.newArray(input.size(), true); + for (const auto& value : input) { + auto child = buildModelNode(value, model); TRY_EXPECTED(child); array->append(*child); } return array; } - return {}; + return tl::unexpected( + Error::ParserError, + "Unsupported JSON value type"); } +/** Parse JSON from a stream into an existing model pool. */ auto parse(std::istream& input, ModelPoolPtr const& model) -> tl::expected { - auto root = build(json::parse(input), *model); + auto root = buildModelNode(json::parse(input), *model); if (!root) return tl::unexpected(root.error()); model->addRoot(*root); return model->validate(); } +/** Parse a JSON string into an existing model pool. */ auto parse(const std::string& input, ModelPoolPtr const& model) -> tl::expected { - auto root = build(json::parse(input), *model); + auto root = buildModelNode(json::parse(input), *model); if (!root) return tl::unexpected(root.error()); model->addRoot(*root); return model->validate(); } +/** Parse a JSON string into a new model pool with one validated root. */ auto parse(const std::string& input) -> tl::expected { auto model = std::make_shared(); - auto root = build(json::parse(input), *model); + auto root = buildModelNode(json::parse(input), *model); if (!root) return tl::unexpected(root.error()); model->addRoot(*root); diff --git a/test/complex.cpp b/test/complex.cpp index c24312d2..a37080dc 100644 --- a/test/complex.cpp +++ b/test/complex.cpp @@ -98,8 +98,27 @@ TEST_CASE("Multimap JSON", "[multimap.serialization]") { root->addField("c", array); root->addField("c", static_cast(2)); + auto const expected = nlohmann::json::parse(R"([{"a":[1],"b":[1,2,3],"c":[[1],2],"_multimap":true}])"); INFO(model->toJson().dump(2)); - REQUIRE(model->toJson() == nlohmann::json::parse(R"([{"a":[1],"b":[1,2,3],"c":[[1],2],"_multimap":true}])")); + REQUIRE(model->toJson() == expected); + + auto roundTrip = json::parse(expected.dump()); + REQUIRE(roundTrip); + auto roundTripRoot = roundTrip.value()->root(0); + REQUIRE(roundTripRoot); + auto roundTripObject = roundTripRoot.value()->at(0); + REQUIRE(roundTripObject); + REQUIRE(roundTripObject->size() == 6); + REQUIRE(roundTripObject->toJson() == expected[0]); +} + +TEST_CASE("Null JSON values build explicit null nodes", "[json.serialization][null]") { + auto model = json::parse(R"({"a":null,"b":[null,{"c":null}]})"); + REQUIRE(model); + + auto root = model.value()->root(0); + REQUIRE(root); + REQUIRE(root.value()->toJson() == nlohmann::json::parse(R"({"a":null,"b":[null,{"c":null}]})")); } TEST_CASE("Tagged bytes JSON", "[bytes.serialization]") { From b5dd897ea60d2645c6acb31d93893305255292f5 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Tue, 5 May 2026 11:11:12 +0200 Subject: [PATCH 2/2] Repair test. --- src/model/json.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/model/json.cpp b/src/model/json.cpp index b26b6135..7a0343ef 100644 --- a/src/model/json.cpp +++ b/src/model/json.cpp @@ -87,7 +87,15 @@ auto buildModelNode(const json& input, ModelPool& model) -> tl::expected()); case json::value_t::string: - return model.newValue(input.get()); + { + // JSON strings are expected to participate in simfil's pooled-string + // facilities such as completion of uppercase constants. + auto stringId = model.strings()->emplace(input.get()); + if (!stringId) { + return tl::unexpected(stringId.error()); + } + return model.newValue(static_cast(*stringId)); + } default: break; }