From bd44e83f6784b5507a66841f2c8ecde63951f426 Mon Sep 17 00:00:00 2001 From: Taha YASSINE Date: Tue, 29 Apr 2025 14:55:55 +0100 Subject: [PATCH 1/7] Add str.startswith() --- include/minja/minja.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index ec89bc7..1518b9a 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -1521,6 +1521,10 @@ class MethodCallExpr : public Expression { vargs.expectArgs("endswith method", {1, 1}, {0, 0}); auto suffix = vargs.args[0].get(); return suffix.length() <= str.length() && std::equal(suffix.rbegin(), suffix.rend(), str.rbegin()); + } else if (method->get_name() == "startswith") { + vargs.expectArgs("startswith method", {1, 1}, {0, 0}); + auto prefix = vargs.args[0].get(); + return prefix.length() <= str.length() && std::equal(prefix.begin(), prefix.end(), str.begin()); } else if (method->get_name() == "title") { vargs.expectArgs("title method", {0, 0}, {0, 0}); auto res = str; From e3905a7efb0135ae446b0dc542ee6b43688e125e Mon Sep 17 00:00:00 2001 From: Taha YASSINE Date: Wed, 30 Apr 2025 05:43:22 +0100 Subject: [PATCH 2/7] Add support for step=-1 in slice --- include/minja/minja.hpp | 106 ++++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 31 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index 1518b9a..d14957f 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -1201,9 +1201,9 @@ class DictExpr : public Expression { class SliceExpr : public Expression { public: - std::shared_ptr start, end; - SliceExpr(const Location & loc, std::shared_ptr && s, std::shared_ptr && e) - : Expression(loc), start(std::move(s)), end(std::move(e)) {} + std::shared_ptr start, end, step; + SliceExpr(const Location & loc, std::shared_ptr && s, std::shared_ptr && e, std::shared_ptr && st = nullptr) + : Expression(loc), start(std::move(s)), end(std::move(e)), step(std::move(st)) {} Value do_evaluate(const std::shared_ptr &) const override { throw std::runtime_error("SliceExpr not implemented"); } @@ -1220,19 +1220,54 @@ class SubscriptExpr : public Expression { if (!index) throw std::runtime_error("SubscriptExpr.index is null"); auto target_value = base->evaluate(context); if (auto slice = dynamic_cast(index.get())) { - auto start = slice->start ? slice->start->evaluate(context).get() : 0; - auto end = slice->end ? slice->end->evaluate(context).get() : (int64_t) target_value.size(); + int64_t step = slice->step ? slice->step->evaluate(context).get() : 1; + if (step != 1 && step != -1) { + throw std::runtime_error("Slicing with step other than 1 or -1 is not supported"); + } + bool reverse = step == -1; + + size_t len = target_value.size(); + int64_t start = slice->start ? slice->start->evaluate(context).get() : (reverse ? len - 1 : 0); + int64_t end = slice->end ? slice->end->evaluate(context).get() : (reverse ? -1 : len); + + if (slice->start && start < 0) { + start = (int64_t)len + start; + } + if (slice->end && end < 0) { + end = (int64_t)len + end; + } + if (target_value.is_string()) { std::string s = target_value.get(); - if (start < 0) start = s.size() + start; - if (end < 0) end = s.size() + end; - return s.substr(start, end - start); - } else if (target_value.is_array()) { - if (start < 0) start = target_value.size() + start; - if (end < 0) end = target_value.size() + end; + + std::string result_str; + if (reverse) { + for (int64_t i = start; i > end; --i) { + if (i >= 0 && i < (int64_t)len) { + result_str += s[i]; + } else if (i < 0) { + break; + } + } + } else { + result_str = s.substr(start, end - start); + } + return result_str; + + } else if (target_value.is_array()) { auto result = Value::array(); - for (auto i = start; i < end; ++i) { - result.push_back(target_value.at(i)); + if (reverse) { + for (int64_t i = start; i > end; --i) { + if (i >= 0 && i < (int64_t)len) { + result.push_back(target_value.at(i)); + } else if (i < 0) { + break; + } + } + } else { + for (auto i = start; i < end; ++i) { + result.push_back(target_value.at(i)); + } } return result; } else { @@ -2087,28 +2122,37 @@ class Parser { while (it != end && consumeSpaces() && peekSymbols({ "[", "." })) { if (!consumeToken("[").empty()) { - std::shared_ptr index; + std::shared_ptr index; + auto slice_loc = get_location(); + std::shared_ptr start, end, step; + bool has_first_colon = false, has_second_colon = false; + + if (!peekSymbols({ ":" })) { + start = parseExpression(); + } + + if (!consumeToken(":").empty()) { + has_first_colon = true; + if (!peekSymbols({ ":", "]" })) { + end = parseExpression(); + } if (!consumeToken(":").empty()) { - auto slice_end = parseExpression(); - index = std::make_shared(slice_end->location, nullptr, std::move(slice_end)); - } else { - auto slice_start = parseExpression(); - if (!consumeToken(":").empty()) { - consumeSpaces(); - if (peekSymbols({ "]" })) { - index = std::make_shared(slice_start->location, std::move(slice_start), nullptr); - } else { - auto slice_end = parseExpression(); - index = std::make_shared(slice_start->location, std::move(slice_start), std::move(slice_end)); - } - } else { - index = std::move(slice_start); + has_second_colon = true; + if (!peekSymbols({ "]" })) { + step = parseExpression(); } } - if (!index) throw std::runtime_error("Empty index in subscript"); - if (consumeToken("]").empty()) throw std::runtime_error("Expected closing bracket in subscript"); + } + + if ((has_first_colon || has_second_colon) && (start || end || step)) { + index = std::make_shared(slice_loc, std::move(start), std::move(end), std::move(step)); + } else { + index = std::move(start); + } + if (!index) throw std::runtime_error("Empty index in subscript"); + if (consumeToken("]").empty()) throw std::runtime_error("Expected closing bracket in subscript"); - value = std::make_shared(value->location, std::move(value), std::move(index)); + value = std::make_shared(value->location, std::move(value), std::move(index)); } else if (!consumeToken(".").empty()) { auto identifier = parseIdentifier(); if (!identifier) throw std::runtime_error("Expected identifier in subscript"); From 918f392b202eb6d2fdf7df174221f5443d9d2172 Mon Sep 17 00:00:00 2001 From: Taha YASSINE Date: Wed, 30 Apr 2025 05:44:51 +0100 Subject: [PATCH 3/7] Add tests --- tests/test-syntax.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp index 5bc82fa..7aa0f0d 100644 --- a/tests/test-syntax.cpp +++ b/tests/test-syntax.cpp @@ -184,6 +184,9 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_EQ( "1", render(R"({{ 1 | safe }})", {}, {})); + EXPECT_EQ( + "True,False", + render(R"({{ 'abc'.startswith('ab') }},{{ ''.startswith('a') }})", {}, {})); EXPECT_EQ( "True,False", render(R"({{ 'abc'.endswith('bc') }},{{ ''.endswith('a') }})", {}, {})); @@ -465,6 +468,9 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_EQ( "[1, 2, 3][0, 1][1, 2]", render("{% set x = [0, 1, 2, 3] %}{{ x[1:] }}{{ x[:2] }}{{ x[1:3] }}", {}, {})); + EXPECT_EQ( + "[3, 2, 1, 0][3, 2, 1][2, 1, 0][2, 1]", + render("{% set x = [0, 1, 2, 3] %}{{ x[::-1] }}{{ x[:0:-1] }}{{ x[2::-1] }}{{ x[2:0:-1] }}", {}, {})); EXPECT_EQ( "a", render("{{ ' a ' | trim }}", {}, {})); From 4121892b9b49de17c7b3cf12aa5a0cbe338dfa05 Mon Sep 17 00:00:00 2001 From: Taha YASSINE Date: Wed, 30 Apr 2025 05:46:02 +0100 Subject: [PATCH 4/7] Add Qwen3 template --- tests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index aa3a756..09323b3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -318,6 +318,7 @@ set(MODEL_IDS ValiantLabs/Llama3.1-8B-Enigma xwen-team/Xwen-72B-Chat xwen-team/Xwen-7B-Chat + Qwen/Qwen3-4B # Broken, TODO: # ai21labs/AI21-Jamba-1.5-Large # https://github.com/google/minja/issues/8 From 3261d84e932e8a6c5344f79dfaae75615690c3e3 Mon Sep 17 00:00:00 2001 From: Taha YASSINE Date: Thu, 15 May 2025 00:16:03 +0200 Subject: [PATCH 5/7] Clamp out-of-bounds slice indices --- include/minja/minja.hpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index d14957f..1c6168e 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -1226,7 +1226,7 @@ class SubscriptExpr : public Expression { } bool reverse = step == -1; - size_t len = target_value.size(); + size_t len = target_value.size(); int64_t start = slice->start ? slice->start->evaluate(context).get() : (reverse ? len - 1 : 0); int64_t end = slice->end ? slice->end->evaluate(context).get() : (reverse ? -1 : len); @@ -1237,19 +1237,27 @@ class SubscriptExpr : public Expression { end = (int64_t)len + end; } + if (!reverse) { + start = std::max((int64_t)0, start); + start = std::min(start, (int64_t)len); + end = std::max((int64_t)0, end); + end = std::min(end, (int64_t)len); + } else { + start = std::max((int64_t)-1, start); + start = std::min(start, (int64_t)len - 1); + end = std::max((int64_t)-1, end); + end = std::min(end, (int64_t)len - 1); + } + if (target_value.is_string()) { std::string s = target_value.get(); std::string result_str; if (reverse) { for (int64_t i = start; i > end; --i) { - if (i >= 0 && i < (int64_t)len) { - result_str += s[i]; - } else if (i < 0) { - break; - } + result_str += s[i]; } - } else { + } else if (start < end) { result_str = s.substr(start, end - start); } return result_str; @@ -1258,11 +1266,7 @@ class SubscriptExpr : public Expression { auto result = Value::array(); if (reverse) { for (int64_t i = start; i > end; --i) { - if (i >= 0 && i < (int64_t)len) { - result.push_back(target_value.at(i)); - } else if (i < 0) { - break; - } + result.push_back(target_value.at(i)); } } else { for (auto i = start; i < end; ++i) { From 38a8e1cb47a0d413a9a3a6aabf9096c3ab5aaacb Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 15 May 2025 11:11:45 +0100 Subject: [PATCH 6/7] Simplify subscript logic + handle any (non-zero) step --- include/minja/minja.hpp | 62 +++++++++++++---------------------------- 1 file changed, 20 insertions(+), 42 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index 1c6168e..2f84146 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -1220,58 +1220,36 @@ class SubscriptExpr : public Expression { if (!index) throw std::runtime_error("SubscriptExpr.index is null"); auto target_value = base->evaluate(context); if (auto slice = dynamic_cast(index.get())) { + auto len = target_value.size(); + auto wrap = [len](int64_t i) -> int64_t { + if (i < 0) { + return i + len; + } + return i; + }; int64_t step = slice->step ? slice->step->evaluate(context).get() : 1; - if (step != 1 && step != -1) { - throw std::runtime_error("Slicing with step other than 1 or -1 is not supported"); - } - bool reverse = step == -1; - - size_t len = target_value.size(); - int64_t start = slice->start ? slice->start->evaluate(context).get() : (reverse ? len - 1 : 0); - int64_t end = slice->end ? slice->end->evaluate(context).get() : (reverse ? -1 : len); - - if (slice->start && start < 0) { - start = (int64_t)len + start; - } - if (slice->end && end < 0) { - end = (int64_t)len + end; - } - - if (!reverse) { - start = std::max((int64_t)0, start); - start = std::min(start, (int64_t)len); - end = std::max((int64_t)0, end); - end = std::min(end, (int64_t)len); - } else { - start = std::max((int64_t)-1, start); - start = std::min(start, (int64_t)len - 1); - end = std::max((int64_t)-1, end); - end = std::min(end, (int64_t)len - 1); + if (!step) { + throw std::runtime_error("slice step cannot be zero"); } - + int64_t start = slice->start ? wrap(slice->start->evaluate(context).get()) : (step < 0 ? len - 1 : 0); + int64_t end = slice->end ? wrap(slice->end->evaluate(context).get()) : (step < 0 ? -1 : len); if (target_value.is_string()) { std::string s = target_value.get(); - std::string result_str; - if (reverse) { - for (int64_t i = start; i > end; --i) { - result_str += s[i]; + std::string result; + if (start < end && step == 1) { + result = s.substr(start, end - start); + } else { + for (int64_t i = start; step > 0 ? i < end : i > end; i += step) { + result += s[i]; } - } else if (start < end) { - result_str = s.substr(start, end - start); } - return result_str; + return result; } else if (target_value.is_array()) { auto result = Value::array(); - if (reverse) { - for (int64_t i = start; i > end; --i) { - result.push_back(target_value.at(i)); - } - } else { - for (auto i = start; i < end; ++i) { - result.push_back(target_value.at(i)); - } + for (int64_t i = start; step > 0 ? i < end : i > end; i += step) { + result.push_back(target_value.at(i)); } return result; } else { From d5dc2455d9031aa0c1db218bea56bf80b97f3e3a Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 15 May 2025 11:12:34 +0100 Subject: [PATCH 7/7] Test more steps + test string subscripting --- tests/test-syntax.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp index 7aa0f0d..5682bf5 100644 --- a/tests/test-syntax.cpp +++ b/tests/test-syntax.cpp @@ -469,8 +469,14 @@ TEST(SyntaxTest, SimpleCases) { "[1, 2, 3][0, 1][1, 2]", render("{% set x = [0, 1, 2, 3] %}{{ x[1:] }}{{ x[:2] }}{{ x[1:3] }}", {}, {})); EXPECT_EQ( - "[3, 2, 1, 0][3, 2, 1][2, 1, 0][2, 1]", - render("{% set x = [0, 1, 2, 3] %}{{ x[::-1] }}{{ x[:0:-1] }}{{ x[2::-1] }}{{ x[2:0:-1] }}", {}, {})); + "123;01;12", + render("{% set x = '0123' %}{{ x[1:] }};{{ x[:2] }};{{ x[1:3] }}", {}, {})); + EXPECT_EQ( + "[3, 2, 1, 0][3, 2, 1][2, 1, 0][2, 1][0, 2][3, 1][2, 0]", + render("{% set x = [0, 1, 2, 3] %}{{ x[::-1] }}{{ x[:0:-1] }}{{ x[2::-1] }}{{ x[2:0:-1] }}{{ x[::2] }}{{ x[::-2] }}{{ x[-2::-2] }}", {}, {})); + EXPECT_EQ( + "3210;321;210;21;02;31;20", + render("{% set x = '0123' %}{{ x[::-1] }};{{ x[:0:-1] }};{{ x[2::-1] }};{{ x[2:0:-1] }};{{ x[::2] }};{{ x[::-2] }};{{ x[-2::-2] }}", {}, {})); EXPECT_EQ( "a", render("{{ ' a ' | trim }}", {}, {}));