From 4c8db1e644916daab95d6d54440aacca41203edd Mon Sep 17 00:00:00 2001
From: Xuejun <XuejunZhai@intel.com>
Date: Tue, 19 May 2026 16:47:12 +0800
Subject: [PATCH 1/2] OpenVINO backend: enable arch test for qwen3vl

---
 ggml/src/ggml-openvino/openvino/op/rope.cpp           | 11 ++++++++++-
 ggml/src/ggml-openvino/openvino/translate_session.cpp |  6 ++++++
 ggml/src/ggml-openvino/openvino/utils.cpp             | 10 +++++++++-
 ggml/src/ggml-openvino/openvino/utils.h               |  3 ++-
 4 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/ggml/src/ggml-openvino/openvino/op/rope.cpp b/ggml/src/ggml-openvino/openvino/op/rope.cpp
index de8bcdb38de..e3c13d787f1 100644
--- a/ggml/src/ggml-openvino/openvino/op/rope.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/rope.cpp
@@ -55,7 +55,16 @@ OutputVector translate_rope(const NodeContext & context) {
         if (context.get_input_size() == 3) {
             rope_freqs_weight = context.get_input(2).get_node_shared_ptr();
         }
-        auto sin_cos = make_sin_cos(op_params, inp_pos, rope_freqs_weight, mode == TYPE_IMROPE);
+        std::shared_ptr<ov::Node> token_len_per_seq;
+        if (context.has_input("token_len_per_seq")) {
+            token_len_per_seq = context.get_input("token_len_per_seq").get_node_shared_ptr();
+        }
+        auto sin_cos = make_sin_cos(op_params,
+                                    inp_pos,
+                                    rope_freqs_weight,
+                                    mode == TYPE_IMROPE,
+                                    false,
+                                    token_len_per_seq);
         sin_theta_node = sin_cos.first;
         cos_theta_node = sin_cos.second;
     }
diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp
index 189de0fc37f..c22d95e05a8 100644
--- a/ggml/src/ggml-openvino/openvino/translate_session.cpp
+++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp
@@ -124,6 +124,12 @@ void add_rope_sin_cos(TensorMap & tensor_map, GgmlDecoder & ggml_model_decoder)
     if (ggml_model_decoder.has_mixed_rope_params()) {
         return;
     }
+    // When seq_active_start/seq_active_end are present, active-sequence slicing
+    // is rebuilt per ROPE node. Sharing one rope_sin/rope_cos across the graph
+    // is unsafe then: graph-level inp_pos need not match each ROPE node's use.
+    if (tensor_map.find("seq_active_start") != tensor_map.end() && tensor_map.find("seq_active_end") != tensor_map.end()) {
+        return;
+    }
     int32_t * rope_params = ggml_model_decoder.get_rope_params();
     if (tensor_map.find("inp_pos") == tensor_map.end() || rope_params == nullptr) {
         return;
diff --git a/ggml/src/ggml-openvino/openvino/utils.cpp b/ggml/src/ggml-openvino/openvino/utils.cpp
index e0344aee3b8..c4082e071ee 100644
--- a/ggml/src/ggml-openvino/openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/openvino/utils.cpp
@@ -121,7 +121,8 @@ std::pair<ov::Output<Node>, ov::Output<Node>> make_sin_cos(int32_t * rope_params
                                                            std::shared_ptr<ov::Node> inp_pos,
                                                            std::shared_ptr<ov::Node> rope_freqs_weight,
                                                            bool imrope,
-                                                           bool stateful) {
+                                                           bool stateful,
+                                                           std::shared_ptr<ov::Node> token_len_per_seq) {
     if (stateful) {
         inp_pos = std::make_shared<ov::op::v0::Squeeze>(inp_pos, ov::op::v0::Constant::create(ov::element::i64, {1}, {0}));
         inp_pos = std::make_shared<ov::op::v0::Convert>(inp_pos, ov::element::f32);
@@ -140,6 +141,13 @@ std::pair<ov::Output<Node>, ov::Output<Node>> make_sin_cos(int32_t * rope_params
         auto pos_perm =
             std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 3, 1, 2});
         inp_pos = std::make_shared<ov::op::v1::Transpose>(inp_pos, pos_perm);
+
+        if (!imrope && token_len_per_seq) {
+            auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
+            auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
+            auto axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
+            inp_pos = std::make_shared<ov::op::v8::Slice>(inp_pos, zero, token_len_per_seq, one, axis);
+        }
     }
 
     float freq_base;
diff --git a/ggml/src/ggml-openvino/openvino/utils.h b/ggml/src/ggml-openvino/openvino/utils.h
index 53f793b57d7..343491e0f2c 100644
--- a/ggml/src/ggml-openvino/openvino/utils.h
+++ b/ggml/src/ggml-openvino/openvino/utils.h
@@ -68,7 +68,8 @@ std::pair<ov::Output<Node>, ov::Output<Node>> make_sin_cos(int32_t* rope_params,
                                                            std::shared_ptr<ov::Node> inp_pos,
                                                            std::shared_ptr<ov::Node> rope_freqs_weight = nullptr,
                                                            bool imrope = false,
-                                                           bool stateful = false);
+                                                           bool stateful = false,
+                                                           std::shared_ptr<ov::Node> token_len_per_seq = nullptr);
 
 ov::Output<ov::Node> process_view_input(const NodeContext& context, int input_index, int slice_len = 0);
 

From 3884cdcd6c49cee159a2eea7b350c2f68db9a85d Mon Sep 17 00:00:00 2001
From: Xuejun <Xuejun.Zhai@intel.com>
Date: Wed, 20 May 2026 14:28:06 +0800
Subject: [PATCH 2/2] OpenVINO backend: enable cohere2 for arch test

---
 ggml/src/ggml-openvino/ggml-decoder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index d005b40458f..91c7b05ae49 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -407,7 +407,7 @@ std::pair<ModelParams, ComputeParams> GgmlOvDecoder::compute_llm_params(ggml_cgr
             model_params.head_size = cache_k_permute->ne[0];
             model_params.n_heads_kv = cache_k_permute->ne[2];
             compute_params.input_len = node->src[0]->ne[1];
-            compute_params.token_len_per_seq = node->ne[2];
+            compute_params.token_len_per_seq = node->src[0]->ne[1];
 
             auto * cache_k_view = cache_k_permute->src[0];
             if (cache_k_view->op != GGML_OP_VIEW) {