From 4c8db1e644916daab95d6d54440aacca41203edd Mon Sep 17 00:00:00 2001 From: Xuejun Date: Tue, 19 May 2026 16:47:12 +0800 Subject: [PATCH 1/2] OpenVINO backend: enable arch test for qwen3vl --- ggml/src/ggml-openvino/openvino/op/rope.cpp | 11 ++++++++++- ggml/src/ggml-openvino/openvino/translate_session.cpp | 6 ++++++ ggml/src/ggml-openvino/openvino/utils.cpp | 10 +++++++++- ggml/src/ggml-openvino/openvino/utils.h | 3 ++- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-openvino/openvino/op/rope.cpp b/ggml/src/ggml-openvino/openvino/op/rope.cpp index de8bcdb38de..e3c13d787f1 100644 --- a/ggml/src/ggml-openvino/openvino/op/rope.cpp +++ b/ggml/src/ggml-openvino/openvino/op/rope.cpp @@ -55,7 +55,16 @@ OutputVector translate_rope(const NodeContext & context) { if (context.get_input_size() == 3) { rope_freqs_weight = context.get_input(2).get_node_shared_ptr(); } - auto sin_cos = make_sin_cos(op_params, inp_pos, rope_freqs_weight, mode == TYPE_IMROPE); + std::shared_ptr token_len_per_seq; + if (context.has_input("token_len_per_seq")) { + token_len_per_seq = context.get_input("token_len_per_seq").get_node_shared_ptr(); + } + auto sin_cos = make_sin_cos(op_params, + inp_pos, + rope_freqs_weight, + mode == TYPE_IMROPE, + false, + token_len_per_seq); sin_theta_node = sin_cos.first; cos_theta_node = sin_cos.second; } diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp index 189de0fc37f..c22d95e05a8 100644 --- a/ggml/src/ggml-openvino/openvino/translate_session.cpp +++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp @@ -124,6 +124,12 @@ void add_rope_sin_cos(TensorMap & tensor_map, GgmlDecoder & ggml_model_decoder) if (ggml_model_decoder.has_mixed_rope_params()) { return; } + // Dynamic active-sequence slicing is reconstructed per ROPE node. Reusing a + // single shared rope_sin/rope_cos across the whole graph is unsafe here, + // because the graph-level inp_pos does not necessarily match each ROPE use. + if (tensor_map.find("seq_active_start") != tensor_map.end() && tensor_map.find("seq_active_end") != tensor_map.end()) { + return; + } int32_t * rope_params = ggml_model_decoder.get_rope_params(); if (tensor_map.find("inp_pos") == tensor_map.end() || rope_params == nullptr) { return; diff --git a/ggml/src/ggml-openvino/openvino/utils.cpp b/ggml/src/ggml-openvino/openvino/utils.cpp index e0344aee3b8..c4082e071ee 100644 --- a/ggml/src/ggml-openvino/openvino/utils.cpp +++ b/ggml/src/ggml-openvino/openvino/utils.cpp @@ -121,7 +121,8 @@ std::pair, ov::Output> make_sin_cos(int32_t * rope_params std::shared_ptr inp_pos, std::shared_ptr rope_freqs_weight, bool imrope, - bool stateful) { + bool stateful, + std::shared_ptr token_len_per_seq) { if (stateful) { inp_pos = std::make_shared(inp_pos, ov::op::v0::Constant::create(ov::element::i64, {1}, {0})); inp_pos = std::make_shared(inp_pos, ov::element::f32); @@ -140,6 +141,13 @@ std::pair, ov::Output> make_sin_cos(int32_t * rope_params auto pos_perm = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 3, 1, 2}); inp_pos = std::make_shared(inp_pos, pos_perm); + + if (!imrope && token_len_per_seq) { + auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); + auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1}); + auto axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {1}); + inp_pos = std::make_shared(inp_pos, zero, token_len_per_seq, one, axis); + } } float freq_base; diff --git a/ggml/src/ggml-openvino/openvino/utils.h b/ggml/src/ggml-openvino/openvino/utils.h index 53f793b57d7..343491e0f2c 100644 --- a/ggml/src/ggml-openvino/openvino/utils.h +++ b/ggml/src/ggml-openvino/openvino/utils.h @@ -68,7 +68,8 @@ std::pair, ov::Output> make_sin_cos(int32_t* rope_params, std::shared_ptr inp_pos, std::shared_ptr rope_freqs_weight = nullptr, bool imrope = false, - bool stateful = false); + bool stateful = false, + std::shared_ptr token_len_per_seq = nullptr); ov::Output process_view_input(const NodeContext& context, int input_index, int slice_len = 0); From 3884cdcd6c49cee159a2eea7b350c2f68db9a85d Mon Sep 17 00:00:00 2001 From: Xuejun Date: Wed, 20 May 2026 14:28:06 +0800 Subject: [PATCH 2/2] OpenVINO backend: enable cohere2 for arch test --- ggml/src/ggml-openvino/ggml-decoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index d005b40458f..91c7b05ae49 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -407,7 +407,7 @@ std::pair GgmlOvDecoder::compute_llm_params(ggml_cgr model_params.head_size = cache_k_permute->ne[0]; model_params.n_heads_kv = cache_k_permute->ne[2]; compute_params.input_len = node->src[0]->ne[1]; - compute_params.token_len_per_seq = node->ne[2]; + compute_params.token_len_per_seq = node->src[0]->ne[1]; auto * cache_k_view = cache_k_permute->src[0]; if (cache_k_view->op != GGML_OP_VIEW) {