From ed986aa39c2b596c085c4d7d9098a3db6b929513 Mon Sep 17 00:00:00 2001
From: Mustafa Cavus <mustafa.cavus@intel.com>
Date: Thu, 21 May 2026 15:42:32 -0700
Subject: [PATCH 1/5] Initial gemma4 npu support

---
 ggml/src/ggml-openvino/ggml-decoder.cpp       |  4 +-
 ggml/src/ggml-openvino/ggml-decoder.h         |  4 +
 ggml/src/ggml-openvino/openvino/decoder.h     |  2 +
 .../src/ggml-openvino/openvino/node_context.h | 24 +++++
 ggml/src/ggml-openvino/openvino/op/cont.cpp   |  2 +-
 ggml/src/ggml-openvino/openvino/op/view.cpp   | 98 ++++++++++++++++++-
 ggml/src/ggml-openvino/openvino/utils.cpp     | 95 ++++++++++++++++++
 7 files changed, 225 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index d005b40458f..f5054641615 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -1131,7 +1131,7 @@ ov::PartialShape GgmlOvDecoder::get_view_input_ov_shape(int node_idx, const std:
             if (dynamic_it != m_node_dynamic_dims.end() && dynamic_it->second != -1) {
                 int dynamic_dim_index = dynamic_it->second;
                 // GGML uses reverse indexing, so convert to OpenVINO indexing
-                shape[3 - dynamic_dim_index] = -1;
+                shape[3 - dynamic_dim_index] = m_is_static ? get_static_n_tokens() : -1;
             }
 
             return shape;
@@ -1154,7 +1154,7 @@ ov::PartialShape GgmlOvDecoder::get_view_input_src_ov_shape(int node_idx, const
                 if (dynamic_it != m_node_dynamic_dims.end() && dynamic_it->second != -1) {
                     int dynamic_dim_index = dynamic_it->second;
                     // GGML uses reverse indexing, so convert to OpenVINO indexing
-                    shape[3 - dynamic_dim_index] = -1;
+                    shape[3 - dynamic_dim_index] = m_is_static ? get_static_n_tokens() : -1;
                 }
 
                 return shape;
diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h
index 91850a000b5..35bed0ba476 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -206,6 +206,10 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
 
     virtual bool is_stateful() const override { return m_is_stateful; }
 
+    virtual int get_static_n_tokens() const override {
+        return m_is_prefill ? m_prefill_chunk_size : 1;
+    }
+
     virtual bool is_splited_model() const override {
         return m_model_is_splitted;
     }
diff --git a/ggml/src/ggml-openvino/openvino/decoder.h b/ggml/src/ggml-openvino/openvino/decoder.h
index bc41876875c..c602aae73d7 100644
--- a/ggml/src/ggml-openvino/openvino/decoder.h
+++ b/ggml/src/ggml-openvino/openvino/decoder.h
@@ -101,6 +101,8 @@ class GgmlDecoder : public DecoderBase {
     virtual int is_swa_layer(int layer) const = 0;
 
     virtual int32_t get_op_dynamic_dim(int node_idx) const = 0;
+
+    virtual int get_static_n_tokens() const = 0;
 };
 
 }  // namespace ggml
diff --git a/ggml/src/ggml-openvino/openvino/node_context.h b/ggml/src/ggml-openvino/openvino/node_context.h
index 2402a74a908..8e834caa422 100644
--- a/ggml/src/ggml-openvino/openvino/node_context.h
+++ b/ggml/src/ggml-openvino/openvino/node_context.h
@@ -125,6 +125,16 @@ class NodeContext : public frontend::NodeContext {
         if (view_input_size > 0) {
             // This is a VIEW input, get the base tensor name (last element in the chain)
             std::string base_name = m_decoder->get_view_input_src_name(m_node_idx, m_input_names[idx], view_input_size - 1);
+            // Check if the VIEW has been resolved (translate_view produced a Slice)
+            auto view_it = m_tensor_map->find(m_input_names[idx]);
+            if (!base_name.empty() && view_it != m_tensor_map->end()) {
+                auto base_it = m_tensor_map->find(base_name);
+                if (base_it != m_tensor_map->end() &&
+                    view_it->second.get_node_shared_ptr() != base_it->second.get_node_shared_ptr()) {
+                    return view_it->second;
+                }
+                return base_it->second;
+            }
             if (!base_name.empty()) {
                 return m_tensor_map->at(base_name);
             }
@@ -133,6 +143,18 @@ class NodeContext : public frontend::NodeContext {
         return m_tensor_map->at(m_input_names[idx]);
     }
 
+    void cache_tensor(const std::string& name, const Output<Node>& tensor) const {
+        (*m_tensor_map)[name] = tensor;
+    }
+
+    Output<Node> get_cached_tensor(const std::string& name) const {
+        auto it = m_tensor_map->find(name);
+        if (it != m_tensor_map->end()) {
+            return it->second;
+        }
+        return Output<Node>();
+    }
+
     Output<Node> get_input(const std::string& name) const override {
         if (m_tensor_map->find(name) == m_tensor_map->end()) {
             throw std::runtime_error("'" + name + "' not found in tensor map.");
@@ -160,6 +182,8 @@ class NodeContext : public frontend::NodeContext {
 
     bool is_stateful() const { return m_decoder->is_stateful(); }
 
+    int get_static_n_tokens() const { return m_decoder->get_static_n_tokens(); }
+
 private:
     std::shared_ptr<GgmlDecoder> m_decoder;
     std::shared_ptr<TensorMap>& m_tensor_map;
diff --git a/ggml/src/ggml-openvino/openvino/op/cont.cpp b/ggml/src/ggml-openvino/openvino/op/cont.cpp
index 1d6cc672126..fed72cbfb93 100644
--- a/ggml/src/ggml-openvino/openvino/op/cont.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/cont.cpp
@@ -22,7 +22,7 @@ OutputVector translate_cont(const NodeContext & context) {
     auto dst_shape = context.get_output_shape().to_shape();
 
     if (context.get_op_dynamic_dim() != -1) {
-        dst_shape[3 - context.get_op_dynamic_dim()] = -1;
+        dst_shape[3 - context.get_op_dynamic_dim()] = context.is_static() ? context.get_static_n_tokens() : -1;
     }
 
     auto input = process_view_input_new(context, 0);
diff --git a/ggml/src/ggml-openvino/openvino/op/view.cpp b/ggml/src/ggml-openvino/openvino/op/view.cpp
index 7d777291939..33ea8517c88 100644
--- a/ggml/src/ggml-openvino/openvino/op/view.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/view.cpp
@@ -1,6 +1,8 @@
 #include "../op_table.h"
 #include "../utils.h"
+#include <openvino/op/constant.hpp>
 #include <openvino/op/reshape.hpp>
+#include <openvino/op/slice.hpp>
 #include <set>
 namespace ov {
 namespace frontend {
@@ -9,7 +11,101 @@ namespace op {
 
 OutputVector translate_view(const NodeContext & context) {
     num_inputs_check(context, 1, 1);
-    return {context.get_input(0)};
+
+    if (!context.is_static()) {
+        return {context.get_input(0)};
+    }
+
+    auto input = context.get_input(0);
+    auto src_shape = context.get_input_shape(0);
+    auto dst_shape = context.get_output_shape();
+
+    if (src_shape.rank().is_dynamic() || dst_shape.rank().is_dynamic()) {
+        return {input};
+    }
+
+    int64_t src_elems = 1, dst_elems = 1;
+    for (int64_t i = 0; i < src_shape.rank().get_length(); ++i) {
+        if (src_shape[i].is_dynamic()) return {input};
+        src_elems *= src_shape[i].get_length();
+    }
+    for (int64_t i = 0; i < dst_shape.rank().get_length(); ++i) {
+        if (dst_shape[i].is_dynamic()) return {input};
+        dst_elems *= dst_shape[i].get_length();
+    }
+
+    if (dst_elems >= src_elems) {
+        return {input};
+    }
+
+    auto src_stride = context.get_input_stride(0);
+    auto dst_stride = context.get_output_stride();
+    size_t view_offset = context.get_output_op_offset();
+
+    bool same_stride = (src_stride.size() == dst_stride.size());
+    if (same_stride) {
+        for (size_t i = 0; i < src_stride.size(); ++i) {
+            if (src_stride[i] != dst_stride[i]) {
+                same_stride = false;
+                break;
+            }
+        }
+    }
+
+    if (!same_stride) {
+        return {input};
+    }
+
+    auto src_ov_shape = src_shape.to_shape();
+    auto dst_ov_shape = dst_shape.to_shape();
+    size_t ndims = src_ov_shape.size();
+    if (dst_ov_shape.size() != ndims) {
+        return {input};
+    }
+
+    std::vector<int> diff_dims;
+    for (size_t i = 0; i < ndims; ++i) {
+        if (src_ov_shape[i] != dst_ov_shape[i]) {
+            diff_dims.push_back(static_cast<int>(i));
+        }
+    }
+
+    if (diff_dims.size() != 1) {
+        return {input};
+    }
+
+    int slice_dim = diff_dims[0];
+    int64_t dim_size = static_cast<int64_t>(src_ov_shape[slice_dim]);
+
+    size_t stride_at_dim = (slice_dim < static_cast<int>(ndims) - 1) ?
+        src_stride[slice_dim + 1] : src_stride[slice_dim];
+
+    size_t ov_stride_for_dim = 1;
+    for (size_t i = slice_dim + 1; i < ndims; ++i) {
+        ov_stride_for_dim *= src_ov_shape[i];
+    }
+    size_t elem_size = src_stride.back();
+    if (elem_size == 0) elem_size = 1;
+
+    int64_t begin_val = 0;
+    if (ov_stride_for_dim > 0 && elem_size > 0) {
+        begin_val = static_cast<int64_t>((view_offset / elem_size) / ov_stride_for_dim);
+    }
+    int64_t end_val = begin_val + static_cast<int64_t>(dst_ov_shape[slice_dim]);
+
+    if (begin_val < 0 || end_val > dim_size) {
+        return {input};
+    }
+
+    auto sliced = std::make_shared<ov::op::v8::Slice>(
+        input,
+        ov::op::v0::Constant::create(ov::element::i64, {1}, {begin_val}),
+        ov::op::v0::Constant::create(ov::element::i64, {1}, {end_val}),
+        ov::op::v0::Constant::create(ov::element::i64, {1}, {1}),
+        ov::op::v0::Constant::create(ov::element::i64, {1}, {slice_dim}));
+
+    sliced->set_friendly_name(context.get_output_name());
+    return {sliced->output(0)};
 }
 
 }  // namespace op
diff --git a/ggml/src/ggml-openvino/openvino/utils.cpp b/ggml/src/ggml-openvino/openvino/utils.cpp
index e0344aee3b8..3ca11989245 100644
--- a/ggml/src/ggml-openvino/openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/openvino/utils.cpp
@@ -17,6 +17,7 @@
 #include <openvino/op/reshape.hpp>
 #include <openvino/op/shape_of.hpp>
 #include <openvino/op/sin.hpp>
+#include <openvino/op/split.hpp>
 #include <openvino/op/squeeze.hpp>
 #include <openvino/op/subtract.hpp>
 #include <openvino/op/transpose.hpp>
@@ -262,6 +263,100 @@ ov::Output<ov::Node> process_view_input_new(const NodeContext & context, int inp
         return input;
     }
 
+    // If translate_view already resolved this VIEW (produced a Slice), the input
+    // will already have the expected shape — skip re-slicing.
+    auto expected_ov_shape = context.get_view_input_ov_shape(input_index, 0);
+    auto actual_shape = input.get_partial_shape();
+    if (expected_ov_shape.rank().is_static() && actual_shape.rank().is_static() &&
+        expected_ov_shape.rank() == actual_shape.rank()) {
+        bool shapes_match = true;
+        for (int64_t i = 0; i < expected_ov_shape.rank().get_length(); ++i) {
+            if (expected_ov_shape[i].is_static() && actual_shape[i].is_static() &&
+                expected_ov_shape[i] != actual_shape[i]) {
+                shapes_match = false;
+                break;
+            }
+        }
+        if (shapes_match) {
+            return input;
+        }
+    }
+
+    // In static mode, use Split instead of Slice for single-dimension reductions.
+    // This ensures NPUW's FOLD doesn't parametrize per-layer slice indices (which
+    // would introduce dynamic shapes). A shared Split node sits outside the repeated
+    // subgraph boundary; each layer receives one of its output ports.
+    if (context.is_static() && view_input_size == 1) {
+        auto view_stride_v = context.get_view_input_stride(input_index, 0);
+        auto view_src_stride_v = context.get_view_input_src_stride(input_index, 0);
+        auto view_ggml_shape = context.get_view_input_ggml_shape(input_index, 0);
+        auto view_src_ggml_shape = context.get_view_input_src_ggml_shape(input_index, 0);
+        auto view_offset = context.get_view_input_offset(input_index, 0);
+        auto view_src_offset = context.get_view_input_src_offset(input_index, 0);
+
+        size_t ndims = view_ggml_shape.size();
+        std::vector<int> diff_dims;
+        if (view_src_ggml_shape.size() == ndims) {
+            for (size_t i = 0; i < ndims; ++i) {
+                if (view_ggml_shape[i] != view_src_ggml_shape[i]) {
+                    diff_dims.push_back(static_cast<int>(i));
+                }
+            }
+        }
+
+        if (diff_dims.size() == 1) {
+            int split_dim = diff_dims[0];
+            int64_t num_splits = static_cast<int64_t>(view_src_ggml_shape[split_dim]);
+            int64_t chunk_size = static_cast<int64_t>(view_ggml_shape[split_dim]);
+
+            // Only apply when slicing exactly 1 element from a multi-element dimension
+            if (chunk_size == 1 && num_splits > 1) {
+                // Check suffix strides match (dimensions after split_dim)
+                bool suffix_ok = view_stride_v.size() == view_src_stride_v.size();
+                if (suffix_ok) {
+                    for (size_t i = static_cast<size_t>(split_dim) + 1; i < ndims; ++i) {
+                        if (view_stride_v[i] != view_src_stride_v[i]) {
+                            suffix_ok = false;
+                            break;
+                        }
+                    }
+                }
+
+                if (suffix_ok && view_src_stride_v[split_dim] > 0) {
+                    size_t relative_offset = view_offset >= view_src_offset ?
+                        view_offset - view_src_offset : 0;
+                    int64_t split_index = static_cast<int64_t>(
+                        relative_offset / view_src_stride_v[split_dim]);
+
+                    if (split_index >= 0 && split_index < num_splits) {
+                        // TODO: avoid hardcoded name
+                        std::string src_name = context.get_view_input_src_name(input_index, 0);
+                        std::string cache_key = "__split__" + src_name + "__" +
+                            std::to_string(split_dim) + "__";
+
+                        auto cached = context.get_cached_tensor(cache_key + "0");
+                        if (cached.get_node_shared_ptr() == nullptr) {
+                            auto axis_const = ov::op::v0::Constant::create(
+                                ov::element::i64, {}, {static_cast<int64_t>(split_dim)});
+                            auto split_node = std::make_shared<ov::op::v1::Split>(
+                                input, axis_const, static_cast<size_t>(num_splits));
+                            split_node->set_friendly_name(src_name + "_split");
+
+                            for (int64_t p = 0; p < num_splits; ++p) {
+                                context.cache_tensor(
+                                    cache_key + std::to_string(p),
+                                    split_node->output(static_cast<size_t>(p)));
+                            }
+                        }
+
+                        return context.get_cached_tensor(
+                            cache_key + std::to_string(split_index));
+                    }
+                }
+            }
+        }
+    }
+
     // Lambda function to process a single view operation
     auto process_single_view = [](ov::Output<ov::Node> current,
                                   size_t view_offset,

From 98093b1720df8682165d3e8225afeb101a430ec3 Mon Sep 17 00:00:00 2001
From: Mustafa Cavus <mustafa.cavus@intel.com>
Date: Thu, 21 May 2026 16:04:57 -0700
Subject: [PATCH 2/5] Temporary fix for gemma4 accuracy bug on NPU

---
 ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp b/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp
index d9fa4c24367..4124b6550b3 100644
--- a/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp
@@ -4,6 +4,7 @@
 
 #include <memory>
 #include <openvino/core/node_output.hpp>
+#include <openvino/op/clamp.hpp>
 #include <openvino/op/constant.hpp>
 #include <openvino/op/gelu.hpp>
 #include <openvino/op/multiply.hpp>
@@ -49,6 +50,16 @@ OutputVector translate_glu_geglu(const NodeContext & context) {
         std::swap(src0, src1);
     }
 
+    if (context.is_static()) {
+        // TODO: Temporary workaround for an NPU accuracy issue caused by fp16 overflow.
+        // To be removed once a permanent solution is implemented.
+        // Justification:
+        // For |x| > 5, GELU(x) ≈ max(x, 0)  (behaves like ReLU)
+        // So Clamp(-10, 10) only affects values where GELU would return ≈ max(x, 0) anyway.
+        // The only loss: values > 10 get mapped to 10 instead of x.
+        // In practice, FFN intermediates rarely exceed 10 after GEGLU gating.
+        src0 = std::make_shared<ov::op::v0::Clamp>(src0, -10.0, 10.0);
+    }
     auto gelu = std::make_shared<ov::op::v7::Gelu>(src0);
     auto res = std::make_shared<ov::op::v1::Multiply>(gelu, src1);
 

From 51ad7b6d7b73e5cd85830c63384326485eef2760 Mon Sep 17 00:00:00 2001
From: Mustafa Cavus <mustafa.cavus@intel.com>
Date: Thu, 21 May 2026 16:19:31 -0700
Subject: [PATCH 3/5] Remove hardcoded names for npu-fold handling

---
 .../src/ggml-openvino/openvino/node_context.h | 12 ----------
 ggml/src/ggml-openvino/openvino/utils.cpp     | 23 +++++++------------
 2 files changed, 8 insertions(+), 27 deletions(-)

diff --git a/ggml/src/ggml-openvino/openvino/node_context.h b/ggml/src/ggml-openvino/openvino/node_context.h
index 8e834caa422..a34764dde6e 100644
--- a/ggml/src/ggml-openvino/openvino/node_context.h
+++ b/ggml/src/ggml-openvino/openvino/node_context.h
@@ -143,18 +143,6 @@ class NodeContext : public frontend::NodeContext {
         return m_tensor_map->at(m_input_names[idx]);
     }
 
-    void cache_tensor(const std::string& name, const Output<Node>& tensor) const {
-        (*m_tensor_map)[name] = tensor;
-    }
-
-    Output<Node> get_cached_tensor(const std::string& name) const {
-        auto it = m_tensor_map->find(name);
-        if (it != m_tensor_map->end()) {
-            return it->second;
-        }
-        return Output<Node>();
-    }
-
     Output<Node> get_input(const std::string& name) const override {
         if (m_tensor_map->find(name) == m_tensor_map->end()) {
             throw std::runtime_error("'" + name + "' not found in tensor map.");
diff --git a/ggml/src/ggml-openvino/openvino/utils.cpp b/ggml/src/ggml-openvino/openvino/utils.cpp
index 3ca11989245..e8be8cfe790 100644
--- a/ggml/src/ggml-openvino/openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/openvino/utils.cpp
@@ -329,28 +329,21 @@ ov::Output<ov::Node> process_view_input_new(const NodeContext & context, int inp
                         relative_offset / view_src_stride_v[split_dim]);
 
                     if (split_index >= 0 && split_index < num_splits) {
-                        // TODO: avoid hardcoded name
-                        std::string src_name = context.get_view_input_src_name(input_index, 0);
-                        std::string cache_key = "__split__" + src_name + "__" +
-                            std::to_string(split_dim) + "__";
+                        auto src_node = input.get_node_shared_ptr();
+                        std::string rt_key = "split_dim_" + std::to_string(split_dim);
+                        auto & rt_info = src_node->get_rt_info();
 
-                        auto cached = context.get_cached_tensor(cache_key + "0");
-                        if (cached.get_node_shared_ptr() == nullptr) {
+                        if (rt_info.find(rt_key) == rt_info.end()) {
                             auto axis_const = ov::op::v0::Constant::create(
                                 ov::element::i64, {}, {static_cast<int64_t>(split_dim)});
                             auto split_node = std::make_shared<ov::op::v1::Split>(
                                 input, axis_const, static_cast<size_t>(num_splits));
-                            split_node->set_friendly_name(src_name + "_split");
-
-                            for (int64_t p = 0; p < num_splits; ++p) {
-                                context.cache_tensor(
-                                    cache_key + std::to_string(p),
-                                    split_node->output(static_cast<size_t>(p)));
-                            }
+                            split_node->set_friendly_name(src_node->get_friendly_name() + "_split");
+                            rt_info[rt_key] = split_node;
                         }
 
-                        return context.get_cached_tensor(
-                            cache_key + std::to_string(split_index));
+                        auto split_node = rt_info[rt_key].as<std::shared_ptr<ov::op::v1::Split>>();
+                        return split_node->output(static_cast<size_t>(split_index));
                     }
                 }
             }

From 0fa0534f40752f022f496f4533f872530e3edb86 Mon Sep 17 00:00:00 2001
From: Mustafa Cavus <mustafa.cavus@intel.com>
Date: Thu, 21 May 2026 16:50:36 -0700
Subject: [PATCH 4/5] revert static n tokens for cont translation as it is not
 needed

---
 ggml/src/ggml-openvino/ggml-decoder.h          | 2 +-
 ggml/src/ggml-openvino/openvino/decoder.h      | 2 --
 ggml/src/ggml-openvino/openvino/node_context.h | 2 --
 ggml/src/ggml-openvino/openvino/op/cont.cpp    | 2 +-
 4 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h
index 35bed0ba476..d59180ce149 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -206,7 +206,7 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
 
     virtual bool is_stateful() const override { return m_is_stateful; }
 
-    virtual int get_static_n_tokens() const override {
+    int get_static_n_tokens() const {
         return m_is_prefill ? m_prefill_chunk_size : 1;
     }
 
diff --git a/ggml/src/ggml-openvino/openvino/decoder.h b/ggml/src/ggml-openvino/openvino/decoder.h
index c602aae73d7..bc41876875c 100644
--- a/ggml/src/ggml-openvino/openvino/decoder.h
+++ b/ggml/src/ggml-openvino/openvino/decoder.h
@@ -101,8 +101,6 @@ class GgmlDecoder : public DecoderBase {
     virtual int is_swa_layer(int layer) const = 0;
 
     virtual int32_t get_op_dynamic_dim(int node_idx) const = 0;
-
-    virtual int get_static_n_tokens() const = 0;
 };
 
 }  // namespace ggml
diff --git a/ggml/src/ggml-openvino/openvino/node_context.h b/ggml/src/ggml-openvino/openvino/node_context.h
index a34764dde6e..383ee8ac4ba 100644
--- a/ggml/src/ggml-openvino/openvino/node_context.h
+++ b/ggml/src/ggml-openvino/openvino/node_context.h
@@ -170,8 +170,6 @@ class NodeContext : public frontend::NodeContext {
 
     bool is_stateful() const { return m_decoder->is_stateful(); }
 
-    int get_static_n_tokens() const { return m_decoder->get_static_n_tokens(); }
-
 private:
     std::shared_ptr<GgmlDecoder> m_decoder;
     std::shared_ptr<TensorMap>& m_tensor_map;
diff --git a/ggml/src/ggml-openvino/openvino/op/cont.cpp b/ggml/src/ggml-openvino/openvino/op/cont.cpp
index fed72cbfb93..1d6cc672126 100644
--- a/ggml/src/ggml-openvino/openvino/op/cont.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/cont.cpp
@@ -22,7 +22,7 @@ OutputVector translate_cont(const NodeContext & context) {
     auto dst_shape = context.get_output_shape().to_shape();
 
     if (context.get_op_dynamic_dim() != -1) {
-        dst_shape[3 - context.get_op_dynamic_dim()] = context.is_static() ? context.get_static_n_tokens() : -1;
+        dst_shape[3 - context.get_op_dynamic_dim()] = -1;
     }
 
     auto input = process_view_input_new(context, 0);

From 59b8969add8d54839583a20ba121b992c63661e7 Mon Sep 17 00:00:00 2001
From: Mustafa Cavus <mustafa.cavus@intel.com>
Date: Mon, 25 May 2026 08:17:18 -0700
Subject: [PATCH 5/5] removed unused variable

---
 ggml/src/ggml-openvino/openvino/op/view.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/ggml/src/ggml-openvino/openvino/op/view.cpp b/ggml/src/ggml-openvino/openvino/op/view.cpp
index 33ea8517c88..183d6bb7e58 100644
--- a/ggml/src/ggml-openvino/openvino/op/view.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/view.cpp
@@ -77,9 +77,6 @@ OutputVector translate_view(const NodeContext & context) {
     int slice_dim = diff_dims[0];
     int64_t dim_size = static_cast<int64_t>(src_ov_shape[slice_dim]);
 
-    size_t stride_at_dim = (slice_dim < static_cast<int>(ndims) - 1) ?
-        src_stride[slice_dim + 1] : src_stride[slice_dim];
-
     size_t ov_stride_for_dim = 1;
     for (size_t i = slice_dim + 1; i < ndims; ++i) {
         ov_stride_for_dim *= src_ov_shape[i];