From eec8bb629c95d19f4098ad3ebbd52590cb811e5f Mon Sep 17 00:00:00 2001
From: "Kumar, Arisha" <arisha.kumar@intel.com>
Date: Fri, 29 May 2026 12:37:53 -0700
Subject: [PATCH 1/3] Add shape subgraph folding and dynamic output dim
 resolution for WebNN EP

- Add ShapeSubgraphFolder to pre-evaluate shape subgraphs (Where/Equal/Range/ConstantOfShape chains) so Reshape/Expand see constant shapes at build time

- Integrate folded shapes into Reshape and Expand op builders

- Support additive dim_param expressions (e.g. past_sequence_length + sequence_length)

- Add heuristic fallback for unresolved output dimensions from runtime inputs

- Fix QDQ per-axis reshape to handle all axes (including the last axis, which was previously skipped)

- Claim folded nodes in GetCapability to keep them in WebNN partition
---
 .../webnn/builders/impl/expand_op_builder.cc  |  16 +-
 .../webnn/builders/impl/qdq_op_builder.cc     |   7 +-
 .../webnn/builders/impl/reshape_op_builder.cc |  16 +-
 .../providers/webnn/builders/model_builder.cc |  31 +
 .../providers/webnn/builders/model_builder.h  |   7 +
 .../webnn/builders/shape_subgraph_folder.cc   | 568 ++++++++++++++++++
 .../webnn/builders/shape_subgraph_folder.h    |  85 +++
 .../webnn/webnn_execution_provider.cc         |  98 ++-
 8 files changed, 804 insertions(+), 24 deletions(-)
 create mode 100644 onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc
 create mode 100644 onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h

diff --git a/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc
index 665075018715f..3748630d4f434 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc
@@ -39,10 +39,13 @@ class ExpandOpBuilder : public BaseOpBuilder {
 void ExpandOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
   const auto& input_defs = node.InputDefs();
   const auto& shape_name = input_defs[1]->Name();
-  // Only skip the shape input when it is a constant initializer AND the input has static shape.
+  // Skip the shape input when:
+  // 1. It was folded by the shape subgraph folder (compile-time constant), OR
+  // 2. It is a constant initializer AND the input has static shape.
   // When the input has dynamic shape, we need the shape operand for dynamicExpand even if it's constant.
-  if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) &&
-      !HasDynamicShape(*input_defs[0])) {
+  if (model_builder.IsFoldedShape(shape_name) ||
+      (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) &&
+       !HasDynamicShape(*input_defs[0]))) {
     model_builder.AddInitializerToSkip(shape_name);
   }
 }
@@ -76,6 +79,13 @@ Status ExpandOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
 
     emscripten::val output_shape_arr = emscripten::val::array(GetNarrowedIntFromInt64<uint32_t>(output_shape));
     output = model_builder.GetBuilder().call<emscripten::val>("expand", input, output_shape_arr, options);
+  } else if (model_builder.IsFoldedShape(input_defs[1]->Name())) {
+    // Folded shape path: shape subgraph was pre-evaluated to a constant vector.
+    const auto* folded = model_builder.GetFoldedShape(input_defs[1]->Name());
+    ORT_RETURN_IF_NOT(folded != nullptr, "IsFoldedShape true but GetFoldedShape returned null");
+
+    emscripten::val output_shape_arr = emscripten::val::array(GetNarrowedIntFromInt64<uint32_t>(*folded));
+    output = model_builder.GetBuilder().call<emscripten::val>("expand", input, output_shape_arr, options);
   } else {
     // Operand shape path: use dynamicExpand with the shape operand.
     emscripten::val shape_operand = model_builder.GetOperand(input_defs[1]->Name());
diff --git a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc
index e07814521dafa..5d7e2c5620faf 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc
@@ -85,10 +85,11 @@ Status QDQOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
     }
   }
 
-  // For per-axis quantization/dequantization and axis is not equal to input_rank - 1,
-  // we need to reshape the scale and zero_point tensors to make them broadcastable with the input tensor.
+  // For per-axis quantization/dequantization, the scale is 1-D.
+  // WebNN requires the scale and zero_point tensors to have the same rank as the input tensor.
+  // We need to reshape them to make them broadcastable with the input tensor.
   if (scale_shape.size() == 1 && input_rank > 1 &&
-      block_size == 0 && axis != static_cast<int32_t>(input_rank - 1)) {
+      block_size == 0) {
     // Insert ones before and after the axis dimension for broadcasting of scale tensor.
     // Use emscripten::val::array() to support dynamic axis dim via input["shape"][axis].
     emscripten::val target_shape = emscripten::val::array();
diff --git a/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc
index 556b73b5d0fc1..3c8afe37012a1 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc
@@ -38,9 +38,11 @@ class ReshapeOpBuilder : public BaseOpBuilder {
 
 void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
   const auto& shape_name = node.InputDefs()[1]->Name();
-  // Only skip the shape input when it is a constant initializer (consumed at build time).
+  // Only skip the shape input when it is a constant initializer (consumed at build time)
+  // or when it was folded by the shape subgraph folder.
   // When it is an operand, we need it as the newShape input for dynamicReshape.
-  if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name)) {
+  if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) ||
+      model_builder.IsFoldedShape(shape_name)) {
     model_builder.AddInitializerToSkip(shape_name);
   }
 }
@@ -220,6 +222,16 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
       emscripten::val new_shape = emscripten::val::array();
       output = model_builder.GetBuilder().call<emscripten::val>("reshape", input, new_shape, options);
     }
+  } else if (model_builder.IsFoldedShape(input_defs[1]->Name())) {
+    // Folded shape path: the shape subgraph was pre-evaluated to a constant vector.
+    const auto* folded = model_builder.GetFoldedShape(input_defs[1]->Name());
+    ORT_RETURN_IF_NOT(folded != nullptr, "IsFoldedShape true but GetFoldedShape returned null");
+    emscripten::val new_shape = emscripten::val::array();
+    for (int64_t dim : *folded) {
+      ORT_RETURN_IF_NOT(dim >= 0 && dim <= 0xFFFFFFFFLL, "Folded Reshape dim is negative or exceeds uint32: ", dim);
+      new_shape.call<void>("push", static_cast<uint32_t>(dim));
+    }
+    output = model_builder.GetBuilder().call<emscripten::val>("reshape", input, new_shape, options);
   } else {
     // Operand shape path: shape is a non-constant operand. Use dynamicReshape.
     emscripten::val shape_operand = model_builder.GetOperand(input_defs[1]->Name());
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc
index 3667d42cb9a39..37af542dbbdd0 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc
@@ -42,6 +42,13 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge
 }
 
 Status ModelBuilder::Initialize() {
+  // Run shape subgraph folding FIRST, before PreprocessInitializers.
+  // This pre-evaluates shape subgraphs (Where/Equal/Range/ConstantOfShape chains)
+  // so that Reshape/Expand/etc. see constant shapes instead of dynamic subgraphs.
+  // Must run before PreprocessInitializers because AddInitializersToSkip checks IsFoldedShape().
+  shape_folder_ = std::make_unique<ShapeSubgraphFolder>(graph_viewer_, free_dimension_bounds_, logger_);
+  ORT_RETURN_IF_ERROR(shape_folder_->Run());
+
   PreprocessInitializers();
   ORT_RETURN_IF_ERROR(RegisterInitializers());
   ORT_RETURN_IF_ERROR(RegisterModelInputs());
@@ -85,6 +92,12 @@ void ModelBuilder::PreprocessInitializers() {
   for (size_t i = 0; i < node_indices.size(); i++) {
     const auto* node(graph_viewer_.GetNode(node_indices[i]));
 
+    // Skip nodes that are part of a folded shape subgraph — their initializer inputs
+    // don't need to be registered as WebNN constants.
+    if (IsFoldedNode(node->Index())) {
+      continue;
+    }
+
     // find all initializers consumed. AddInitializersToSkip will potentially decrement the usage count.
     for (const auto* input : node->InputDefs()) {
       if (input->Exists() && Contains(initializers, input->Name())) {
@@ -401,6 +414,12 @@ Status ModelBuilder::AddOperations() {
   const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
   for (size_t i = 0; i < node_indices.size(); i++) {
     const auto* node(graph_viewer_.GetNode(node_indices[i]));
+
+    // Skip nodes that are part of a folded shape subgraph.
+    if (IsFoldedNode(node->Index())) {
+      continue;
+    }
+
     if (const auto* op_builder = GetOpBuilder(*node)) {
       ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, *node, logger_));
     } else {
@@ -502,5 +521,17 @@ const ModelBuilder::DimProvenance* ModelBuilder::GetDimProvenance(const std::str
   return it != dim_provenance_.end() ? &it->second : nullptr;
 }
 
+bool ModelBuilder::IsFoldedShape(const std::string& name) const {
+  return shape_folder_ != nullptr && shape_folder_->IsFoldedShape(name);  // false while no folder exists
+}
+
+const std::vector<int64_t>* ModelBuilder::GetFoldedShape(const std::string& name) const {
+  return shape_folder_ == nullptr ? nullptr : shape_folder_->GetFoldedShape(name);  // nullptr: not folded
+}
+
+bool ModelBuilder::IsFoldedNode(NodeIndex node_index) const {
+  return shape_folder_ != nullptr && shape_folder_->IsFoldedNode(node_index);  // false while no folder exists
+}
+
 }  // namespace webnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h
index f7535d19e4e1e..c2b68f18ea33e 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.h
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.h
@@ -8,6 +8,7 @@
 #include <core/graph/graph_viewer.h>
 
 #include "model.h"
+#include "shape_subgraph_folder.h"
 #include "core/framework/execution_provider.h"
 #include "core/providers/webnn/builders/helper.h"
 
@@ -58,6 +59,11 @@ class ModelBuilder {
   // Returns true when GQA should use concat-based (stateful) KV-cache; false for ScatterND (stateless).
   bool IsCausalLMEnabled() const { return enable_causal_lm_; }
 
+  // Shape subgraph folder: check if a NodeArg name has been folded to a constant shape.
+  bool IsFoldedShape(const std::string& name) const;
+  const std::vector<int64_t>* GetFoldedShape(const std::string& name) const;
+  bool IsFoldedNode(NodeIndex node_index) const;
+
   // The initializer will be processed separately, skip it as an initializer.
   void AddInitializerToSkip(const std::string& tensor_name);
 
@@ -99,6 +105,7 @@ class ModelBuilder {
   emscripten::val wnn_limits_ = emscripten::val::undefined();
   FreeDimensionBounds free_dimension_bounds_;
   bool enable_causal_lm_;
+  std::unique_ptr<ShapeSubgraphFolder> shape_folder_;
   InlinedHashMap<std::string, emscripten::val> wnn_operands_;
   std::vector<std::string> input_names_;
   std::vector<std::string> output_names_;
diff --git a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc
new file mode 100644
index 0000000000000..c38b993790414
--- /dev/null
+++ b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc
@@ -0,0 +1,568 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) Intel Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "shape_subgraph_folder.h"
+
+#include "core/framework/tensorprotoutils.h"
+#include "core/providers/common.h"
+
+#include <queue>
+#include <algorithm>
+#include <numeric>
+#include <cmath>
+
+namespace onnxruntime {
+namespace webnn {
+
+ShapeSubgraphFolder::ShapeSubgraphFolder(const GraphViewer& graph_viewer,
+                                         const FreeDimensionBounds& free_dimension_bounds,
+                                         const logging::Logger& logger)
+    : graph_viewer_(graph_viewer),
+      free_dimension_bounds_(free_dimension_bounds),
+      logger_(logger) {  // only initializes members from the arguments; the folding itself is done by Run()
+}
+
+// Returns true when `node` is a default-domain ONNX op that the folder is willing to trace through.
+bool ShapeSubgraphFolder::IsSupportedShapeOp(const Node& node) {
+  static const InlinedHashSet<std::string_view> kTraceableOps = {
+      "Shape", "Gather", "Concat", "Unsqueeze", "Squeeze", "Slice",
+      "Cast", "Add", "Sub", "Mul", "Div", "Equal", "Where",
+      "ConstantOfShape", "Range", "Reshape", "Expand",
+      "Neg", "Abs", "Floor", "Ceil",  // NOTE(review): Floor/Ceil have no branch in EvaluateNode
+  };
+  // Accept only the default ONNX domain, spelled as "", "onnx" or kOnnxDomain.
+  const auto& domain = node.Domain();
+  const bool is_onnx_domain = domain.empty() || domain == "onnx" || domain == kOnnxDomain;
+  return is_onnx_domain && kTraceableOps.find(node.OpType()) != kTraceableOps.end();
+}
+
+// Fills `shape` with the dimensions of `arg`, substituting the max_size bound from
+// free_dimension_bounds_ for symbolic dims. Returns false if any dim cannot be resolved.
+bool ShapeSubgraphFolder::GetResolvedShape(const NodeArg* arg, std::vector<int64_t>& shape) const {
+  const auto* proto = arg->Shape();
+  if (proto == nullptr) return false;
+
+  shape.clear();
+  const int rank = proto->dim_size();
+  for (int axis = 0; axis < rank; ++axis) {
+    const auto& dim = proto->dim(axis);
+    if (dim.has_dim_value()) {
+      shape.push_back(dim.dim_value());
+      continue;
+    }
+    // Neither a value nor a symbol: the dim is unknown.
+    if (!dim.has_dim_param()) return false;
+
+    // Symbolic dim: resolve it through free_dimension_bounds_, using max_size as the value
+    // (consistent with WebNN EP behavior).
+    const auto bound = free_dimension_bounds_.find(dim.dim_param());
+    if (bound == free_dimension_bounds_.end()) return false;  // Can't resolve this symbolic dim
+    shape.push_back(static_cast<int64_t>(bound->second.max_size));
+  }
+  return true;
+}
+
+// Returns true when input `input_index` of `consumer` is a folding candidate slot:
+// Reshape[1], Expand[1], Tile[1], ConstantOfShape[0] and Slice starts/ends/axes/steps [1..4].
+static bool IsShapeConsumingSlot(const Node& consumer, size_t input_index) {
+  const auto& op_type = consumer.OpType();
+  if (input_index == 0) return op_type == "ConstantOfShape";
+  if (input_index == 1) {
+    return op_type == "Reshape" || op_type == "Expand" || op_type == "Tile" || op_type == "Slice";
+  }
+  return op_type == "Slice" && input_index <= 4;
+}
+
+// Attempts to pre-evaluate the subgraph producing `shape_arg` into a constant int64 vector.
+// Returns true and records the result in folded_shapes_/folded_nodes_ on success, false otherwise.
+bool ShapeSubgraphFolder::TryFoldShapeSubgraph(const NodeArg* shape_arg) {
+  const std::string& target_name = shape_arg->Name();
+
+  // Already folded?
+  if (folded_shapes_.count(target_name)) return true;
+
+  if (graph_viewer_.GetConstantInitializer(target_name)) return false;  // Already handled normally
+
+  // BFS backward to find the producer subgraph
+  std::unordered_map<std::string, std::vector<int64_t>> known_values;
+  std::vector<const Node*> topo_order;  // discovered nodes; put into forward eval order below
+  InlinedHashSet<NodeIndex> visited_nodes;
+  InlinedHashSet<std::string> visited_args;
+  std::queue<const NodeArg*> worklist;
+  worklist.push(shape_arg);
+  visited_args.insert(target_name);
+
+  bool can_fold = true;
+
+  while (!worklist.empty() && can_fold) {
+    const NodeArg* current = worklist.front();
+    worklist.pop();
+    const std::string& name = current->Name();
+
+    if (known_values.count(name)) continue;  // Skip if already known
+
+    // Check if it's a constant initializer
+    const auto* init = graph_viewer_.GetConstantInitializer(name);
+    if (init) {
+      std::vector<int64_t> values;
+      // Floats fold only when integral and within int64 range; truncating e.g. 0.5f to 0 would corrupt shape math.
+      auto push_float = [&](float v) {
+        can_fold = can_fold && std::fabs(v) < 9.0e18f && v == std::trunc(v);
+        if (can_fold) values.push_back(static_cast<int64_t>(v));
+      };
+      if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
+        if (!init->int64_data().empty()) {
+          values.assign(init->int64_data().begin(), init->int64_data().end());
+        } else if (!init->raw_data().empty()) {
+          const int64_t* data = reinterpret_cast<const int64_t*>(init->raw_data().data());
+          size_t count = init->raw_data().size() / sizeof(int64_t);
+          values.assign(data, data + count);
+        }
+      } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32) {
+        if (!init->int32_data().empty()) {
+          for (auto v : init->int32_data()) values.push_back(static_cast<int64_t>(v));
+        } else if (!init->raw_data().empty()) {
+          const int32_t* data = reinterpret_cast<const int32_t*>(init->raw_data().data());
+          size_t count = init->raw_data().size() / sizeof(int32_t);
+          for (size_t i = 0; i < count; i++) values.push_back(static_cast<int64_t>(data[i]));
+        }
+      } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+        if (!init->float_data().empty()) {
+          for (auto v : init->float_data()) push_float(v);
+        } else if (!init->raw_data().empty()) {
+          const float* data = reinterpret_cast<const float*>(init->raw_data().data());
+          size_t count = init->raw_data().size() / sizeof(float);
+          for (size_t i = 0; i < count; i++) push_float(data[i]);
+        }
+      } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_BOOL) {
+        if (!init->int32_data().empty()) {
+          for (auto v : init->int32_data()) values.push_back(static_cast<int64_t>(v));
+        } else if (!init->raw_data().empty()) {
+          const uint8_t* data = reinterpret_cast<const uint8_t*>(init->raw_data().data());
+          size_t count = init->raw_data().size();
+          for (size_t i = 0; i < count; i++) values.push_back(static_cast<int64_t>(data[i]));
+        }
+      } else {
+        can_fold = false;
+        break;
+      }
+      // Never fabricate data: if the payload was not fully decoded (e.g. external data), do not fold.
+      int64_t expected_count = 1;
+      for (const auto d : init->dims()) expected_count *= d;
+      if (static_cast<int64_t>(values.size()) != expected_count) {
+        can_fold = false;
+        break;
+      }
+      known_values[name] = std::move(values);
+      continue;
+    }
+
+    // A value without a producer node (e.g. a graph input) is only known at runtime: cannot fold.
+    const auto* producer = graph_viewer_.GetProducerNode(name);
+    if (!producer) {
+      can_fold = false;
+      break;
+    }
+
+    // Check if the producer is a supported shape op
+    if (!IsSupportedShapeOp(*producer)) {
+      can_fold = false;
+      break;
+    }
+
+    if (visited_nodes.insert(producer->Index()).second) {
+      topo_order.push_back(producer);
+      // Shape only needs the static shape of its input (see EvaluateNode), never its runtime value,
+      // so tracing stops here instead of rejecting the fold because the tensor is produced at runtime.
+      if (producer->OpType() == "Shape") continue;
+
+      // Add all inputs of this producer to the worklist
+      for (const auto* input_def : producer->InputDefs()) {
+        if (input_def && input_def->Exists() && visited_args.insert(input_def->Name()).second) {
+          worklist.push(input_def);
+        }
+      }
+    }
+  }
+
+  if (!can_fold) return false;
+
+  // Reversed BFS discovery order is not a valid evaluation order when a node is reachable at several
+  // depths, so rebuild the list from the graph's own topological order.
+  topo_order.clear();
+  for (const auto node_index : graph_viewer_.GetNodesInTopologicalOrder()) {
+    if (visited_nodes.count(node_index)) topo_order.push_back(graph_viewer_.GetNode(node_index));
+  }
+
+  // Evaluate nodes in topological order
+  for (const Node* node : topo_order) {
+    std::vector<int64_t> result;
+    if (!EvaluateNode(*node, known_values, result)) return false;
+    // Store results for all outputs
+    for (const auto* output_def : node->OutputDefs()) {
+      if (output_def && output_def->Exists()) known_values[output_def->Name()] = result;
+    }
+  }
+
+  // The target should now be in known_values
+  auto it = known_values.find(target_name);
+  if (it == known_values.end()) return false;
+
+  // Store the folded result
+  folded_shapes_[target_name] = it->second;
+
+  // Mark nodes in the subgraph as folded, but only if ALL their outputs are consumed
+  // exclusively by other folded nodes or shape-consuming slots. If any output feeds
+  // a non-shape consumer outside the subgraph, we cannot skip that node.
+  for (const Node* node : topo_order) {
+    bool can_skip = true;
+    for (auto it2 = node->OutputEdgesBegin(); it2 != node->OutputEdgesEnd(); ++it2) {
+      const Node& consumer = it2->GetNode();
+      if (visited_nodes.count(consumer.Index())) continue;  // consumer is in our subgraph
+      // Check if the consumer uses this output only in a shape-consuming slot
+      if (!IsShapeConsumingSlot(consumer, it2->GetDstArgIndex())) {
+        can_skip = false;
+        break;
+      }
+    }
+    if (can_skip) folded_nodes_.insert(node->Index());
+  }
+
+  return true;
+}
+
+// Evaluates one shape-domain node on flattened int64 values. Inputs are looked up in `known_values`
+// and the flattened output is written to `result`. Returns false when the op, a missing input, or
+// incompatible operand sizes prevent evaluation; the caller then gives up on folding the subgraph.
+bool ShapeSubgraphFolder::EvaluateNode(
+    const Node& node,
+    const std::unordered_map<std::string, std::vector<int64_t>>& known_values,
+    std::vector<int64_t>& result) {
+  const auto& op = node.OpType();
+  const auto& inputs = node.InputDefs();
+
+  // Helper to get input values
+  auto get_input = [&](size_t idx) -> const std::vector<int64_t>* {
+    if (idx >= inputs.size() || !inputs[idx] || !inputs[idx]->Exists()) return nullptr;
+    auto it = known_values.find(inputs[idx]->Name());
+    return (it != known_values.end()) ? &it->second : nullptr;
+  };
+  // Flattened broadcasting is only defined for operands holding one element or the full output size.
+  auto fits = [](const std::vector<int64_t>* v, size_t size) { return v->size() == 1 || v->size() == size; };
+
+  if (op == "Shape") {
+    // Shape op: return the resolved shape of input[0]
+    if (inputs.empty() || !inputs[0] || !GetResolvedShape(inputs[0], result)) return false;
+
+    // Handle start/end attributes (Shape opset 15+)
+    const auto& attrs = node.GetAttributes();
+    int64_t start = 0, end = static_cast<int64_t>(result.size());
+    if (attrs.count("start")) start = attrs.at("start").i();
+    if (attrs.count("end")) end = attrs.at("end").i();
+    if (start < 0) start += static_cast<int64_t>(result.size());
+    if (end < 0) end += static_cast<int64_t>(result.size());
+    start = std::max(int64_t(0), std::min(start, static_cast<int64_t>(result.size())));
+    // Clamp end to at least start: start > end means an empty slice, not an inverted iterator range.
+    end = std::max(start, std::min(end, static_cast<int64_t>(result.size())));
+    result = std::vector<int64_t>(result.begin() + static_cast<ptrdiff_t>(start), result.begin() + static_cast<ptrdiff_t>(end));
+    return true;
+  }
+
+  if (op == "Gather") {
+    const auto* data = get_input(0);
+    const auto* indices = get_input(1);
+    if (!data || !indices) return false;
+    if (indices->empty()) return false;  // an empty index list is not handled here
+
+    result.clear();
+    for (int64_t idx : *indices) {
+      if (idx < 0) idx += static_cast<int64_t>(data->size());
+      if (idx < 0 || idx >= static_cast<int64_t>(data->size())) return false;
+      result.push_back((*data)[static_cast<size_t>(idx)]);
+    }
+    return true;
+  }
+
+  if (op == "Concat") {
+    // For 1-D shape vectors, axis is always 0 — just concatenate all inputs.
+    result.clear();
+    for (size_t i = 0; i < inputs.size(); i++) {
+      const auto* inp = get_input(i);
+      if (!inp) return false;
+      result.insert(result.end(), inp->begin(), inp->end());
+    }
+    return true;
+  }
+
+  if (op == "Unsqueeze") {
+    const auto* data = get_input(0);
+    if (!data) return false;
+    // Values are kept flattened, so inserting size-1 dimensions leaves them unchanged.
+    result = *data;
+    // An axes input (opset 13+) must still be foldable, even though its value is not needed here.
+    if (inputs.size() > 1 && !get_input(1)) return false;
+    return true;
+  }
+
+  if (op == "Squeeze") {
+    const auto* data = get_input(0);
+    if (!data) return false;
+    result = *data;
+    return true;
+  }
+
+  if (op == "Cast") {
+    const auto* data = get_input(0);
+    if (!data) return false;
+    // Cast just passes through for int64 purposes
+    result = *data;
+    return true;
+  }
+
+  if (op == "Neg") {
+    const auto* data = get_input(0);
+    if (!data) return false;
+    result.resize(data->size());
+    for (size_t i = 0; i < data->size(); i++) result[i] = -(*data)[i];
+    return true;
+  }
+
+  if (op == "Abs") {
+    const auto* data = get_input(0);
+    if (!data) return false;
+    result.resize(data->size());
+    for (size_t i = 0; i < data->size(); i++) result[i] = std::abs((*data)[i]);
+    return true;
+  }
+
+  // Binary element-wise ops: Add, Sub, Mul, Div
+  if (op == "Add" || op == "Sub" || op == "Mul" || op == "Div") {
+    const auto* a = get_input(0);
+    const auto* b = get_input(1);
+    if (!a || !b) return false;
+    // Broadcasting: if one is scalar (size 1), broadcast to the other's size
+    const size_t size = std::max(a->size(), b->size());
+    if (!fits(a, size) || !fits(b, size)) return false;  // mismatched sizes would index out of bounds
+    result.resize(size);
+    for (size_t i = 0; i < size; i++) {
+      int64_t va = (*a)[a->size() == 1 ? 0 : i];
+      int64_t vb = (*b)[b->size() == 1 ? 0 : i];
+      if (op == "Add") result[i] = va + vb;
+      else if (op == "Sub") result[i] = va - vb;
+      else if (op == "Mul") result[i] = va * vb;
+      else if (op == "Div") {
+        if (vb == 0) return false;
+        result[i] = va / vb;
+      }
+    }
+    return true;
+  }
+
+  if (op == "Equal") {
+    const auto* a = get_input(0);
+    const auto* b = get_input(1);
+    if (!a || !b) return false;
+    const size_t size = std::max(a->size(), b->size());
+    if (!fits(a, size) || !fits(b, size)) return false;  // mismatched sizes would index out of bounds
+    result.resize(size);
+    for (size_t i = 0; i < size; i++) {
+      int64_t va = (*a)[a->size() == 1 ? 0 : i];
+      int64_t vb = (*b)[b->size() == 1 ? 0 : i];
+      result[i] = (va == vb) ? 1 : 0;
+    }
+    return true;
+  }
+
+  if (op == "Where") {
+    const auto* cond = get_input(0);
+    const auto* x = get_input(1);
+    const auto* y = get_input(2);
+    if (!cond || !x || !y) return false;
+    const size_t size = std::max({cond->size(), x->size(), y->size()});
+    if (!fits(cond, size) || !fits(x, size) || !fits(y, size)) return false;  // would index out of bounds
+    result.resize(size);
+    for (size_t i = 0; i < size; i++) {
+      int64_t c = (*cond)[cond->size() == 1 ? 0 : i];
+      int64_t vx = (*x)[x->size() == 1 ? 0 : i];
+      int64_t vy = (*y)[y->size() == 1 ? 0 : i];
+      result[i] = c ? vx : vy;
+    }
+    return true;
+  }
+
+  if (op == "Range") {
+    const auto* start_v = get_input(0);
+    const auto* limit_v = get_input(1);
+    const auto* delta_v = get_input(2);
+    if (!start_v || !limit_v || !delta_v) return false;
+    if (start_v->empty() || limit_v->empty() || delta_v->empty()) return false;
+
+    int64_t start = (*start_v)[0];
+    int64_t limit = (*limit_v)[0];
+    int64_t delta = (*delta_v)[0];
+    if (delta == 0) return false;
+
+    result.clear();
+    if (delta > 0) {
+      for (int64_t v = start; v < limit; v += delta) result.push_back(v);
+    } else {
+      for (int64_t v = start; v > limit; v += delta) result.push_back(v);
+    }
+    return true;
+  }
+
+  if (op == "ConstantOfShape") {
+    const auto* shape_input = get_input(0);
+    if (!shape_input) return false;
+
+    // Get the fill value from attribute
+    int64_t fill_value = 0;
+    const auto& attrs = node.GetAttributes();
+    if (attrs.count("value")) {
+      const auto& tensor = attrs.at("value").t();
+      if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
+        if (!tensor.int64_data().empty()) fill_value = tensor.int64_data(0);
+        else if (!tensor.raw_data().empty()) fill_value = *reinterpret_cast<const int64_t*>(tensor.raw_data().data());
+      } else if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32) {
+        if (!tensor.int32_data().empty()) fill_value = tensor.int32_data(0);
+        else if (!tensor.raw_data().empty()) fill_value = *reinterpret_cast<const int32_t*>(tensor.raw_data().data());
+      } else if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+        float fv = 0.0f;
+        if (!tensor.float_data().empty()) fv = tensor.float_data(0);
+        else if (!tensor.raw_data().empty()) fv = *reinterpret_cast<const float*>(tensor.raw_data().data());
+        if (!(std::fabs(fv) < 9.0e18f) || fv != std::trunc(fv)) return false;  // non-integral fill cannot be emulated
+        fill_value = static_cast<int64_t>(fv);
+      } else {
+        return false;  // fill values of other types are not emulated; do not silently substitute 0
+      }
+    }
+
+    // Compute total size from shape; reject negative dims and stop before the product can overflow.
+    int64_t total = 1;
+    for (int64_t d : *shape_input) {
+      if (d < 0 || d > 1000000 || (total *= d) > 1000000) return false;  // Safety limit
+    }
+    result.assign(static_cast<size_t>(total), fill_value);
+    return true;
+  }
+
+  if (op == "Slice") {
+    const auto* data = get_input(0);
+    const auto* starts = get_input(1);
+    const auto* ends = get_input(2);
+    if (!data || !starts || !ends || starts->empty() || ends->empty()) return false;
+
+    // For 1-D shape tensors
+    int64_t start = (*starts)[0];
+    int64_t end = (*ends)[0];
+    const int64_t dim_size = static_cast<int64_t>(data->size());
+    int64_t step = 1;
+    const auto* steps_v = get_input(4);  // optional input; nullptr when absent
+    if (steps_v && !steps_v->empty()) step = (*steps_v)[0];
+    if (step == 0) return false;  // ONNX does not allow a zero step
+
+    result.clear();
+    if (dim_size == 0) return true;  // slicing an empty vector yields an empty vector
+
+    // ONNX clamps start/end to [0, dim] when stepping forward, but to [0, dim - 1] and [-1, dim - 1]
+    // when stepping backward, so a reverse slice can reach index 0 and never starts past the end.
+    const int64_t upper = step > 0 ? dim_size : dim_size - 1;
+    if (start < 0) start += dim_size;
+    if (end < 0) end += dim_size;
+    start = std::max(int64_t(0), std::min(start, upper));
+    end = std::max(step > 0 ? int64_t(0) : int64_t(-1), std::min(end, upper));
+    if (step > 0) {
+      for (int64_t i = start; i < end; i += step) result.push_back((*data)[static_cast<size_t>(i)]);
+    } else {
+      for (int64_t i = start; i > end; i += step) result.push_back((*data)[static_cast<size_t>(i)]);
+    }
+    return true;
+  }
+
+  if (op == "Reshape") {
+    const auto* data = get_input(0);
+    if (!data) return false;
+    // For shape subgraphs, Reshape just passes data through (reshaping a 1-D vector)
+    result = *data;
+    return true;
+  }
+
+  if (op == "Expand") {
+    const auto* data = get_input(0);
+    const auto* shape = get_input(1);
+    if (!data || !shape) return false;
+    // For shape subgraphs, Expand broadcasts scalar/small tensor
+    if (data->size() == 1 && !shape->empty()) {
+      int64_t total = 1;
+      for (int64_t d : *shape) {
+        if (d < 0 || d > 1000000 || (total *= d) > 1000000) return false;  // Safety limit, overflow-free
+      }
+      result.assign(static_cast<size_t>(total), (*data)[0]);
+    } else {
+      result = *data;
+    }
+    return true;
+  }
+
+  // Unsupported op
+  return false;
+}
+
+// Scans every node for shape-consuming input slots and tries to fold each candidate input.
+// Always returns Status::OK(); inputs that cannot be folded are simply left untouched.
+Status ShapeSubgraphFolder::Run() {
+  // Renders a folded shape as "a, b, c" for the verbose log below.
+  const auto join_dims = [](const std::vector<int64_t>& dims) {
+    std::string text;
+    for (auto v : dims) {
+      if (!text.empty()) text += ", ";
+      text += std::to_string(v);
+    }
+    return text;
+  };
+
+  for (auto node_idx : graph_viewer_.GetNodesInTopologicalOrder()) {
+    const auto* consumer = graph_viewer_.GetNode(node_idx);
+    if (consumer == nullptr) continue;
+
+    const auto& input_defs = consumer->InputDefs();
+    for (size_t slot = 0; slot < input_defs.size(); ++slot) {
+      if (!IsShapeConsumingSlot(*consumer, slot)) continue;
+
+      const auto* shape_arg = input_defs[slot];
+      if (shape_arg == nullptr || !shape_arg->Exists()) continue;
+
+      // Constant initializers are already handled normally; everything else is a folding candidate.
+      const bool is_initializer = graph_viewer_.GetConstantInitializer(shape_arg->Name()) != nullptr;
+      if (is_initializer || !TryFoldShapeSubgraph(shape_arg)) continue;
+
+      LOGS(logger_, VERBOSE) << "ShapeSubgraphFolder: Folded shape input '"
+                             << shape_arg->Name() << "' for "
+                             << consumer->OpType() << " node '" << consumer->Name() << "'"
+                             << " -> [" << join_dims(folded_shapes_[shape_arg->Name()]) << "]";
+    }
+  }
+
+  // Summary of the whole pass.
+  LOGS(logger_, VERBOSE) << "ShapeSubgraphFolder: Folded " << folded_shapes_.size()
+                         << " shape subgraphs, " << folded_nodes_.size() << " nodes eliminated.";
+
+  return Status::OK();
+}
+
+bool ShapeSubgraphFolder::IsFoldedShape(const std::string& name) const {
+  return folded_shapes_.find(name) != folded_shapes_.end();  // true once a fold recorded this NodeArg name
+}
+
+const std::vector<int64_t>* ShapeSubgraphFolder::GetFoldedShape(const std::string& name) const {
+  const auto found = folded_shapes_.find(name);
+  return found == folded_shapes_.end() ? nullptr : &found->second;  // nullptr when `name` was not folded
+}
+
+bool ShapeSubgraphFolder::IsFoldedNode(NodeIndex node_index) const {
+  return folded_nodes_.find(node_index) != folded_nodes_.end();  // true for nodes marked skippable by a fold
+}
+
+}  // namespace webnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h
new file mode 100644
index 0000000000000..bcdcdecacd9f9
--- /dev/null
+++ b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h
@@ -0,0 +1,85 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) Intel Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include <string>
+
+#include "core/common/inlined_containers.h"
+#include "core/graph/graph_viewer.h"
+#include "core/providers/webnn/builders/helper.h"
+
+namespace onnxruntime {
+namespace webnn {
+
+// ShapeSubgraphFolder: Pre-evaluates shape-computing subgraphs in the ONNX graph.
+//
+// In unfused (HuggingFace-Optimum-style) models, Reshape/Expand/ConstantOfShape ops
+// have shape inputs produced by chains of shape-domain ops (Shape, Gather, Concat,
+// Where, Equal, Range, ConstantOfShape, etc.). Chromium's WebNN ShapeFoldingInterpreter
+// can't handle all of these, causing "Graph has been destroyed" errors.
+//
+// This folder:
+// 1. Identifies "shape-consuming" input slots (Reshape[1], Expand[1], etc.)
+// 2. Traces each shape input's producer subgraph backward
+// 3. If the entire subgraph can be evaluated with known constants + free_dimension_bounds,
+//    evaluates it to produce a concrete int64 shape tensor
+// 4. Makes these folded shapes available as synthetic constant initializers
+//
+// Runs once at session creation → zero per-inference cost.
+class ShapeSubgraphFolder {
+ public:
+  ShapeSubgraphFolder(const GraphViewer& graph_viewer,
+                      const FreeDimensionBounds& free_dimension_bounds,
+                      const logging::Logger& logger);
+
+  // Run the folding pass. After this, GetFoldedShape() and IsFoldedNode() are valid.
+  Status Run();
+
+  // Check if a NodeArg name has been folded to a constant shape.
+  bool IsFoldedShape(const std::string& name) const;
+
+  // Get the folded int64 tensor data for a shape NodeArg.
+  // Returns nullptr if not folded.
+  const std::vector<int64_t>* GetFoldedShape(const std::string& name) const;
+
+  // Check if a node is part of a folded shape subgraph (should be skipped in AddOperations).
+  bool IsFoldedNode(NodeIndex node_index) const;
+
+  // Get the set of node indices that were folded (for skipping).
+  const InlinedHashSet<NodeIndex>& GetFoldedNodes() const { return folded_nodes_; }
+
+ private:
+  // Evaluate a shape-producing subgraph rooted at the given NodeArg.
+  // Returns true if successfully folded, with result stored in folded_shapes_.
+  bool TryFoldShapeSubgraph(const NodeArg* shape_arg);
+
+  // Mini-interpreter: evaluate a single node given its input values.
+  // Returns true if the node can be evaluated.
+  bool EvaluateNode(const Node& node,
+                    const std::unordered_map<std::string, std::vector<int64_t>>& known_values,
+                    std::vector<int64_t>& result);
+
+  // Get the resolved shape of a NodeArg (using free_dimension_bounds for symbolic dims).
+  bool GetResolvedShape(const NodeArg* arg, std::vector<int64_t>& shape) const;
+
+  // Check if a node is a supported shape-domain op for the mini-interpreter.
+  static bool IsSupportedShapeOp(const Node& node);
+
+  const GraphViewer& graph_viewer_;
+  const FreeDimensionBounds& free_dimension_bounds_;
+  const logging::Logger& logger_;
+
+  // Maps NodeArg name → folded int64 shape values.
+  std::unordered_map<std::string, std::vector<int64_t>> folded_shapes_;
+
+  // Set of node indices that are part of folded subgraphs (to be skipped).
+  InlinedHashSet<NodeIndex> folded_nodes_;
+};
+
+}  // namespace webnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
index fa6b9ad0f5200..1bc0408571fa1 100644
--- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
+++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
@@ -87,13 +87,28 @@ WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
 
   const auto supported_nodes = webnn::GetSupportedNodes(graph_viewer, wnn_builder, wnn_device_type_, wnn_limits_, logger);
 
+  // Run the shape subgraph folder to identify nodes that will be folded away during graph build.
+  // These nodes must be claimed as "supported" so they stay in our partition, even if their
+  // data types (e.g., int64 Equal) aren't natively supported by WebNN — they'll be skipped
+  // during ModelBuilder::AddOperations().
+  webnn::ShapeSubgraphFolder capability_folder(graph_viewer, free_dimension_bounds_, logger);
+  auto folder_status = capability_folder.Run();
+  std::unordered_set<const Node*> supported_nodes_with_folded = supported_nodes;
+  if (folder_status.IsOK()) {
+    for (const auto& node : graph_viewer.Nodes()) {
+      if (capability_folder.IsFoldedNode(node.Index())) {
+        supported_nodes_with_folded.insert(&node);
+      }
+    }
+  }
+
   const auto gen_metadef_name = [&]() {
     HashValue model_hash;
     int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash);
     return MakeString(WEBNN, "_", model_hash, "_", metadef_id);
   };
 
-  auto result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes, {},
+  auto result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes_with_folded, {},
                                                  gen_metadef_name, WEBNN, kWebNNExecutionProvider,
                                                  &node_unit_map, /*drop_constant_initializers*/ true);
 
@@ -429,6 +444,40 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra
                   }
                 }
               }
+
+              // Try to parse additive expressions like "dim_a + dim_b"
+              // (e.g., "past_sequence_length + sequence_length").
+              if (output_shape[dim_idx] == 0) {
+                auto plus_pos = dim_param.find('+');
+                if (plus_pos != std::string::npos) {
+                  const std::string left = utils::TrimString(std::string_view(dim_param).substr(0, plus_pos));
+                  const std::string right = utils::TrimString(std::string_view(dim_param).substr(plus_pos + 1));
+
+                  // Resolve each operand (from runtime inputs or fixed bounds).
+                  auto resolve_operand = [&](const std::string& operand) -> int64_t {
+                    auto it = dim_param_to_input_dim.find(operand);
+                    if (it != dim_param_to_input_dim.end()) {
+                      const size_t src_idx = it->second.first;
+                      const size_t src_dim = it->second.second;
+                      if (src_idx < runtime_input_shapes.size() &&
+                          src_dim < runtime_input_shapes[src_idx].size()) {
+                        return runtime_input_shapes[src_idx][src_dim];
+                      }
+                    }
+                    auto fixed_it = fixed_dim_param_values.find(operand);
+                    if (fixed_it != fixed_dim_param_values.end()) {
+                      return fixed_it->second;
+                    }
+                    return -1;  // unresolved
+                  };
+
+                  int64_t left_val = resolve_operand(left);
+                  int64_t right_val = resolve_operand(right);
+                  if (left_val >= 0 && right_val >= 0) {
+                    output_shape[dim_idx] = left_val + right_val;
+                  }
+                }
+              }
             }
           }
 
@@ -458,12 +507,10 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra
             }
           }
 
-          // Hard fail if dynamic dimensions remain unresolved.
-          // TODO: When WebNN supports the dispatch() API that returns output MLTensors with
-          // shapes inferred from actual input tensors, we can query the real output shapes
-          // at runtime instead of relying on symbolic dim_param matching. This would eliminate
-          // the need for the simplify_dynamic_shapes.py preprocessing step and handle all
-          // dynamic shape cases natively (including data-dependent output shapes).
+          // If dynamic dimensions remain unresolved, fall back to a positional heuristic:
+          // reuse the extent that a runtime input has at the same dim index.
+          // This is a best-effort guess for intermediate outputs (like Expand's causal mask)
+          // whose shapes are data-dependent and not annotated with a resolvable dim_param.
           for (size_t dim_idx = 0; dim_idx < output_shape.size(); ++dim_idx) {
             if (output_shape[dim_idx] == webnn::kDynamicDim) {
               std::string unresolved_dim_param;
@@ -471,15 +518,34 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra
                 unresolved_dim_param = output_dim_params[output_idx][dim_idx];
               }
 
-              LOGS_DEFAULT(ERROR) << "[WebNN] Failed to resolve dynamic output dimension for output ["
-                                  << output_name << "] at dim index [" << dim_idx
-                                  << "], dim_param: [" << unresolved_dim_param
-                                  << "]. Please ensure this dim_param can be inferred from graph inputs"
-                                  << " or provide pre-allocated output tensors via session.run(feeds, fetches).";
-              return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                                     "[WebNN] Failed to resolve dynamic output dimension for output: ", output_name,
-                                     " at dim index: ", dim_idx,
-                                     ". dim_param: ", unresolved_dim_param);
+              // Instead of hard-failing, use a heuristic: scan the runtime inputs in order
+              // and take the first positive extent found at this dim index. The loop stops
+              // at the first hit (guard `inferred == 0`), so this is NOT the largest extent.
+              int64_t inferred = 0;
+              for (size_t inp_idx = 0; inp_idx < runtime_input_shapes.size() && inferred == 0; ++inp_idx) {
+                if (dim_idx < runtime_input_shapes[inp_idx].size()) {
+                  int64_t candidate = runtime_input_shapes[inp_idx][dim_idx];
+                  if (candidate > inferred) {
+                    inferred = candidate;
+                  }
+                }
+              }
+
+              if (inferred > 0) {
+                LOGS_DEFAULT(WARNING) << "[WebNN] Unresolved output dim for [" << output_name
+                                     << "] at index " << dim_idx << " (dim_param: [" << unresolved_dim_param
+                                     << "]). Inferred from runtime inputs: " << inferred;
+                output_shape[dim_idx] = inferred;
+              } else {
+                LOGS_DEFAULT(ERROR) << "[WebNN] Failed to resolve dynamic output dimension for output ["
+                                    << output_name << "] at dim index [" << dim_idx
+                                    << "], dim_param: [" << unresolved_dim_param
+                                    << "]. No input dims available for inference.";
+                return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
+                                       "[WebNN] Failed to resolve dynamic output dimension for output: ", output_name,
+                                       " at dim index: ", dim_idx,
+                                       ". dim_param: ", unresolved_dim_param);
+              }
             }
           }
 

From 2f9a965165111d913588ba67d82da8706ce3cfd5 Mon Sep 17 00:00:00 2001
From: "Kumar, Arisha" <arisha.kumar@intel.com>
Date: Thu, 4 Jun 2026 14:12:29 -0700
Subject: [PATCH 2/3] Remove ShapeSubgraphFolder, keep additive dim_param
 behind flag, wire JS option

---
 js/web/lib/wasm/session-options.ts            |   5 +
 .../providers/webnn/builders/model_builder.cc |  16 +-
 .../providers/webnn/builders/model_builder.h  |   2 -
 .../webnn/builders/shape_subgraph_folder.cc   | 568 ------------------
 .../webnn/builders/shape_subgraph_folder.h    |  85 ---
 .../webnn/webnn_execution_provider.cc         |  24 +-
 .../webnn/webnn_execution_provider.h          |   5 +-
 .../providers/webnn/webnn_provider_factory.cc |  21 +-
 8 files changed, 38 insertions(+), 688 deletions(-)
 delete mode 100644 onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc
 delete mode 100644 onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h

diff --git a/js/web/lib/wasm/session-options.ts b/js/web/lib/wasm/session-options.ts
index 6f83234534f26..c026602f73d33 100644
--- a/js/web/lib/wasm/session-options.ts
+++ b/js/web/lib/wasm/session-options.ts
@@ -139,6 +139,11 @@ const setExecutionProviders = async (
           if (enableCausalLM) {
             appendEpOption(epOptions, 'enableCausalLM', 'true', allocs);
           }
+          // enableAdditiveDimParam: parse symbolic dim expressions like "a + b" in output shapes.
+          const enableAdditiveDimParam = (webnnOptions as any)?.enableAdditiveDimParam;
+          if (enableAdditiveDimParam) {
+            appendEpOption(epOptions, 'webnn_enable_additive_dim_param', 'true', allocs);
+          }
         }
         break;
       case 'webgpu':
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc
index 37af542dbbdd0..c36d022074d24 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc
@@ -42,13 +42,6 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge
 }
 
 Status ModelBuilder::Initialize() {
-  // Run shape subgraph folding FIRST, before PreprocessInitializers.
-  // This pre-evaluates shape subgraphs (Where/Equal/Range/ConstantOfShape chains)
-  // so that Reshape/Expand/etc. see constant shapes instead of dynamic subgraphs.
-  // Must run before PreprocessInitializers because AddInitializersToSkip checks IsFoldedShape().
-  shape_folder_ = std::make_unique<ShapeSubgraphFolder>(graph_viewer_, free_dimension_bounds_, logger_);
-  ORT_RETURN_IF_ERROR(shape_folder_->Run());
-
   PreprocessInitializers();
   ORT_RETURN_IF_ERROR(RegisterInitializers());
   ORT_RETURN_IF_ERROR(RegisterModelInputs());
@@ -522,15 +515,18 @@ const ModelBuilder::DimProvenance* ModelBuilder::GetDimProvenance(const std::str
 }
 
 bool ModelBuilder::IsFoldedShape(const std::string& name) const {
-  return shape_folder_ && shape_folder_->IsFoldedShape(name);
+  ORT_UNUSED_PARAMETER(name);
+  return false;
 }
 
 const std::vector<int64_t>* ModelBuilder::GetFoldedShape(const std::string& name) const {
-  return shape_folder_ ? shape_folder_->GetFoldedShape(name) : nullptr;
+  ORT_UNUSED_PARAMETER(name);
+  return nullptr;
 }
 
 bool ModelBuilder::IsFoldedNode(NodeIndex node_index) const {
-  return shape_folder_ && shape_folder_->IsFoldedNode(node_index);
+  ORT_UNUSED_PARAMETER(node_index);
+  return false;
 }
 
 }  // namespace webnn
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h
index c2b68f18ea33e..4fd812bda5126 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.h
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.h
@@ -8,7 +8,6 @@
 #include <core/graph/graph_viewer.h>
 
 #include "model.h"
-#include "shape_subgraph_folder.h"
 #include "core/framework/execution_provider.h"
 #include "core/providers/webnn/builders/helper.h"
 
@@ -105,7 +104,6 @@ class ModelBuilder {
   emscripten::val wnn_limits_ = emscripten::val::undefined();
   FreeDimensionBounds free_dimension_bounds_;
   bool enable_causal_lm_;
-  std::unique_ptr<ShapeSubgraphFolder> shape_folder_;
   InlinedHashMap<std::string, emscripten::val> wnn_operands_;
   std::vector<std::string> input_names_;
   std::vector<std::string> output_names_;
diff --git a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc
deleted file mode 100644
index c38b993790414..0000000000000
--- a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc
+++ /dev/null
@@ -1,568 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Copyright (c) Intel Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#include "shape_subgraph_folder.h"
-
-#include "core/framework/tensorprotoutils.h"
-#include "core/providers/common.h"
-
-#include <queue>
-#include <algorithm>
-#include <numeric>
-#include <cmath>
-
-namespace onnxruntime {
-namespace webnn {
-
-ShapeSubgraphFolder::ShapeSubgraphFolder(const GraphViewer& graph_viewer,
-                                         const FreeDimensionBounds& free_dimension_bounds,
-                                         const logging::Logger& logger)
-    : graph_viewer_(graph_viewer),
-      free_dimension_bounds_(free_dimension_bounds),
-      logger_(logger) {
-}
-
-bool ShapeSubgraphFolder::IsSupportedShapeOp(const Node& node) {
-  static const InlinedHashSet<std::string_view> supported_ops = {
-      "Shape", "Gather", "Concat", "Unsqueeze", "Squeeze", "Slice",
-      "Cast", "Add", "Sub", "Mul", "Div", "Equal", "Where",
-      "ConstantOfShape", "Range", "Reshape", "Expand",
-      "Neg", "Abs", "Floor", "Ceil",
-  };
-  // Only standard ONNX ops (empty domain or "onnx")
-  if (!node.Domain().empty() && node.Domain() != "onnx" && node.Domain() != kOnnxDomain) {
-    return false;
-  }
-  return supported_ops.count(node.OpType()) > 0;
-}
-
-bool ShapeSubgraphFolder::GetResolvedShape(const NodeArg* arg, std::vector<int64_t>& shape) const {
-  const auto* shape_proto = arg->Shape();
-  if (!shape_proto) return false;
-
-  shape.clear();
-  for (int i = 0; i < shape_proto->dim_size(); i++) {
-    const auto& dim = shape_proto->dim(i);
-    if (dim.has_dim_value()) {
-      shape.push_back(dim.dim_value());
-    } else if (dim.has_dim_param()) {
-      // Try to resolve from free_dimension_bounds
-      const auto& dim_param = dim.dim_param();
-      auto it = free_dimension_bounds_.find(dim_param);
-      if (it != free_dimension_bounds_.end()) {
-        // Use maxSize as the resolved value (consistent with WebNN EP behavior)
-        shape.push_back(static_cast<int64_t>(it->second.max_size));
-      } else {
-        return false;  // Can't resolve this symbolic dim
-      }
-    } else {
-      return false;  // Unknown dim
-    }
-  }
-  return true;
-}
-
-// Check if shape-consuming inputs (Reshape[1], Expand[1], etc.) are candidates for folding.
-static bool IsShapeConsumingSlot(const Node& consumer, size_t input_index) {
-  const auto& op = consumer.OpType();
-  if ((op == "Reshape" || op == "Expand") && input_index == 1) return true;
-  if (op == "ConstantOfShape" && input_index == 0) return true;
-  if (op == "Tile" && input_index == 1) return true;
-  // Slice has starts[1], ends[2], axes[3], steps[4]
-  if (op == "Slice" && input_index >= 1 && input_index <= 4) return true;
-  return false;
-}
-
-bool ShapeSubgraphFolder::TryFoldShapeSubgraph(const NodeArg* shape_arg) {
-  const std::string& target_name = shape_arg->Name();
-
-  // Already folded?
-  if (folded_shapes_.count(target_name)) return true;
-
-  // Check if it's already a constant initializer
-  if (graph_viewer_.GetConstantInitializer(target_name)) return false;  // Already handled normally
-
-  // BFS backward to find the producer subgraph
-  std::unordered_map<std::string, std::vector<int64_t>> known_values;
-  std::vector<const Node*> topo_order;  // nodes in forward eval order
-  InlinedHashSet<NodeIndex> visited_nodes;
-  std::queue<const NodeArg*> worklist;
-  worklist.push(shape_arg);
-
-  InlinedHashSet<std::string> visited_args;
-  visited_args.insert(target_name);
-
-  bool can_fold = true;
-
-  while (!worklist.empty() && can_fold) {
-    const NodeArg* current = worklist.front();
-    worklist.pop();
-    const std::string& name = current->Name();
-
-    // Skip if already known
-    if (known_values.count(name)) continue;
-
-    // Check if it's a constant initializer
-    const auto* init = graph_viewer_.GetConstantInitializer(name);
-    if (init) {
-      // Read int64 values from the initializer
-      std::vector<int64_t> values;
-      if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
-        if (!init->int64_data().empty()) {
-          values.assign(init->int64_data().begin(), init->int64_data().end());
-        } else if (!init->raw_data().empty()) {
-          const int64_t* data = reinterpret_cast<const int64_t*>(init->raw_data().data());
-          size_t count = init->raw_data().size() / sizeof(int64_t);
-          values.assign(data, data + count);
-        }
-      } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32) {
-        if (!init->int32_data().empty()) {
-          for (auto v : init->int32_data()) values.push_back(static_cast<int64_t>(v));
-        } else if (!init->raw_data().empty()) {
-          const int32_t* data = reinterpret_cast<const int32_t*>(init->raw_data().data());
-          size_t count = init->raw_data().size() / sizeof(int32_t);
-          for (size_t i = 0; i < count; i++) values.push_back(static_cast<int64_t>(data[i]));
-        }
-      } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
-        if (!init->float_data().empty()) {
-          for (auto v : init->float_data()) values.push_back(static_cast<int64_t>(v));
-        } else if (!init->raw_data().empty()) {
-          const float* data = reinterpret_cast<const float*>(init->raw_data().data());
-          size_t count = init->raw_data().size() / sizeof(float);
-          for (size_t i = 0; i < count; i++) values.push_back(static_cast<int64_t>(data[i]));
-        }
-      } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_BOOL) {
-        if (!init->int32_data().empty()) {
-          for (auto v : init->int32_data()) values.push_back(static_cast<int64_t>(v));
-        } else if (!init->raw_data().empty()) {
-          const uint8_t* data = reinterpret_cast<const uint8_t*>(init->raw_data().data());
-          size_t count = init->raw_data().size();
-          for (size_t i = 0; i < count; i++) values.push_back(static_cast<int64_t>(data[i]));
-        }
-      } else {
-        can_fold = false;
-        break;
-      }
-      // Handle scalar initializers (0-dim tensors with no data entries but have raw_data)
-      if (values.empty() && init->dims_size() == 0) {
-        // Try scalar
-        if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
-          values.push_back(0);
-        }
-      }
-      known_values[name] = std::move(values);
-      continue;
-    }
-
-    // Check if it's a graph input (cannot fold if depends on runtime input, unless it's just shape)
-    const auto* producer = graph_viewer_.GetProducerNode(name);
-    if (!producer) {
-      // It's a graph input - can't fold unless we can get its shape via Shape op
-      can_fold = false;
-      break;
-    }
-
-    // Check if the producer is a supported shape op
-    if (!IsSupportedShapeOp(*producer)) {
-      can_fold = false;
-      break;
-    }
-
-    // Add producer to visit list
-    if (visited_nodes.insert(producer->Index()).second) {
-      topo_order.push_back(producer);
-
-      // Add all inputs of this producer to the worklist
-      for (const auto* input_def : producer->InputDefs()) {
-        if (input_def && input_def->Exists()) {
-          if (visited_args.insert(input_def->Name()).second) {
-            worklist.push(input_def);
-          }
-        }
-      }
-    }
-  }
-
-  if (!can_fold) return false;
-
-  // Sort nodes in topological order (reverse of discovery = forward eval order)
-  std::reverse(topo_order.begin(), topo_order.end());
-
-  // Evaluate nodes in topological order
-  for (const Node* node : topo_order) {
-    std::vector<int64_t> result;
-    if (!EvaluateNode(*node, known_values, result)) {
-      return false;
-    }
-    // Store results for all outputs
-    for (const auto* output_def : node->OutputDefs()) {
-      if (output_def && output_def->Exists()) {
-        known_values[output_def->Name()] = result;
-      }
-    }
-  }
-
-  // The target should now be in known_values
-  auto it = known_values.find(target_name);
-  if (it == known_values.end()) return false;
-
-  // Store the folded result
-  folded_shapes_[target_name] = it->second;
-
-  // Mark nodes in the subgraph as folded, but only if ALL their outputs are consumed
-  // exclusively by other folded nodes or shape-consuming slots. If any output feeds
-  // a non-shape consumer outside the subgraph, we cannot skip that node.
-  for (const Node* node : topo_order) {
-    bool can_skip = true;
-    for (auto it2 = node->OutputEdgesBegin(); it2 != node->OutputEdgesEnd(); ++it2) {
-      const Node& consumer = it2->GetNode();
-      if (visited_nodes.count(consumer.Index())) continue;  // consumer is in our subgraph
-      // Check if the consumer uses this output only in a shape-consuming slot
-      if (!IsShapeConsumingSlot(consumer, it2->GetDstArgIndex())) {
-        can_skip = false;
-        break;
-      }
-    }
-    if (can_skip) {
-      folded_nodes_.insert(node->Index());
-    }
-  }
-
-  return true;
-}
-
-bool ShapeSubgraphFolder::EvaluateNode(
-    const Node& node,
-    const std::unordered_map<std::string, std::vector<int64_t>>& known_values,
-    std::vector<int64_t>& result) {
-  const auto& op = node.OpType();
-  const auto& inputs = node.InputDefs();
-
-  // Helper to get input values
-  auto get_input = [&](size_t idx) -> const std::vector<int64_t>* {
-    if (idx >= inputs.size() || !inputs[idx] || !inputs[idx]->Exists()) return nullptr;
-    auto it = known_values.find(inputs[idx]->Name());
-    return (it != known_values.end()) ? &it->second : nullptr;
-  };
-
-  if (op == "Shape") {
-    // Shape op: return the resolved shape of input[0]
-    if (!GetResolvedShape(inputs[0], result)) return false;
-
-    // Handle start/end attributes (Shape opset 15+)
-    const auto& attrs = node.GetAttributes();
-    int64_t start = 0, end = static_cast<int64_t>(result.size());
-    if (attrs.count("start")) start = attrs.at("start").i();
-    if (attrs.count("end")) end = attrs.at("end").i();
-    if (start < 0) start += static_cast<int64_t>(result.size());
-    if (end < 0) end += static_cast<int64_t>(result.size());
-    start = std::max(int64_t(0), std::min(start, static_cast<int64_t>(result.size())));
-    end = std::max(int64_t(0), std::min(end, static_cast<int64_t>(result.size())));
-    result = std::vector<int64_t>(result.begin() + static_cast<ptrdiff_t>(start), result.begin() + static_cast<ptrdiff_t>(end));
-    return true;
-  }
-
-  if (op == "Gather") {
-    const auto* data = get_input(0);
-    const auto* indices = get_input(1);
-    if (!data || !indices) return false;
-
-    result.clear();
-    for (int64_t idx : *indices) {
-      if (idx < 0) idx += static_cast<int64_t>(data->size());
-      if (idx < 0 || idx >= static_cast<int64_t>(data->size())) return false;
-      result.push_back((*data)[static_cast<size_t>(idx)]);
-    }
-    // If indices is scalar (0-dim), result should also be scalar-like
-    if (indices->empty()) {
-      // scalar index case - not handled here
-      return false;
-    }
-    return true;
-  }
-
-  if (op == "Concat") {
-    // For 1-D shape vectors, axis is always 0 — just concatenate all inputs.
-    result.clear();
-    for (size_t i = 0; i < inputs.size(); i++) {
-      const auto* inp = get_input(i);
-      if (!inp) return false;
-      result.insert(result.end(), inp->begin(), inp->end());
-    }
-    return true;
-  }
-
-  if (op == "Unsqueeze") {
-    const auto* data = get_input(0);
-    if (!data) return false;
-    // For shape subgraphs, unsqueeze typically wraps a scalar into [1] shape
-    result = *data;
-    // If axes input exists (opset 13+), handle it
-    if (inputs.size() > 1) {
-      const auto* axes = get_input(1);
-      if (!axes) return false;
-      // Insert dimensions of size 1 at specified axes
-      // For shape vectors this is typically making a scalar into a 1-element vector
-    }
-    return true;
-  }
-
-  if (op == "Squeeze") {
-    const auto* data = get_input(0);
-    if (!data) return false;
-    result = *data;
-    return true;
-  }
-
-  if (op == "Cast") {
-    const auto* data = get_input(0);
-    if (!data) return false;
-    // Cast just passes through for int64 purposes
-    result = *data;
-    return true;
-  }
-
-  if (op == "Neg") {
-    const auto* data = get_input(0);
-    if (!data) return false;
-    result.resize(data->size());
-    for (size_t i = 0; i < data->size(); i++) result[i] = -(*data)[i];
-    return true;
-  }
-
-  if (op == "Abs") {
-    const auto* data = get_input(0);
-    if (!data) return false;
-    result.resize(data->size());
-    for (size_t i = 0; i < data->size(); i++) result[i] = std::abs((*data)[i]);
-    return true;
-  }
-
-  // Binary element-wise ops: Add, Sub, Mul, Div
-  if (op == "Add" || op == "Sub" || op == "Mul" || op == "Div") {
-    const auto* a = get_input(0);
-    const auto* b = get_input(1);
-    if (!a || !b) return false;
-
-    // Broadcasting: if one is scalar (size 1), broadcast to the other's size
-    size_t size = std::max(a->size(), b->size());
-    result.resize(size);
-    for (size_t i = 0; i < size; i++) {
-      int64_t va = (*a)[a->size() == 1 ? 0 : i];
-      int64_t vb = (*b)[b->size() == 1 ? 0 : i];
-      if (op == "Add") result[i] = va + vb;
-      else if (op == "Sub") result[i] = va - vb;
-      else if (op == "Mul") result[i] = va * vb;
-      else if (op == "Div") {
-        if (vb == 0) return false;
-        result[i] = va / vb;
-      }
-    }
-    return true;
-  }
-
-  if (op == "Equal") {
-    const auto* a = get_input(0);
-    const auto* b = get_input(1);
-    if (!a || !b) return false;
-
-    size_t size = std::max(a->size(), b->size());
-    result.resize(size);
-    for (size_t i = 0; i < size; i++) {
-      int64_t va = (*a)[a->size() == 1 ? 0 : i];
-      int64_t vb = (*b)[b->size() == 1 ? 0 : i];
-      result[i] = (va == vb) ? 1 : 0;
-    }
-    return true;
-  }
-
-  if (op == "Where") {
-    const auto* cond = get_input(0);
-    const auto* x = get_input(1);
-    const auto* y = get_input(2);
-    if (!cond || !x || !y) return false;
-
-    size_t size = std::max({cond->size(), x->size(), y->size()});
-    result.resize(size);
-    for (size_t i = 0; i < size; i++) {
-      int64_t c = (*cond)[cond->size() == 1 ? 0 : i];
-      int64_t vx = (*x)[x->size() == 1 ? 0 : i];
-      int64_t vy = (*y)[y->size() == 1 ? 0 : i];
-      result[i] = c ? vx : vy;
-    }
-    return true;
-  }
-
-  if (op == "Range") {
-    const auto* start_v = get_input(0);
-    const auto* limit_v = get_input(1);
-    const auto* delta_v = get_input(2);
-    if (!start_v || !limit_v || !delta_v) return false;
-    if (start_v->empty() || limit_v->empty() || delta_v->empty()) return false;
-
-    int64_t start = (*start_v)[0];
-    int64_t limit = (*limit_v)[0];
-    int64_t delta = (*delta_v)[0];
-    if (delta == 0) return false;
-
-    result.clear();
-    if (delta > 0) {
-      for (int64_t v = start; v < limit; v += delta) result.push_back(v);
-    } else {
-      for (int64_t v = start; v > limit; v += delta) result.push_back(v);
-    }
-    return true;
-  }
-
-  if (op == "ConstantOfShape") {
-    const auto* shape_input = get_input(0);
-    if (!shape_input) return false;
-
-    // Get the fill value from attribute
-    int64_t fill_value = 0;
-    const auto& attrs = node.GetAttributes();
-    if (attrs.count("value")) {
-      const auto& tensor = attrs.at("value").t();
-      if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
-        if (!tensor.int64_data().empty()) fill_value = tensor.int64_data(0);
-        else if (!tensor.raw_data().empty())
-          fill_value = *reinterpret_cast<const int64_t*>(tensor.raw_data().data());
-      } else if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32) {
-        if (!tensor.int32_data().empty()) fill_value = tensor.int32_data(0);
-        else if (!tensor.raw_data().empty())
-          fill_value = *reinterpret_cast<const int32_t*>(tensor.raw_data().data());
-      } else if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
-        float fv = 0.0f;
-        if (!tensor.float_data().empty()) fv = tensor.float_data(0);
-        else if (!tensor.raw_data().empty())
-          fv = *reinterpret_cast<const float*>(tensor.raw_data().data());
-        fill_value = static_cast<int64_t>(fv);
-      }
-    }
-
-    // Compute total size from shape
-    int64_t total = 1;
-    for (int64_t d : *shape_input) total *= d;
-    if (total < 0 || total > 1000000) return false;  // Safety limit
-
-    result.assign(static_cast<size_t>(total), fill_value);
-    return true;
-  }
-
-  if (op == "Slice") {
-    const auto* data = get_input(0);
-    const auto* starts = get_input(1);
-    const auto* ends = get_input(2);
-    if (!data || !starts || !ends) return false;
-
-    // For 1-D shape tensors
-    int64_t start = (*starts)[0];
-    int64_t end = (*ends)[0];
-    int64_t dim_size = static_cast<int64_t>(data->size());
-
-    if (start < 0) start += dim_size;
-    if (end < 0) end += dim_size;
-    start = std::max(int64_t(0), std::min(start, dim_size));
-    end = std::max(int64_t(0), std::min(end, dim_size));
-
-    int64_t step = 1;
-    if (inputs.size() > 4) {
-      const auto* steps_v = get_input(4);
-      if (steps_v && !steps_v->empty()) step = (*steps_v)[0];
-    }
-
-    result.clear();
-    if (step > 0) {
-      for (int64_t i = start; i < end; i += step) result.push_back((*data)[static_cast<size_t>(i)]);
-    } else if (step < 0) {
-      for (int64_t i = start; i > end; i += step) result.push_back((*data)[static_cast<size_t>(i)]);
-    }
-    return true;
-  }
-
-  if (op == "Reshape") {
-    const auto* data = get_input(0);
-    if (!data) return false;
-    // For shape subgraphs, Reshape just passes data through (reshaping a 1-D vector)
-    result = *data;
-    return true;
-  }
-
-  if (op == "Expand") {
-    const auto* data = get_input(0);
-    const auto* shape = get_input(1);
-    if (!data || !shape) return false;
-    // For shape subgraphs, Expand broadcasts scalar/small tensor
-    if (data->size() == 1 && !shape->empty()) {
-      int64_t total = 1;
-      for (int64_t d : *shape) total *= d;
-      if (total < 0 || total > 1000000) return false;
-      result.assign(static_cast<size_t>(total), (*data)[0]);
-    } else {
-      result = *data;
-    }
-    return true;
-  }
-
-  // Unsupported op
-  return false;
-}
-
-Status ShapeSubgraphFolder::Run() {
-  // Find all shape-consuming input slots and try to fold them
-  const auto& nodes = graph_viewer_.GetNodesInTopologicalOrder();
-
-  for (auto node_idx : nodes) {
-    const auto* node = graph_viewer_.GetNode(node_idx);
-    if (!node) continue;
-
-    const auto& input_defs = node->InputDefs();
-    for (size_t i = 0; i < input_defs.size(); i++) {
-      if (!IsShapeConsumingSlot(*node, i)) continue;
-
-      const auto* shape_arg = input_defs[i];
-      if (!shape_arg || !shape_arg->Exists()) continue;
-
-      // Skip if already a constant initializer (handled normally)
-      if (graph_viewer_.GetConstantInitializer(shape_arg->Name())) continue;
-
-      // Try to fold this shape input
-      if (TryFoldShapeSubgraph(shape_arg)) {
-        LOGS(logger_, VERBOSE) << "ShapeSubgraphFolder: Folded shape input '"
-                               << shape_arg->Name() << "' for "
-                               << node->OpType() << " node '" << node->Name() << "'"
-                               << " -> [" << [&]() {
-                                    std::string s;
-                                    for (auto v : folded_shapes_[shape_arg->Name()]) {
-                                      if (!s.empty()) s += ", ";
-                                      s += std::to_string(v);
-                                    }
-                                    return s;
-                                  }()
-                               << "]";
-      }
-    }
-  }
-
-  LOGS(logger_, VERBOSE) << "ShapeSubgraphFolder: Folded " << folded_shapes_.size()
-                      << " shape subgraphs, " << folded_nodes_.size() << " nodes eliminated.";
-
-  return Status::OK();
-}
-
-bool ShapeSubgraphFolder::IsFoldedShape(const std::string& name) const {
-  return folded_shapes_.count(name) > 0;
-}
-
-const std::vector<int64_t>* ShapeSubgraphFolder::GetFoldedShape(const std::string& name) const {
-  auto it = folded_shapes_.find(name);
-  return (it != folded_shapes_.end()) ? &it->second : nullptr;
-}
-
-bool ShapeSubgraphFolder::IsFoldedNode(NodeIndex node_index) const {
-  return folded_nodes_.count(node_index) > 0;
-}
-
-}  // namespace webnn
-}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h
deleted file mode 100644
index bcdcdecacd9f9..0000000000000
--- a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Copyright (c) Intel Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#pragma once
-
-#include <unordered_map>
-#include <unordered_set>
-#include <vector>
-#include <string>
-
-#include "core/common/inlined_containers.h"
-#include "core/graph/graph_viewer.h"
-#include "core/providers/webnn/builders/helper.h"
-
-namespace onnxruntime {
-namespace webnn {
-
-// ShapeSubgraphFolder: Pre-evaluates shape-computing subgraphs in the ONNX graph.
-//
-// In unfused (HuggingFace-Optimum-style) models, Reshape/Expand/ConstantOfShape ops
-// have shape inputs produced by chains of shape-domain ops (Shape, Gather, Concat,
-// Where, Equal, Range, ConstantOfShape, etc.). Chromium's WebNN ShapeFoldingInterpreter
-// can't handle all of these, causing "Graph has been destroyed" errors.
-//
-// This folder:
-// 1. Identifies "shape-consuming" input slots (Reshape[1], Expand[1], etc.)
-// 2. Traces each shape input's producer subgraph backward
-// 3. If the entire subgraph can be evaluated with known constants + free_dimension_bounds,
-//    evaluates it to produce a concrete int64 shape tensor
-// 4. Makes these folded shapes available as synthetic constant initializers
-//
-// Runs once at session creation → zero per-inference cost.
-class ShapeSubgraphFolder {
- public:
-  ShapeSubgraphFolder(const GraphViewer& graph_viewer,
-                      const FreeDimensionBounds& free_dimension_bounds,
-                      const logging::Logger& logger);
-
-  // Run the folding pass. After this, GetFoldedShape() and IsFoldedNode() are valid.
-  Status Run();
-
-  // Check if a NodeArg name has been folded to a constant shape.
-  bool IsFoldedShape(const std::string& name) const;
-
-  // Get the folded int64 tensor data for a shape NodeArg.
-  // Returns nullptr if not folded.
-  const std::vector<int64_t>* GetFoldedShape(const std::string& name) const;
-
-  // Check if a node is part of a folded shape subgraph (should be skipped in AddOperations).
-  bool IsFoldedNode(NodeIndex node_index) const;
-
-  // Get the set of node indices that were folded (for skipping).
-  const InlinedHashSet<NodeIndex>& GetFoldedNodes() const { return folded_nodes_; }
-
- private:
-  // Evaluate a shape-producing subgraph rooted at the given NodeArg.
-  // Returns true if successfully folded, with result stored in folded_shapes_.
-  bool TryFoldShapeSubgraph(const NodeArg* shape_arg);
-
-  // Mini-interpreter: evaluate a single node given its input values.
-  // Returns true if the node can be evaluated.
-  bool EvaluateNode(const Node& node,
-                    const std::unordered_map<std::string, std::vector<int64_t>>& known_values,
-                    std::vector<int64_t>& result);
-
-  // Get the resolved shape of a NodeArg (using free_dimension_bounds for symbolic dims).
-  bool GetResolvedShape(const NodeArg* arg, std::vector<int64_t>& shape) const;
-
-  // Check if a node is a supported shape-domain op for the mini-interpreter.
-  static bool IsSupportedShapeOp(const Node& node);
-
-  const GraphViewer& graph_viewer_;
-  const FreeDimensionBounds& free_dimension_bounds_;
-  const logging::Logger& logger_;
-
-  // Maps NodeArg name → folded int64 shape values.
-  std::unordered_map<std::string, std::vector<int64_t>> folded_shapes_;
-
-  // Set of node indices that are part of folded subgraphs (to be skipped).
-  InlinedHashSet<NodeIndex> folded_nodes_;
-};
-
-}  // namespace webnn
-}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
index 1bc0408571fa1..8e1ce16657be0 100644
--- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
+++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
@@ -31,7 +31,8 @@ constexpr const char* WEBNN = "WEBNN";
 
 WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_flags,
                          const webnn::FreeDimensionBounds& free_dimension_bounds,
-                         bool enable_causal_lm)
+                         bool enable_causal_lm,
+                         bool enable_additive_dim_param)
     : IExecutionProvider{
           onnxruntime::kWebNNExecutionProvider,
           // If MLTensor is supported, we force all the tensors to be allocated as MLTensor.
@@ -42,7 +43,8 @@ WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_f
               0)},
                   wnn_device_type_(webnn::DeviceTypeFromString(webnn_device_flags)),
                   free_dimension_bounds_(free_dimension_bounds),
-                  enable_causal_lm_(enable_causal_lm) {
+                  enable_causal_lm_(enable_causal_lm),
+                  enable_additive_dim_param_(enable_additive_dim_param) {
   wnn_context_ = emscripten::val::module_property("currentContext");
   if (!wnn_context_.as<bool>()) {
     ORT_THROW("Failed to create WebNN context.");
@@ -87,20 +89,7 @@ WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
 
   const auto supported_nodes = webnn::GetSupportedNodes(graph_viewer, wnn_builder, wnn_device_type_, wnn_limits_, logger);
 
-  // Run the shape subgraph folder to identify nodes that will be folded away during graph build.
-  // These nodes must be claimed as "supported" so they stay in our partition, even if their
-  // data types (e.g., int64 Equal) aren't natively supported by WebNN — they'll be skipped
-  // during ModelBuilder::AddOperations().
-  webnn::ShapeSubgraphFolder capability_folder(graph_viewer, free_dimension_bounds_, logger);
-  auto folder_status = capability_folder.Run();
   std::unordered_set<const Node*> supported_nodes_with_folded = supported_nodes;
-  if (folder_status.IsOK()) {
-    for (const auto& node : graph_viewer.Nodes()) {
-      if (capability_folder.IsFoldedNode(node.Index())) {
-        supported_nodes_with_folded.insert(&node);
-      }
-    }
-  }
 
   const auto gen_metadef_name = [&]() {
     HashValue model_hash;
@@ -301,7 +290,8 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra
       ORT_UNUSED_PARAMETER(state);
     };
 
-    compute_info.compute_func = [dim_param_to_input_dim, fixed_dim_param_values, fused_output_shapes, output_dim_params](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
+    const bool enable_additive_dim_param = enable_additive_dim_param_;
+    compute_info.compute_func = [dim_param_to_input_dim, fixed_dim_param_values, fused_output_shapes, output_dim_params, enable_additive_dim_param](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
       Ort::KernelContext ctx(context);
 
       const size_t num_inputs = ctx.GetInputCount();
@@ -447,7 +437,7 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra
 
               // Try to parse additive expressions like "dim_a + dim_b"
               // (e.g., "past_sequence_length + sequence_length").
-              if (output_shape[dim_idx] == 0) {
+              if (enable_additive_dim_param && output_shape[dim_idx] == 0) {
                 auto plus_pos = dim_param.find('+');
                 if (plus_pos != std::string::npos) {
                   const std::string left = utils::TrimString(std::string_view(dim_param).substr(0, plus_pos));
diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.h b/onnxruntime/core/providers/webnn/webnn_execution_provider.h
index d1ae08b5cccea..c563bd4c6ff5c 100644
--- a/onnxruntime/core/providers/webnn/webnn_execution_provider.h
+++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.h
@@ -21,7 +21,8 @@ class WebNNExecutionProvider : public IExecutionProvider {
  public:
   explicit WebNNExecutionProvider(const std::string& webnn_device_flags,
                                   const webnn::FreeDimensionBounds& free_dimension_bounds,
-                                  bool enable_causal_lm);
+                                  bool enable_causal_lm,
+                                  bool enable_additive_dim_param);
   virtual ~WebNNExecutionProvider();
 
   std::vector<std::unique_ptr<ComputeCapability>>
@@ -56,6 +57,8 @@ class WebNNExecutionProvider : public IExecutionProvider {
   webnn::FreeDimensionBounds free_dimension_bounds_;
   // Controls GQA KV-cache strategy: true = concat (stateful), false = ScatterND (stateless).
   bool enable_causal_lm_;
+  // Controls optional additive symbolic dim_param parsing: "a + b".
+  bool enable_additive_dim_param_;
   InlinedHashMap<std::string, std::unique_ptr<onnxruntime::webnn::Model>> models_;
   ModelMetadefIdGenerator metadef_id_generator_;
 };
diff --git a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
index 56a96b43bd11e..46c80d60ae84b 100644
--- a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
+++ b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
@@ -82,10 +82,12 @@ Status ParseFreeDimensionBounds(std::string_view value, webnn::FreeDimensionBoun
 struct WebNNProviderFactory : IExecutionProviderFactory {
   explicit WebNNProviderFactory(const std::string& webnn_device_flags,
                                 const webnn::FreeDimensionBounds& free_dimension_bounds,
-                                bool enable_causal_lm)
+                                bool enable_causal_lm,
+                                bool enable_additive_dim_param)
       : webnn_device_flags_(webnn_device_flags),
         free_dimension_bounds_(free_dimension_bounds),
-        enable_causal_lm_(enable_causal_lm) {}
+        enable_causal_lm_(enable_causal_lm),
+        enable_additive_dim_param_(enable_additive_dim_param) {}
   ~WebNNProviderFactory() override {}
 
   std::unique_ptr<IExecutionProvider> CreateProvider() override;
@@ -95,10 +97,12 @@ struct WebNNProviderFactory : IExecutionProviderFactory {
   std::string webnn_device_flags_;
   webnn::FreeDimensionBounds free_dimension_bounds_;
   bool enable_causal_lm_;
+  bool enable_additive_dim_param_;
 };
 
 std::unique_ptr<IExecutionProvider> WebNNProviderFactory::CreateProvider() {
-  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, free_dimension_bounds_, enable_causal_lm_);
+  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, free_dimension_bounds_, enable_causal_lm_,
+                                                  enable_additive_dim_param_);
 }
 
 std::unique_ptr<IExecutionProvider> WebNNProviderFactory::CreateProvider(
@@ -123,7 +127,8 @@ std::unique_ptr<IExecutionProvider> WebNNProviderFactory::CreateProvider(
                                                             std::numeric_limits<int32_t>::max()));
     merged_bounds[dim_override.dim_identifier] = webnn::FreeDimensionBound{value, value};
   }
-  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, merged_bounds, enable_causal_lm_);
+  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, merged_bounds, enable_causal_lm_,
+                                                  enable_additive_dim_param_);
 }
 
 std::shared_ptr<IExecutionProviderFactory> WebNNProviderFactoryCreator::Create(
@@ -145,8 +150,14 @@ std::shared_ptr<IExecutionProviderFactory> WebNNProviderFactoryCreator::Create(
   const bool enable_causal_lm = (enable_causal_lm_it != provider_options.end() &&
                                   enable_causal_lm_it->second == "true");
 
+  // Optional additive symbolic dim expression parsing: "a + b". Default false.
+  const auto enable_additive_dim_param_it = provider_options.find("webnn_enable_additive_dim_param");
+  const bool enable_additive_dim_param = (enable_additive_dim_param_it != provider_options.end() &&
+                                          enable_additive_dim_param_it->second == "true");
+
   return std::make_shared<onnxruntime::WebNNProviderFactory>(webnn_device_flags, free_dimension_bounds,
-                                                             enable_causal_lm);
+                                                             enable_causal_lm,
+                                                             enable_additive_dim_param);
 }
 
 }  // namespace onnxruntime

From 1dbb113120439f2fa635a3229ab90d035da2d4ec Mon Sep 17 00:00:00 2001
From: "Kumar, Arisha" <arisha.kumar@intel.com>
Date: Fri, 5 Jun 2026 12:51:24 -0700
Subject: [PATCH 3/3] Address PR #21 review: remove
 IsFoldedShape/GetFoldedShape/IsFoldedNode dead code and
 enable_additive_dim_param option

- Remove IsFoldedShape(), GetFoldedShape(), IsFoldedNode() declarations and
  implementations from model_builder.h/cc (dead code from removed ShapeSubgraphFolder)
- Remove IsFoldedShape/IsFoldedNode call sites in expand_op_builder.cc and
  reshape_op_builder.cc
- Remove enable_additive_dim_param constructor param, member variable, and
  option parsing from webnn_execution_provider.h/cc and webnn_provider_factory.cc
- Remove enableAdditiveDimParam session option mapping from session-options.ts
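- Keep the additive dim_param fallback logic, now enabled only when the WebNN
  context does not expose computeShapes() (checked in Compile() instead of
  being controlled by a provider option)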
---
 js/web/lib/wasm/session-options.ts            |  5 ----
 .../webnn/builders/impl/expand_op_builder.cc  | 16 +++---------
 .../webnn/builders/impl/reshape_op_builder.cc | 16 ++----------
 .../providers/webnn/builders/model_builder.cc | 26 -------------------
 .../providers/webnn/builders/model_builder.h  |  5 ----
 .../webnn/webnn_execution_provider.cc         | 13 +++++-----
 .../webnn/webnn_execution_provider.h          |  5 +---
 .../providers/webnn/webnn_provider_factory.cc | 21 ++++-----------
 8 files changed, 17 insertions(+), 90 deletions(-)

diff --git a/js/web/lib/wasm/session-options.ts b/js/web/lib/wasm/session-options.ts
index c026602f73d33..6f83234534f26 100644
--- a/js/web/lib/wasm/session-options.ts
+++ b/js/web/lib/wasm/session-options.ts
@@ -139,11 +139,6 @@ const setExecutionProviders = async (
           if (enableCausalLM) {
             appendEpOption(epOptions, 'enableCausalLM', 'true', allocs);
           }
-          // enableAdditiveDimParam: parse symbolic dim expressions like "a + b" in output shapes.
-          const enableAdditiveDimParam = (webnnOptions as any)?.enableAdditiveDimParam;
-          if (enableAdditiveDimParam) {
-            appendEpOption(epOptions, 'webnn_enable_additive_dim_param', 'true', allocs);
-          }
         }
         break;
       case 'webgpu':
diff --git a/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc
index 3748630d4f434..b6d718b5c79ae 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc
@@ -39,13 +39,10 @@ class ExpandOpBuilder : public BaseOpBuilder {
 void ExpandOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
   const auto& input_defs = node.InputDefs();
   const auto& shape_name = input_defs[1]->Name();
-  // Skip the shape input when:
-  // 1. It was folded by the shape subgraph folder (compile-time constant), OR
-  // 2. It is a constant initializer AND the input has static shape.
+  // Skip the shape input when it is a constant initializer AND the input has static shape.
   // When the input has dynamic shape, we need the shape operand for dynamicExpand even if it's constant.
-  if (model_builder.IsFoldedShape(shape_name) ||
-      (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) &&
-       !HasDynamicShape(*input_defs[0]))) {
+  if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) &&
+      !HasDynamicShape(*input_defs[0])) {
     model_builder.AddInitializerToSkip(shape_name);
   }
 }
@@ -79,13 +76,6 @@ Status ExpandOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
 
     emscripten::val output_shape_arr = emscripten::val::array(GetNarrowedIntFromInt64<uint32_t>(output_shape));
     output = model_builder.GetBuilder().call<emscripten::val>("expand", input, output_shape_arr, options);
-  } else if (model_builder.IsFoldedShape(input_defs[1]->Name())) {
-    // Folded shape path: shape subgraph was pre-evaluated to a constant vector.
-    const auto* folded = model_builder.GetFoldedShape(input_defs[1]->Name());
-    ORT_RETURN_IF_NOT(folded != nullptr, "IsFoldedShape true but GetFoldedShape returned null");
-
-    emscripten::val output_shape_arr = emscripten::val::array(GetNarrowedIntFromInt64<uint32_t>(*folded));
-    output = model_builder.GetBuilder().call<emscripten::val>("expand", input, output_shape_arr, options);
   } else {
     // Operand shape path: use dynamicExpand with the shape operand.
     emscripten::val shape_operand = model_builder.GetOperand(input_defs[1]->Name());
diff --git a/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc
index 3c8afe37012a1..556b73b5d0fc1 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc
@@ -38,11 +38,9 @@ class ReshapeOpBuilder : public BaseOpBuilder {
 
 void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
   const auto& shape_name = node.InputDefs()[1]->Name();
-  // Only skip the shape input when it is a constant initializer (consumed at build time)
-  // or when it was folded by the shape subgraph folder.
+  // Only skip the shape input when it is a constant initializer (consumed at build time).
   // When it is an operand, we need it as the newShape input for dynamicReshape.
-  if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) ||
-      model_builder.IsFoldedShape(shape_name)) {
+  if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name)) {
     model_builder.AddInitializerToSkip(shape_name);
   }
 }
@@ -222,16 +220,6 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
       emscripten::val new_shape = emscripten::val::array();
       output = model_builder.GetBuilder().call<emscripten::val>("reshape", input, new_shape, options);
     }
-  } else if (model_builder.IsFoldedShape(input_defs[1]->Name())) {
-    // Folded shape path: the shape subgraph was pre-evaluated to a constant vector.
-    const auto* folded = model_builder.GetFoldedShape(input_defs[1]->Name());
-    ORT_RETURN_IF_NOT(folded != nullptr, "IsFoldedShape true but GetFoldedShape returned null");
-
-    emscripten::val new_shape = emscripten::val::array();
-    for (int64_t dim : *folded) {
-      new_shape.call<void>("push", static_cast<uint32_t>(dim));
-    }
-    output = model_builder.GetBuilder().call<emscripten::val>("reshape", input, new_shape, options);
   } else {
     // Operand shape path: shape is a non-constant operand. Use dynamicReshape.
     emscripten::val shape_operand = model_builder.GetOperand(input_defs[1]->Name());
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc
index c36d022074d24..c5850cc29b84e 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc
@@ -85,12 +85,6 @@ void ModelBuilder::PreprocessInitializers() {
   for (size_t i = 0; i < node_indices.size(); i++) {
     const auto* node(graph_viewer_.GetNode(node_indices[i]));
 
-    // Skip nodes that are part of a folded shape subgraph — their initializer inputs
-    // don't need to be registered as WebNN constants.
-    if (IsFoldedNode(node->Index())) {
-      continue;
-    }
-
     // find all initializers consumed. AddInitializersToSkip will potentially decrement the usage count.
     for (const auto* input : node->InputDefs()) {
       if (input->Exists() && Contains(initializers, input->Name())) {
@@ -408,11 +402,6 @@ Status ModelBuilder::AddOperations() {
   for (size_t i = 0; i < node_indices.size(); i++) {
     const auto* node(graph_viewer_.GetNode(node_indices[i]));
 
-    // Skip nodes that are part of a folded shape subgraph.
-    if (IsFoldedNode(node->Index())) {
-      continue;
-    }
-
     if (const auto* op_builder = GetOpBuilder(*node)) {
       ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, *node, logger_));
     } else {
@@ -514,20 +503,5 @@ const ModelBuilder::DimProvenance* ModelBuilder::GetDimProvenance(const std::str
   return it != dim_provenance_.end() ? &it->second : nullptr;
 }
 
-bool ModelBuilder::IsFoldedShape(const std::string& name) const {
-  ORT_UNUSED_PARAMETER(name);
-  return false;
-}
-
-const std::vector<int64_t>* ModelBuilder::GetFoldedShape(const std::string& name) const {
-  ORT_UNUSED_PARAMETER(name);
-  return nullptr;
-}
-
-bool ModelBuilder::IsFoldedNode(NodeIndex node_index) const {
-  ORT_UNUSED_PARAMETER(node_index);
-  return false;
-}
-
 }  // namespace webnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h
index 4fd812bda5126..f7535d19e4e1e 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.h
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.h
@@ -58,11 +58,6 @@ class ModelBuilder {
   // Returns true when GQA should use concat-based (stateful) KV-cache; false for ScatterND (stateless).
   bool IsCausalLMEnabled() const { return enable_causal_lm_; }
 
-  // Shape subgraph folder: check if a NodeArg name has been folded to a constant shape.
-  bool IsFoldedShape(const std::string& name) const;
-  const std::vector<int64_t>* GetFoldedShape(const std::string& name) const;
-  bool IsFoldedNode(NodeIndex node_index) const;
-
   // The initializer will be processed separately, skip it as an initializer.
   void AddInitializerToSkip(const std::string& tensor_name);
 
diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
index 8e1ce16657be0..4a8363199e365 100644
--- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
+++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc
@@ -31,8 +31,7 @@ constexpr const char* WEBNN = "WEBNN";
 
 WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_flags,
                          const webnn::FreeDimensionBounds& free_dimension_bounds,
-                         bool enable_causal_lm,
-                         bool enable_additive_dim_param)
+                         bool enable_causal_lm)
     : IExecutionProvider{
           onnxruntime::kWebNNExecutionProvider,
           // If MLTensor is supported, we force all the tensors to be allocated as MLTensor.
@@ -43,8 +42,7 @@ WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_f
               0)},
                   wnn_device_type_(webnn::DeviceTypeFromString(webnn_device_flags)),
                   free_dimension_bounds_(free_dimension_bounds),
-                  enable_causal_lm_(enable_causal_lm),
-                  enable_additive_dim_param_(enable_additive_dim_param) {
+                  enable_causal_lm_(enable_causal_lm) {
   wnn_context_ = emscripten::val::module_property("currentContext");
   if (!wnn_context_.as<bool>()) {
     ORT_THROW("Failed to create WebNN context.");
@@ -290,8 +288,9 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra
       ORT_UNUSED_PARAMETER(state);
     };
 
-    const bool enable_additive_dim_param = enable_additive_dim_param_;
-    compute_info.compute_func = [dim_param_to_input_dim, fixed_dim_param_values, fused_output_shapes, output_dim_params, enable_additive_dim_param](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
+    // Use additive dim_param fallback when computeShapes() API is not yet available.
+    const bool use_additive_dim_fallback = wnn_context_["computeShapes"].isUndefined();
+    compute_info.compute_func = [dim_param_to_input_dim, fixed_dim_param_values, fused_output_shapes, output_dim_params, use_additive_dim_fallback](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
       Ort::KernelContext ctx(context);
 
       const size_t num_inputs = ctx.GetInputCount();
@@ -437,7 +436,7 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra
 
               // Try to parse additive expressions like "dim_a + dim_b"
               // (e.g., "past_sequence_length + sequence_length").
-              if (enable_additive_dim_param && output_shape[dim_idx] == 0) {
+              if (use_additive_dim_fallback && output_shape[dim_idx] == webnn::kDynamicDim) {
                 auto plus_pos = dim_param.find('+');
                 if (plus_pos != std::string::npos) {
                   const std::string left = utils::TrimString(std::string_view(dim_param).substr(0, plus_pos));
diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.h b/onnxruntime/core/providers/webnn/webnn_execution_provider.h
index c563bd4c6ff5c..d1ae08b5cccea 100644
--- a/onnxruntime/core/providers/webnn/webnn_execution_provider.h
+++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.h
@@ -21,8 +21,7 @@ class WebNNExecutionProvider : public IExecutionProvider {
  public:
   explicit WebNNExecutionProvider(const std::string& webnn_device_flags,
                                   const webnn::FreeDimensionBounds& free_dimension_bounds,
-                                  bool enable_causal_lm,
-                                  bool enable_additive_dim_param);
+                                  bool enable_causal_lm);
   virtual ~WebNNExecutionProvider();
 
   std::vector<std::unique_ptr<ComputeCapability>>
@@ -57,8 +56,6 @@ class WebNNExecutionProvider : public IExecutionProvider {
   webnn::FreeDimensionBounds free_dimension_bounds_;
   // Controls GQA KV-cache strategy: true = concat (stateful), false = ScatterND (stateless).
   bool enable_causal_lm_;
-  // Controls optional additive symbolic dim_param parsing: "a + b".
-  bool enable_additive_dim_param_;
   InlinedHashMap<std::string, std::unique_ptr<onnxruntime::webnn::Model>> models_;
   ModelMetadefIdGenerator metadef_id_generator_;
 };
diff --git a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
index 46c80d60ae84b..56a96b43bd11e 100644
--- a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
+++ b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc
@@ -82,12 +82,10 @@ Status ParseFreeDimensionBounds(std::string_view value, webnn::FreeDimensionBoun
 struct WebNNProviderFactory : IExecutionProviderFactory {
   explicit WebNNProviderFactory(const std::string& webnn_device_flags,
                                 const webnn::FreeDimensionBounds& free_dimension_bounds,
-                                bool enable_causal_lm,
-                                bool enable_additive_dim_param)
+                                bool enable_causal_lm)
       : webnn_device_flags_(webnn_device_flags),
         free_dimension_bounds_(free_dimension_bounds),
-        enable_causal_lm_(enable_causal_lm),
-        enable_additive_dim_param_(enable_additive_dim_param) {}
+        enable_causal_lm_(enable_causal_lm) {}
   ~WebNNProviderFactory() override {}
 
   std::unique_ptr<IExecutionProvider> CreateProvider() override;
@@ -97,12 +95,10 @@ struct WebNNProviderFactory : IExecutionProviderFactory {
   std::string webnn_device_flags_;
   webnn::FreeDimensionBounds free_dimension_bounds_;
   bool enable_causal_lm_;
-  bool enable_additive_dim_param_;
 };
 
 std::unique_ptr<IExecutionProvider> WebNNProviderFactory::CreateProvider() {
-  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, free_dimension_bounds_, enable_causal_lm_,
-                                                  enable_additive_dim_param_);
+  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, free_dimension_bounds_, enable_causal_lm_);
 }
 
 std::unique_ptr<IExecutionProvider> WebNNProviderFactory::CreateProvider(
@@ -127,8 +123,7 @@ std::unique_ptr<IExecutionProvider> WebNNProviderFactory::CreateProvider(
                                                             std::numeric_limits<int32_t>::max()));
     merged_bounds[dim_override.dim_identifier] = webnn::FreeDimensionBound{value, value};
   }
-  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, merged_bounds, enable_causal_lm_,
-                                                  enable_additive_dim_param_);
+  return std::make_unique<WebNNExecutionProvider>(webnn_device_flags_, merged_bounds, enable_causal_lm_);
 }
 
 std::shared_ptr<IExecutionProviderFactory> WebNNProviderFactoryCreator::Create(
@@ -150,14 +145,8 @@ std::shared_ptr<IExecutionProviderFactory> WebNNProviderFactoryCreator::Create(
   const bool enable_causal_lm = (enable_causal_lm_it != provider_options.end() &&
                                   enable_causal_lm_it->second == "true");
 
-  // Optional additive symbolic dim expression parsing: "a + b". Default false.
-  const auto enable_additive_dim_param_it = provider_options.find("webnn_enable_additive_dim_param");
-  const bool enable_additive_dim_param = (enable_additive_dim_param_it != provider_options.end() &&
-                                          enable_additive_dim_param_it->second == "true");
-
   return std::make_shared<onnxruntime::WebNNProviderFactory>(webnn_device_flags, free_dimension_bounds,
-                                                             enable_causal_lm,
-                                                             enable_additive_dim_param);
+                                                             enable_causal_lm);
 }
 
 }  // namespace onnxruntime