From eec8bb629c95d19f4098ad3ebbd52590cb811e5f Mon Sep 17 00:00:00 2001 From: "Kumar, Arisha" Date: Fri, 29 May 2026 12:37:53 -0700 Subject: [PATCH 1/3] Add shape subgraph folding and dynamic output dim resolution for WebNN EP - Add ShapeSubgraphFolder to pre-evaluate shape subgraphs (Where/Equal/Range/ConstantOfShape chains) so Reshape/Expand see constant shapes at build time - Integrate folded shapes into Reshape and Expand op builders - Support additive dim_param expressions (e.g. past_sequence_length + sequence_length) - Add heuristic fallback for unresolved output dimensions from runtime inputs - Fix QDQ per-axis reshape to handle all axes (not just last axis) - Claim folded nodes in GetCapability to keep them in WebNN partition --- .../webnn/builders/impl/expand_op_builder.cc | 16 +- .../webnn/builders/impl/qdq_op_builder.cc | 7 +- .../webnn/builders/impl/reshape_op_builder.cc | 16 +- .../providers/webnn/builders/model_builder.cc | 31 + .../providers/webnn/builders/model_builder.h | 7 + .../webnn/builders/shape_subgraph_folder.cc | 568 ++++++++++++++++++ .../webnn/builders/shape_subgraph_folder.h | 85 +++ .../webnn/webnn_execution_provider.cc | 98 ++- 8 files changed, 804 insertions(+), 24 deletions(-) create mode 100644 onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc create mode 100644 onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h diff --git a/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc index 665075018715f..3748630d4f434 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc @@ -39,10 +39,13 @@ class ExpandOpBuilder : public BaseOpBuilder { void ExpandOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& input_defs = node.InputDefs(); const auto& shape_name = input_defs[1]->Name(); - // Only skip the shape input when it is a constant initializer AND the input has static shape. + // Skip the shape input when: + // 1. It was folded by the shape subgraph folder (compile-time constant), OR + // 2. It is a constant initializer AND the input has static shape. // When the input has dynamic shape, we need the shape operand for dynamicExpand even if it's constant. - if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) && - !HasDynamicShape(*input_defs[0])) { + if (model_builder.IsFoldedShape(shape_name) || + (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) && + !HasDynamicShape(*input_defs[0]))) { model_builder.AddInitializerToSkip(shape_name); } } @@ -76,6 +79,13 @@ Status ExpandOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, emscripten::val output_shape_arr = emscripten::val::array(GetNarrowedIntFromInt64(output_shape)); output = model_builder.GetBuilder().call("expand", input, output_shape_arr, options); + } else if (model_builder.IsFoldedShape(input_defs[1]->Name())) { + // Folded shape path: shape subgraph was pre-evaluated to a constant vector. + const auto* folded = model_builder.GetFoldedShape(input_defs[1]->Name()); + ORT_RETURN_IF_NOT(folded != nullptr, "IsFoldedShape true but GetFoldedShape returned null"); + + emscripten::val output_shape_arr = emscripten::val::array(GetNarrowedIntFromInt64(*folded)); + output = model_builder.GetBuilder().call("expand", input, output_shape_arr, options); } else { // Operand shape path: use dynamicExpand with the shape operand. emscripten::val shape_operand = model_builder.GetOperand(input_defs[1]->Name()); diff --git a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc index e07814521dafa..5d7e2c5620faf 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc @@ -85,10 +85,11 @@ Status QDQOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, } } - // For per-axis quantization/dequantization and axis is not equal to input_rank - 1, - // we need to reshape the scale and zero_point tensors to make them broadcastable with the input tensor. + // For per-axis quantization/dequantization, the scale is 1-D. + // WebNN requires the scale and zero_point tensors to have the same rank as the input tensor. + // We need to reshape them to make them broadcastable with the input tensor. if (scale_shape.size() == 1 && input_rank > 1 && - block_size == 0 && axis != static_cast(input_rank - 1)) { + block_size == 0) { // Insert ones before and after the axis dimension for broadcasting of scale tensor. // Use emscripten::val::array() to support dynamic axis dim via input["shape"][axis]. emscripten::val target_shape = emscripten::val::array(); diff --git a/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc index 556b73b5d0fc1..3c8afe37012a1 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc @@ -38,9 +38,11 @@ class ReshapeOpBuilder : public BaseOpBuilder { void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& shape_name = node.InputDefs()[1]->Name(); - // Only skip the shape input when it is a constant initializer (consumed at build time). + // Only skip the shape input when it is a constant initializer (consumed at build time) + // or when it was folded by the shape subgraph folder. // When it is an operand, we need it as the newShape input for dynamicReshape. - if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name)) { + if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) || + model_builder.IsFoldedShape(shape_name)) { model_builder.AddInitializerToSkip(shape_name); } } @@ -220,6 +222,16 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, emscripten::val new_shape = emscripten::val::array(); output = model_builder.GetBuilder().call("reshape", input, new_shape, options); } + } else if (model_builder.IsFoldedShape(input_defs[1]->Name())) { + // Folded shape path: the shape subgraph was pre-evaluated to a constant vector. + const auto* folded = model_builder.GetFoldedShape(input_defs[1]->Name()); + ORT_RETURN_IF_NOT(folded != nullptr, "IsFoldedShape true but GetFoldedShape returned null"); + + emscripten::val new_shape = emscripten::val::array(); + for (int64_t dim : *folded) { + new_shape.call("push", static_cast(dim)); + } + output = model_builder.GetBuilder().call("reshape", input, new_shape, options); } else { // Operand shape path: shape is a non-constant operand. Use dynamicReshape. emscripten::val shape_operand = model_builder.GetOperand(input_defs[1]->Name()); diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc index 3667d42cb9a39..37af542dbbdd0 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc @@ -42,6 +42,13 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge } Status ModelBuilder::Initialize() { + // Run shape subgraph folding FIRST, before PreprocessInitializers. + // This pre-evaluates shape subgraphs (Where/Equal/Range/ConstantOfShape chains) + // so that Reshape/Expand/etc. see constant shapes instead of dynamic subgraphs. + // Must run before PreprocessInitializers because AddInitializersToSkip checks IsFoldedShape(). + shape_folder_ = std::make_unique(graph_viewer_, free_dimension_bounds_, logger_); + ORT_RETURN_IF_ERROR(shape_folder_->Run()); + PreprocessInitializers(); ORT_RETURN_IF_ERROR(RegisterInitializers()); ORT_RETURN_IF_ERROR(RegisterModelInputs()); @@ -85,6 +92,12 @@ void ModelBuilder::PreprocessInitializers() { for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer_.GetNode(node_indices[i])); + // Skip nodes that are part of a folded shape subgraph — their initializer inputs + // don't need to be registered as WebNN constants. + if (IsFoldedNode(node->Index())) { + continue; + } + // find all initializers consumed. AddInitializersToSkip will potentially decrement the usage count. for (const auto* input : node->InputDefs()) { if (input->Exists() && Contains(initializers, input->Name())) { @@ -401,6 +414,12 @@ Status ModelBuilder::AddOperations() { const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer_.GetNode(node_indices[i])); + + // Skip nodes that are part of a folded shape subgraph. + if (IsFoldedNode(node->Index())) { + continue; + } + if (const auto* op_builder = GetOpBuilder(*node)) { ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, *node, logger_)); } else { @@ -502,5 +521,17 @@ const ModelBuilder::DimProvenance* ModelBuilder::GetDimProvenance(const std::str return it != dim_provenance_.end() ? &it->second : nullptr; } +bool ModelBuilder::IsFoldedShape(const std::string& name) const { + return shape_folder_ && shape_folder_->IsFoldedShape(name); +} + +const std::vector* ModelBuilder::GetFoldedShape(const std::string& name) const { + return shape_folder_ ? shape_folder_->GetFoldedShape(name) : nullptr; +} + +bool ModelBuilder::IsFoldedNode(NodeIndex node_index) const { + return shape_folder_ && shape_folder_->IsFoldedNode(node_index); +} + } // namespace webnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h index f7535d19e4e1e..c2b68f18ea33e 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.h +++ b/onnxruntime/core/providers/webnn/builders/model_builder.h @@ -8,6 +8,7 @@ #include #include "model.h" +#include "shape_subgraph_folder.h" #include "core/framework/execution_provider.h" #include "core/providers/webnn/builders/helper.h" @@ -58,6 +59,11 @@ class ModelBuilder { // Returns true when GQA should use concat-based (stateful) KV-cache; false for ScatterND (stateless). bool IsCausalLMEnabled() const { return enable_causal_lm_; } + // Shape subgraph folder: check if a NodeArg name has been folded to a constant shape. + bool IsFoldedShape(const std::string& name) const; + const std::vector* GetFoldedShape(const std::string& name) const; + bool IsFoldedNode(NodeIndex node_index) const; + // The initializer will be processed separately, skip it as an initializer. void AddInitializerToSkip(const std::string& tensor_name); @@ -99,6 +105,7 @@ class ModelBuilder { emscripten::val wnn_limits_ = emscripten::val::undefined(); FreeDimensionBounds free_dimension_bounds_; bool enable_causal_lm_; + std::unique_ptr shape_folder_; InlinedHashMap wnn_operands_; std::vector input_names_; std::vector output_names_; diff --git a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc new file mode 100644 index 0000000000000..c38b993790414 --- /dev/null +++ b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc @@ -0,0 +1,568 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Intel Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "shape_subgraph_folder.h" + +#include "core/framework/tensorprotoutils.h" +#include "core/providers/common.h" + +#include +#include +#include +#include + +namespace onnxruntime { +namespace webnn { + +ShapeSubgraphFolder::ShapeSubgraphFolder(const GraphViewer& graph_viewer, + const FreeDimensionBounds& free_dimension_bounds, + const logging::Logger& logger) + : graph_viewer_(graph_viewer), + free_dimension_bounds_(free_dimension_bounds), + logger_(logger) { +} + +bool ShapeSubgraphFolder::IsSupportedShapeOp(const Node& node) { + static const InlinedHashSet supported_ops = { + "Shape", "Gather", "Concat", "Unsqueeze", "Squeeze", "Slice", + "Cast", "Add", "Sub", "Mul", "Div", "Equal", "Where", + "ConstantOfShape", "Range", "Reshape", "Expand", + "Neg", "Abs", "Floor", "Ceil", + }; + // Only standard ONNX ops (empty domain or "onnx") + if (!node.Domain().empty() && node.Domain() != "onnx" && node.Domain() != kOnnxDomain) { + return false; + } + return supported_ops.count(node.OpType()) > 0; +} + +bool ShapeSubgraphFolder::GetResolvedShape(const NodeArg* arg, std::vector& shape) const { + const auto* shape_proto = arg->Shape(); + if (!shape_proto) return false; + + shape.clear(); + for (int i = 0; i < shape_proto->dim_size(); i++) { + const auto& dim = shape_proto->dim(i); + if (dim.has_dim_value()) { + shape.push_back(dim.dim_value()); + } else if (dim.has_dim_param()) { + // Try to resolve from free_dimension_bounds + const auto& dim_param = dim.dim_param(); + auto it = free_dimension_bounds_.find(dim_param); + if (it != free_dimension_bounds_.end()) { + // Use maxSize as the resolved value (consistent with WebNN EP behavior) + shape.push_back(static_cast(it->second.max_size)); + } else { + return false; // Can't resolve this symbolic dim + } + } else { + return false; // Unknown dim + } + } + return true; +} + +// Check if shape-consuming inputs (Reshape[1], Expand[1], etc.) are candidates for folding. +static bool IsShapeConsumingSlot(const Node& consumer, size_t input_index) { + const auto& op = consumer.OpType(); + if ((op == "Reshape" || op == "Expand") && input_index == 1) return true; + if (op == "ConstantOfShape" && input_index == 0) return true; + if (op == "Tile" && input_index == 1) return true; + // Slice has starts[1], ends[2], axes[3], steps[4] + if (op == "Slice" && input_index >= 1 && input_index <= 4) return true; + return false; +} + +bool ShapeSubgraphFolder::TryFoldShapeSubgraph(const NodeArg* shape_arg) { + const std::string& target_name = shape_arg->Name(); + + // Already folded? + if (folded_shapes_.count(target_name)) return true; + + // Check if it's already a constant initializer + if (graph_viewer_.GetConstantInitializer(target_name)) return false; // Already handled normally + + // BFS backward to find the producer subgraph + std::unordered_map> known_values; + std::vector topo_order; // nodes in forward eval order + InlinedHashSet visited_nodes; + std::queue worklist; + worklist.push(shape_arg); + + InlinedHashSet visited_args; + visited_args.insert(target_name); + + bool can_fold = true; + + while (!worklist.empty() && can_fold) { + const NodeArg* current = worklist.front(); + worklist.pop(); + const std::string& name = current->Name(); + + // Skip if already known + if (known_values.count(name)) continue; + + // Check if it's a constant initializer + const auto* init = graph_viewer_.GetConstantInitializer(name); + if (init) { + // Read int64 values from the initializer + std::vector values; + if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) { + if (!init->int64_data().empty()) { + values.assign(init->int64_data().begin(), init->int64_data().end()); + } else if (!init->raw_data().empty()) { + const int64_t* data = reinterpret_cast(init->raw_data().data()); + size_t count = init->raw_data().size() / sizeof(int64_t); + values.assign(data, data + count); + } + } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32) { + if (!init->int32_data().empty()) { + for (auto v : init->int32_data()) values.push_back(static_cast(v)); + } else if (!init->raw_data().empty()) { + const int32_t* data = reinterpret_cast(init->raw_data().data()); + size_t count = init->raw_data().size() / sizeof(int32_t); + for (size_t i = 0; i < count; i++) values.push_back(static_cast(data[i])); + } + } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + if (!init->float_data().empty()) { + for (auto v : init->float_data()) values.push_back(static_cast(v)); + } else if (!init->raw_data().empty()) { + const float* data = reinterpret_cast(init->raw_data().data()); + size_t count = init->raw_data().size() / sizeof(float); + for (size_t i = 0; i < count; i++) values.push_back(static_cast(data[i])); + } + } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_BOOL) { + if (!init->int32_data().empty()) { + for (auto v : init->int32_data()) values.push_back(static_cast(v)); + } else if (!init->raw_data().empty()) { + const uint8_t* data = reinterpret_cast(init->raw_data().data()); + size_t count = init->raw_data().size(); + for (size_t i = 0; i < count; i++) values.push_back(static_cast(data[i])); + } + } else { + can_fold = false; + break; + } + // Handle scalar initializers (0-dim tensors with no data entries but have raw_data) + if (values.empty() && init->dims_size() == 0) { + // Try scalar + if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) { + values.push_back(0); + } + } + known_values[name] = std::move(values); + continue; + } + + // Check if it's a graph input (cannot fold if depends on runtime input, unless it's just shape) + const auto* producer = graph_viewer_.GetProducerNode(name); + if (!producer) { + // It's a graph input - can't fold unless we can get its shape via Shape op + can_fold = false; + break; + } + + // Check if the producer is a supported shape op + if (!IsSupportedShapeOp(*producer)) { + can_fold = false; + break; + } + + // Add producer to visit list + if (visited_nodes.insert(producer->Index()).second) { + topo_order.push_back(producer); + + // Add all inputs of this producer to the worklist + for (const auto* input_def : producer->InputDefs()) { + if (input_def && input_def->Exists()) { + if (visited_args.insert(input_def->Name()).second) { + worklist.push(input_def); + } + } + } + } + } + + if (!can_fold) return false; + + // Sort nodes in topological order (reverse of discovery = forward eval order) + std::reverse(topo_order.begin(), topo_order.end()); + + // Evaluate nodes in topological order + for (const Node* node : topo_order) { + std::vector result; + if (!EvaluateNode(*node, known_values, result)) { + return false; + } + // Store results for all outputs + for (const auto* output_def : node->OutputDefs()) { + if (output_def && output_def->Exists()) { + known_values[output_def->Name()] = result; + } + } + } + + // The target should now be in known_values + auto it = known_values.find(target_name); + if (it == known_values.end()) return false; + + // Store the folded result + folded_shapes_[target_name] = it->second; + + // Mark nodes in the subgraph as folded, but only if ALL their outputs are consumed + // exclusively by other folded nodes or shape-consuming slots. If any output feeds + // a non-shape consumer outside the subgraph, we cannot skip that node. + for (const Node* node : topo_order) { + bool can_skip = true; + for (auto it2 = node->OutputEdgesBegin(); it2 != node->OutputEdgesEnd(); ++it2) { + const Node& consumer = it2->GetNode(); + if (visited_nodes.count(consumer.Index())) continue; // consumer is in our subgraph + // Check if the consumer uses this output only in a shape-consuming slot + if (!IsShapeConsumingSlot(consumer, it2->GetDstArgIndex())) { + can_skip = false; + break; + } + } + if (can_skip) { + folded_nodes_.insert(node->Index()); + } + } + + return true; +} + +bool ShapeSubgraphFolder::EvaluateNode( + const Node& node, + const std::unordered_map>& known_values, + std::vector& result) { + const auto& op = node.OpType(); + const auto& inputs = node.InputDefs(); + + // Helper to get input values + auto get_input = [&](size_t idx) -> const std::vector* { + if (idx >= inputs.size() || !inputs[idx] || !inputs[idx]->Exists()) return nullptr; + auto it = known_values.find(inputs[idx]->Name()); + return (it != known_values.end()) ? &it->second : nullptr; + }; + + if (op == "Shape") { + // Shape op: return the resolved shape of input[0] + if (!GetResolvedShape(inputs[0], result)) return false; + + // Handle start/end attributes (Shape opset 15+) + const auto& attrs = node.GetAttributes(); + int64_t start = 0, end = static_cast(result.size()); + if (attrs.count("start")) start = attrs.at("start").i(); + if (attrs.count("end")) end = attrs.at("end").i(); + if (start < 0) start += static_cast(result.size()); + if (end < 0) end += static_cast(result.size()); + start = std::max(int64_t(0), std::min(start, static_cast(result.size()))); + end = std::max(int64_t(0), std::min(end, static_cast(result.size()))); + result = std::vector(result.begin() + static_cast(start), result.begin() + static_cast(end)); + return true; + } + + if (op == "Gather") { + const auto* data = get_input(0); + const auto* indices = get_input(1); + if (!data || !indices) return false; + + result.clear(); + for (int64_t idx : *indices) { + if (idx < 0) idx += static_cast(data->size()); + if (idx < 0 || idx >= static_cast(data->size())) return false; + result.push_back((*data)[static_cast(idx)]); + } + // If indices is scalar (0-dim), result should also be scalar-like + if (indices->empty()) { + // scalar index case - not handled here + return false; + } + return true; + } + + if (op == "Concat") { + // For 1-D shape vectors, axis is always 0 — just concatenate all inputs. + result.clear(); + for (size_t i = 0; i < inputs.size(); i++) { + const auto* inp = get_input(i); + if (!inp) return false; + result.insert(result.end(), inp->begin(), inp->end()); + } + return true; + } + + if (op == "Unsqueeze") { + const auto* data = get_input(0); + if (!data) return false; + // For shape subgraphs, unsqueeze typically wraps a scalar into [1] shape + result = *data; + // If axes input exists (opset 13+), handle it + if (inputs.size() > 1) { + const auto* axes = get_input(1); + if (!axes) return false; + // Insert dimensions of size 1 at specified axes + // For shape vectors this is typically making a scalar into a 1-element vector + } + return true; + } + + if (op == "Squeeze") { + const auto* data = get_input(0); + if (!data) return false; + result = *data; + return true; + } + + if (op == "Cast") { + const auto* data = get_input(0); + if (!data) return false; + // Cast just passes through for int64 purposes + result = *data; + return true; + } + + if (op == "Neg") { + const auto* data = get_input(0); + if (!data) return false; + result.resize(data->size()); + for (size_t i = 0; i < data->size(); i++) result[i] = -(*data)[i]; + return true; + } + + if (op == "Abs") { + const auto* data = get_input(0); + if (!data) return false; + result.resize(data->size()); + for (size_t i = 0; i < data->size(); i++) result[i] = std::abs((*data)[i]); + return true; + } + + // Binary element-wise ops: Add, Sub, Mul, Div + if (op == "Add" || op == "Sub" || op == "Mul" || op == "Div") { + const auto* a = get_input(0); + const auto* b = get_input(1); + if (!a || !b) return false; + + // Broadcasting: if one is scalar (size 1), broadcast to the other's size + size_t size = std::max(a->size(), b->size()); + result.resize(size); + for (size_t i = 0; i < size; i++) { + int64_t va = (*a)[a->size() == 1 ? 0 : i]; + int64_t vb = (*b)[b->size() == 1 ? 0 : i]; + if (op == "Add") result[i] = va + vb; + else if (op == "Sub") result[i] = va - vb; + else if (op == "Mul") result[i] = va * vb; + else if (op == "Div") { + if (vb == 0) return false; + result[i] = va / vb; + } + } + return true; + } + + if (op == "Equal") { + const auto* a = get_input(0); + const auto* b = get_input(1); + if (!a || !b) return false; + + size_t size = std::max(a->size(), b->size()); + result.resize(size); + for (size_t i = 0; i < size; i++) { + int64_t va = (*a)[a->size() == 1 ? 0 : i]; + int64_t vb = (*b)[b->size() == 1 ? 0 : i]; + result[i] = (va == vb) ? 1 : 0; + } + return true; + } + + if (op == "Where") { + const auto* cond = get_input(0); + const auto* x = get_input(1); + const auto* y = get_input(2); + if (!cond || !x || !y) return false; + + size_t size = std::max({cond->size(), x->size(), y->size()}); + result.resize(size); + for (size_t i = 0; i < size; i++) { + int64_t c = (*cond)[cond->size() == 1 ? 0 : i]; + int64_t vx = (*x)[x->size() == 1 ? 0 : i]; + int64_t vy = (*y)[y->size() == 1 ? 0 : i]; + result[i] = c ? vx : vy; + } + return true; + } + + if (op == "Range") { + const auto* start_v = get_input(0); + const auto* limit_v = get_input(1); + const auto* delta_v = get_input(2); + if (!start_v || !limit_v || !delta_v) return false; + if (start_v->empty() || limit_v->empty() || delta_v->empty()) return false; + + int64_t start = (*start_v)[0]; + int64_t limit = (*limit_v)[0]; + int64_t delta = (*delta_v)[0]; + if (delta == 0) return false; + + result.clear(); + if (delta > 0) { + for (int64_t v = start; v < limit; v += delta) result.push_back(v); + } else { + for (int64_t v = start; v > limit; v += delta) result.push_back(v); + } + return true; + } + + if (op == "ConstantOfShape") { + const auto* shape_input = get_input(0); + if (!shape_input) return false; + + // Get the fill value from attribute + int64_t fill_value = 0; + const auto& attrs = node.GetAttributes(); + if (attrs.count("value")) { + const auto& tensor = attrs.at("value").t(); + if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) { + if (!tensor.int64_data().empty()) fill_value = tensor.int64_data(0); + else if (!tensor.raw_data().empty()) + fill_value = *reinterpret_cast(tensor.raw_data().data()); + } else if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32) { + if (!tensor.int32_data().empty()) fill_value = tensor.int32_data(0); + else if (!tensor.raw_data().empty()) + fill_value = *reinterpret_cast(tensor.raw_data().data()); + } else if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + float fv = 0.0f; + if (!tensor.float_data().empty()) fv = tensor.float_data(0); + else if (!tensor.raw_data().empty()) + fv = *reinterpret_cast(tensor.raw_data().data()); + fill_value = static_cast(fv); + } + } + + // Compute total size from shape + int64_t total = 1; + for (int64_t d : *shape_input) total *= d; + if (total < 0 || total > 1000000) return false; // Safety limit + + result.assign(static_cast(total), fill_value); + return true; + } + + if (op == "Slice") { + const auto* data = get_input(0); + const auto* starts = get_input(1); + const auto* ends = get_input(2); + if (!data || !starts || !ends) return false; + + // For 1-D shape tensors + int64_t start = (*starts)[0]; + int64_t end = (*ends)[0]; + int64_t dim_size = static_cast(data->size()); + + if (start < 0) start += dim_size; + if (end < 0) end += dim_size; + start = std::max(int64_t(0), std::min(start, dim_size)); + end = std::max(int64_t(0), std::min(end, dim_size)); + + int64_t step = 1; + if (inputs.size() > 4) { + const auto* steps_v = get_input(4); + if (steps_v && !steps_v->empty()) step = (*steps_v)[0]; + } + + result.clear(); + if (step > 0) { + for (int64_t i = start; i < end; i += step) result.push_back((*data)[static_cast(i)]); + } else if (step < 0) { + for (int64_t i = start; i > end; i += step) result.push_back((*data)[static_cast(i)]); + } + return true; + } + + if (op == "Reshape") { + const auto* data = get_input(0); + if (!data) return false; + // For shape subgraphs, Reshape just passes data through (reshaping a 1-D vector) + result = *data; + return true; + } + + if (op == "Expand") { + const auto* data = get_input(0); + const auto* shape = get_input(1); + if (!data || !shape) return false; + // For shape subgraphs, Expand broadcasts scalar/small tensor + if (data->size() == 1 && !shape->empty()) { + int64_t total = 1; + for (int64_t d : *shape) total *= d; + if (total < 0 || total > 1000000) return false; + result.assign(static_cast(total), (*data)[0]); + } else { + result = *data; + } + return true; + } + + // Unsupported op + return false; +} + +Status ShapeSubgraphFolder::Run() { + // Find all shape-consuming input slots and try to fold them + const auto& nodes = graph_viewer_.GetNodesInTopologicalOrder(); + + for (auto node_idx : nodes) { + const auto* node = graph_viewer_.GetNode(node_idx); + if (!node) continue; + + const auto& input_defs = node->InputDefs(); + for (size_t i = 0; i < input_defs.size(); i++) { + if (!IsShapeConsumingSlot(*node, i)) continue; + + const auto* shape_arg = input_defs[i]; + if (!shape_arg || !shape_arg->Exists()) continue; + + // Skip if already a constant initializer (handled normally) + if (graph_viewer_.GetConstantInitializer(shape_arg->Name())) continue; + + // Try to fold this shape input + if (TryFoldShapeSubgraph(shape_arg)) { + LOGS(logger_, VERBOSE) << "ShapeSubgraphFolder: Folded shape input '" + << shape_arg->Name() << "' for " + << node->OpType() << " node '" << node->Name() << "'" + << " -> [" << [&]() { + std::string s; + for (auto v : folded_shapes_[shape_arg->Name()]) { + if (!s.empty()) s += ", "; + s += std::to_string(v); + } + return s; + }() + << "]"; + } + } + } + + LOGS(logger_, VERBOSE) << "ShapeSubgraphFolder: Folded " << folded_shapes_.size() + << " shape subgraphs, " << folded_nodes_.size() << " nodes eliminated."; + + return Status::OK(); +} + +bool ShapeSubgraphFolder::IsFoldedShape(const std::string& name) const { + return folded_shapes_.count(name) > 0; +} + +const std::vector* ShapeSubgraphFolder::GetFoldedShape(const std::string& name) const { + auto it = folded_shapes_.find(name); + return (it != folded_shapes_.end()) ? &it->second : nullptr; +} + +bool ShapeSubgraphFolder::IsFoldedNode(NodeIndex node_index) const { + return folded_nodes_.count(node_index) > 0; +} + +} // namespace webnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h new file mode 100644 index 0000000000000..bcdcdecacd9f9 --- /dev/null +++ b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Intel Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include +#include +#include +#include + +#include "core/common/inlined_containers.h" +#include "core/graph/graph_viewer.h" +#include "core/providers/webnn/builders/helper.h" + +namespace onnxruntime { +namespace webnn { + +// ShapeSubgraphFolder: Pre-evaluates shape-computing subgraphs in the ONNX graph. +// +// In unfused (HuggingFace-Optimum-style) models, Reshape/Expand/ConstantOfShape ops +// have shape inputs produced by chains of shape-domain ops (Shape, Gather, Concat, +// Where, Equal, Range, ConstantOfShape, etc.). Chromium's WebNN ShapeFoldingInterpreter +// can't handle all of these, causing "Graph has been destroyed" errors. +// +// This folder: +// 1. Identifies "shape-consuming" input slots (Reshape[1], Expand[1], etc.) +// 2. Traces each shape input's producer subgraph backward +// 3. If the entire subgraph can be evaluated with known constants + free_dimension_bounds, +// evaluates it to produce a concrete int64 shape tensor +// 4. Makes these folded shapes available as synthetic constant initializers +// +// Runs once at session creation → zero per-inference cost. +class ShapeSubgraphFolder { + public: + ShapeSubgraphFolder(const GraphViewer& graph_viewer, + const FreeDimensionBounds& free_dimension_bounds, + const logging::Logger& logger); + + // Run the folding pass. After this, GetFoldedShape() and IsFoldedNode() are valid. + Status Run(); + + // Check if a NodeArg name has been folded to a constant shape. + bool IsFoldedShape(const std::string& name) const; + + // Get the folded int64 tensor data for a shape NodeArg. + // Returns nullptr if not folded. + const std::vector* GetFoldedShape(const std::string& name) const; + + // Check if a node is part of a folded shape subgraph (should be skipped in AddOperations). + bool IsFoldedNode(NodeIndex node_index) const; + + // Get the set of node indices that were folded (for skipping). + const InlinedHashSet& GetFoldedNodes() const { return folded_nodes_; } + + private: + // Evaluate a shape-producing subgraph rooted at the given NodeArg. + // Returns true if successfully folded, with result stored in folded_shapes_. + bool TryFoldShapeSubgraph(const NodeArg* shape_arg); + + // Mini-interpreter: evaluate a single node given its input values. + // Returns true if the node can be evaluated. + bool EvaluateNode(const Node& node, + const std::unordered_map>& known_values, + std::vector& result); + + // Get the resolved shape of a NodeArg (using free_dimension_bounds for symbolic dims). + bool GetResolvedShape(const NodeArg* arg, std::vector& shape) const; + + // Check if a node is a supported shape-domain op for the mini-interpreter. + static bool IsSupportedShapeOp(const Node& node); + + const GraphViewer& graph_viewer_; + const FreeDimensionBounds& free_dimension_bounds_; + const logging::Logger& logger_; + + // Maps NodeArg name → folded int64 shape values. + std::unordered_map> folded_shapes_; + + // Set of node indices that are part of folded subgraphs (to be skipped). + InlinedHashSet folded_nodes_; +}; + +} // namespace webnn +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc index fa6b9ad0f5200..1bc0408571fa1 100644 --- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc +++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc @@ -87,13 +87,28 @@ WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view const auto supported_nodes = webnn::GetSupportedNodes(graph_viewer, wnn_builder, wnn_device_type_, wnn_limits_, logger); + // Run the shape subgraph folder to identify nodes that will be folded away during graph build. + // These nodes must be claimed as "supported" so they stay in our partition, even if their + // data types (e.g., int64 Equal) aren't natively supported by WebNN — they'll be skipped + // during ModelBuilder::AddOperations(). + webnn::ShapeSubgraphFolder capability_folder(graph_viewer, free_dimension_bounds_, logger); + auto folder_status = capability_folder.Run(); + std::unordered_set supported_nodes_with_folded = supported_nodes; + if (folder_status.IsOK()) { + for (const auto& node : graph_viewer.Nodes()) { + if (capability_folder.IsFoldedNode(node.Index())) { + supported_nodes_with_folded.insert(&node); + } + } + } + const auto gen_metadef_name = [&]() { HashValue model_hash; int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash); return MakeString(WEBNN, "_", model_hash, "_", metadef_id); }; - auto result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes, {}, + auto result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes_with_folded, {}, gen_metadef_name, WEBNN, kWebNNExecutionProvider, &node_unit_map, /*drop_constant_initializers*/ true); @@ -429,6 +444,40 @@ common::Status WebNNExecutionProvider::Compile(const std::vector int64_t { + auto it = dim_param_to_input_dim.find(operand); + if (it != dim_param_to_input_dim.end()) { + const size_t src_idx = it->second.first; + const size_t src_dim = it->second.second; + if (src_idx < runtime_input_shapes.size() && + src_dim < runtime_input_shapes[src_idx].size()) { + return runtime_input_shapes[src_idx][src_dim]; + } + } + auto fixed_it = fixed_dim_param_values.find(operand); + if (fixed_it != fixed_dim_param_values.end()) { + return fixed_it->second; + } + return -1; // unresolved + }; + + int64_t left_val = resolve_operand(left); + int64_t right_val = resolve_operand(right); + if (left_val >= 0 && right_val >= 0) { + output_shape[dim_idx] = left_val + right_val; + } + } + } } } @@ -458,12 +507,10 @@ common::Status WebNNExecutionProvider::Compile(const std::vector inferred) { + inferred = candidate; + } + } + } + + if (inferred > 0) { + LOGS_DEFAULT(WARNING) << "[WebNN] Unresolved output dim for [" << output_name + << "] at index " << dim_idx << " (dim_param: [" << unresolved_dim_param + << "]). Inferred from runtime inputs: " << inferred; + output_shape[dim_idx] = inferred; + } else { + LOGS_DEFAULT(ERROR) << "[WebNN] Failed to resolve dynamic output dimension for output [" + << output_name << "] at dim index [" << dim_idx + << "], dim_param: [" << unresolved_dim_param + << "]. No input dims available for inference."; + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + "[WebNN] Failed to resolve dynamic output dimension for output: ", output_name, + " at dim index: ", dim_idx, + ". dim_param: ", unresolved_dim_param); + } } } From 2f9a965165111d913588ba67d82da8706ce3cfd5 Mon Sep 17 00:00:00 2001 From: "Kumar, Arisha" Date: Thu, 4 Jun 2026 14:12:29 -0700 Subject: [PATCH 2/3] Remove ShapeSubgraphFolder, keep additive dim_param behind flag, wire JS option --- js/web/lib/wasm/session-options.ts | 5 + .../providers/webnn/builders/model_builder.cc | 16 +- .../providers/webnn/builders/model_builder.h | 2 - .../webnn/builders/shape_subgraph_folder.cc | 568 ------------------ .../webnn/builders/shape_subgraph_folder.h | 85 --- .../webnn/webnn_execution_provider.cc | 24 +- .../webnn/webnn_execution_provider.h | 5 +- .../providers/webnn/webnn_provider_factory.cc | 21 +- 8 files changed, 38 insertions(+), 688 deletions(-) delete mode 100644 onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc delete mode 100644 onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h diff --git a/js/web/lib/wasm/session-options.ts b/js/web/lib/wasm/session-options.ts index 6f83234534f26..c026602f73d33 100644 --- a/js/web/lib/wasm/session-options.ts +++ b/js/web/lib/wasm/session-options.ts @@ -139,6 +139,11 @@ const setExecutionProviders = async ( if (enableCausalLM) { appendEpOption(epOptions, 'enableCausalLM', 'true', allocs); } + // enableAdditiveDimParam: parse symbolic dim expressions like "a + b" in output shapes. + const enableAdditiveDimParam = (webnnOptions as any)?.enableAdditiveDimParam; + if (enableAdditiveDimParam) { + appendEpOption(epOptions, 'webnn_enable_additive_dim_param', 'true', allocs); + } } break; case 'webgpu': diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc index 37af542dbbdd0..c36d022074d24 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc @@ -42,13 +42,6 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge } Status ModelBuilder::Initialize() { - // Run shape subgraph folding FIRST, before PreprocessInitializers. - // This pre-evaluates shape subgraphs (Where/Equal/Range/ConstantOfShape chains) - // so that Reshape/Expand/etc. see constant shapes instead of dynamic subgraphs. - // Must run before PreprocessInitializers because AddInitializersToSkip checks IsFoldedShape(). - shape_folder_ = std::make_unique(graph_viewer_, free_dimension_bounds_, logger_); - ORT_RETURN_IF_ERROR(shape_folder_->Run()); - PreprocessInitializers(); ORT_RETURN_IF_ERROR(RegisterInitializers()); ORT_RETURN_IF_ERROR(RegisterModelInputs()); @@ -522,15 +515,18 @@ const ModelBuilder::DimProvenance* ModelBuilder::GetDimProvenance(const std::str } bool ModelBuilder::IsFoldedShape(const std::string& name) const { - return shape_folder_ && shape_folder_->IsFoldedShape(name); + ORT_UNUSED_PARAMETER(name); + return false; } const std::vector* ModelBuilder::GetFoldedShape(const std::string& name) const { - return shape_folder_ ? shape_folder_->GetFoldedShape(name) : nullptr; + ORT_UNUSED_PARAMETER(name); + return nullptr; } bool ModelBuilder::IsFoldedNode(NodeIndex node_index) const { - return shape_folder_ && shape_folder_->IsFoldedNode(node_index); + ORT_UNUSED_PARAMETER(node_index); + return false; } } // namespace webnn diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h index c2b68f18ea33e..4fd812bda5126 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.h +++ b/onnxruntime/core/providers/webnn/builders/model_builder.h @@ -8,7 +8,6 @@ #include #include "model.h" -#include "shape_subgraph_folder.h" #include "core/framework/execution_provider.h" #include "core/providers/webnn/builders/helper.h" @@ -105,7 +104,6 @@ class ModelBuilder { emscripten::val wnn_limits_ = emscripten::val::undefined(); FreeDimensionBounds free_dimension_bounds_; bool enable_causal_lm_; - std::unique_ptr shape_folder_; InlinedHashMap wnn_operands_; std::vector input_names_; std::vector output_names_; diff --git a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc deleted file mode 100644 index c38b993790414..0000000000000 --- a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.cc +++ /dev/null @@ -1,568 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Copyright (c) Intel Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "shape_subgraph_folder.h" - -#include "core/framework/tensorprotoutils.h" -#include "core/providers/common.h" - -#include -#include -#include -#include - -namespace onnxruntime { -namespace webnn { - -ShapeSubgraphFolder::ShapeSubgraphFolder(const GraphViewer& graph_viewer, - const FreeDimensionBounds& free_dimension_bounds, - const logging::Logger& logger) - : graph_viewer_(graph_viewer), - free_dimension_bounds_(free_dimension_bounds), - logger_(logger) { -} - -bool ShapeSubgraphFolder::IsSupportedShapeOp(const Node& node) { - static const InlinedHashSet supported_ops = { - "Shape", "Gather", "Concat", "Unsqueeze", "Squeeze", "Slice", - "Cast", "Add", "Sub", "Mul", "Div", "Equal", "Where", - "ConstantOfShape", "Range", "Reshape", "Expand", - "Neg", "Abs", "Floor", "Ceil", - }; - // Only standard ONNX ops (empty domain or "onnx") - if (!node.Domain().empty() && node.Domain() != "onnx" && node.Domain() != kOnnxDomain) { - return false; - } - return supported_ops.count(node.OpType()) > 0; -} - -bool ShapeSubgraphFolder::GetResolvedShape(const NodeArg* arg, std::vector& shape) const { - const auto* shape_proto = arg->Shape(); - if (!shape_proto) return false; - - shape.clear(); - for (int i = 0; i < shape_proto->dim_size(); i++) { - const auto& dim = shape_proto->dim(i); - if (dim.has_dim_value()) { - shape.push_back(dim.dim_value()); - } else if (dim.has_dim_param()) { - // Try to resolve from free_dimension_bounds - const auto& dim_param = dim.dim_param(); - auto it = free_dimension_bounds_.find(dim_param); - if (it != free_dimension_bounds_.end()) { - // Use maxSize as the resolved value (consistent with WebNN EP behavior) - shape.push_back(static_cast(it->second.max_size)); - } else { - return false; // Can't resolve this symbolic dim - } - } else { - return false; // Unknown dim - } - } - return true; -} - -// Check if shape-consuming inputs (Reshape[1], Expand[1], etc.) are candidates for folding. -static bool IsShapeConsumingSlot(const Node& consumer, size_t input_index) { - const auto& op = consumer.OpType(); - if ((op == "Reshape" || op == "Expand") && input_index == 1) return true; - if (op == "ConstantOfShape" && input_index == 0) return true; - if (op == "Tile" && input_index == 1) return true; - // Slice has starts[1], ends[2], axes[3], steps[4] - if (op == "Slice" && input_index >= 1 && input_index <= 4) return true; - return false; -} - -bool ShapeSubgraphFolder::TryFoldShapeSubgraph(const NodeArg* shape_arg) { - const std::string& target_name = shape_arg->Name(); - - // Already folded? - if (folded_shapes_.count(target_name)) return true; - - // Check if it's already a constant initializer - if (graph_viewer_.GetConstantInitializer(target_name)) return false; // Already handled normally - - // BFS backward to find the producer subgraph - std::unordered_map> known_values; - std::vector topo_order; // nodes in forward eval order - InlinedHashSet visited_nodes; - std::queue worklist; - worklist.push(shape_arg); - - InlinedHashSet visited_args; - visited_args.insert(target_name); - - bool can_fold = true; - - while (!worklist.empty() && can_fold) { - const NodeArg* current = worklist.front(); - worklist.pop(); - const std::string& name = current->Name(); - - // Skip if already known - if (known_values.count(name)) continue; - - // Check if it's a constant initializer - const auto* init = graph_viewer_.GetConstantInitializer(name); - if (init) { - // Read int64 values from the initializer - std::vector values; - if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) { - if (!init->int64_data().empty()) { - values.assign(init->int64_data().begin(), init->int64_data().end()); - } else if (!init->raw_data().empty()) { - const int64_t* data = reinterpret_cast(init->raw_data().data()); - size_t count = init->raw_data().size() / sizeof(int64_t); - values.assign(data, data + count); - } - } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32) { - if (!init->int32_data().empty()) { - for (auto v : init->int32_data()) values.push_back(static_cast(v)); - } else if (!init->raw_data().empty()) { - const int32_t* data = reinterpret_cast(init->raw_data().data()); - size_t count = init->raw_data().size() / sizeof(int32_t); - for (size_t i = 0; i < count; i++) values.push_back(static_cast(data[i])); - } - } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { - if (!init->float_data().empty()) { - for (auto v : init->float_data()) values.push_back(static_cast(v)); - } else if (!init->raw_data().empty()) { - const float* data = reinterpret_cast(init->raw_data().data()); - size_t count = init->raw_data().size() / sizeof(float); - for (size_t i = 0; i < count; i++) values.push_back(static_cast(data[i])); - } - } else if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_BOOL) { - if (!init->int32_data().empty()) { - for (auto v : init->int32_data()) values.push_back(static_cast(v)); - } else if (!init->raw_data().empty()) { - const uint8_t* data = reinterpret_cast(init->raw_data().data()); - size_t count = init->raw_data().size(); - for (size_t i = 0; i < count; i++) values.push_back(static_cast(data[i])); - } - } else { - can_fold = false; - break; - } - // Handle scalar initializers (0-dim tensors with no data entries but have raw_data) - if (values.empty() && init->dims_size() == 0) { - // Try scalar - if (init->data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) { - values.push_back(0); - } - } - known_values[name] = std::move(values); - continue; - } - - // Check if it's a graph input (cannot fold if depends on runtime input, unless it's just shape) - const auto* producer = graph_viewer_.GetProducerNode(name); - if (!producer) { - // It's a graph input - can't fold unless we can get its shape via Shape op - can_fold = false; - break; - } - - // Check if the producer is a supported shape op - if (!IsSupportedShapeOp(*producer)) { - can_fold = false; - break; - } - - // Add producer to visit list - if (visited_nodes.insert(producer->Index()).second) { - topo_order.push_back(producer); - - // Add all inputs of this producer to the worklist - for (const auto* input_def : producer->InputDefs()) { - if (input_def && input_def->Exists()) { - if (visited_args.insert(input_def->Name()).second) { - worklist.push(input_def); - } - } - } - } - } - - if (!can_fold) return false; - - // Sort nodes in topological order (reverse of discovery = forward eval order) - std::reverse(topo_order.begin(), topo_order.end()); - - // Evaluate nodes in topological order - for (const Node* node : topo_order) { - std::vector result; - if (!EvaluateNode(*node, known_values, result)) { - return false; - } - // Store results for all outputs - for (const auto* output_def : node->OutputDefs()) { - if (output_def && output_def->Exists()) { - known_values[output_def->Name()] = result; - } - } - } - - // The target should now be in known_values - auto it = known_values.find(target_name); - if (it == known_values.end()) return false; - - // Store the folded result - folded_shapes_[target_name] = it->second; - - // Mark nodes in the subgraph as folded, but only if ALL their outputs are consumed - // exclusively by other folded nodes or shape-consuming slots. If any output feeds - // a non-shape consumer outside the subgraph, we cannot skip that node. - for (const Node* node : topo_order) { - bool can_skip = true; - for (auto it2 = node->OutputEdgesBegin(); it2 != node->OutputEdgesEnd(); ++it2) { - const Node& consumer = it2->GetNode(); - if (visited_nodes.count(consumer.Index())) continue; // consumer is in our subgraph - // Check if the consumer uses this output only in a shape-consuming slot - if (!IsShapeConsumingSlot(consumer, it2->GetDstArgIndex())) { - can_skip = false; - break; - } - } - if (can_skip) { - folded_nodes_.insert(node->Index()); - } - } - - return true; -} - -bool ShapeSubgraphFolder::EvaluateNode( - const Node& node, - const std::unordered_map>& known_values, - std::vector& result) { - const auto& op = node.OpType(); - const auto& inputs = node.InputDefs(); - - // Helper to get input values - auto get_input = [&](size_t idx) -> const std::vector* { - if (idx >= inputs.size() || !inputs[idx] || !inputs[idx]->Exists()) return nullptr; - auto it = known_values.find(inputs[idx]->Name()); - return (it != known_values.end()) ? &it->second : nullptr; - }; - - if (op == "Shape") { - // Shape op: return the resolved shape of input[0] - if (!GetResolvedShape(inputs[0], result)) return false; - - // Handle start/end attributes (Shape opset 15+) - const auto& attrs = node.GetAttributes(); - int64_t start = 0, end = static_cast(result.size()); - if (attrs.count("start")) start = attrs.at("start").i(); - if (attrs.count("end")) end = attrs.at("end").i(); - if (start < 0) start += static_cast(result.size()); - if (end < 0) end += static_cast(result.size()); - start = std::max(int64_t(0), std::min(start, static_cast(result.size()))); - end = std::max(int64_t(0), std::min(end, static_cast(result.size()))); - result = std::vector(result.begin() + static_cast(start), result.begin() + static_cast(end)); - return true; - } - - if (op == "Gather") { - const auto* data = get_input(0); - const auto* indices = get_input(1); - if (!data || !indices) return false; - - result.clear(); - for (int64_t idx : *indices) { - if (idx < 0) idx += static_cast(data->size()); - if (idx < 0 || idx >= static_cast(data->size())) return false; - result.push_back((*data)[static_cast(idx)]); - } - // If indices is scalar (0-dim), result should also be scalar-like - if (indices->empty()) { - // scalar index case - not handled here - return false; - } - return true; - } - - if (op == "Concat") { - // For 1-D shape vectors, axis is always 0 — just concatenate all inputs. - result.clear(); - for (size_t i = 0; i < inputs.size(); i++) { - const auto* inp = get_input(i); - if (!inp) return false; - result.insert(result.end(), inp->begin(), inp->end()); - } - return true; - } - - if (op == "Unsqueeze") { - const auto* data = get_input(0); - if (!data) return false; - // For shape subgraphs, unsqueeze typically wraps a scalar into [1] shape - result = *data; - // If axes input exists (opset 13+), handle it - if (inputs.size() > 1) { - const auto* axes = get_input(1); - if (!axes) return false; - // Insert dimensions of size 1 at specified axes - // For shape vectors this is typically making a scalar into a 1-element vector - } - return true; - } - - if (op == "Squeeze") { - const auto* data = get_input(0); - if (!data) return false; - result = *data; - return true; - } - - if (op == "Cast") { - const auto* data = get_input(0); - if (!data) return false; - // Cast just passes through for int64 purposes - result = *data; - return true; - } - - if (op == "Neg") { - const auto* data = get_input(0); - if (!data) return false; - result.resize(data->size()); - for (size_t i = 0; i < data->size(); i++) result[i] = -(*data)[i]; - return true; - } - - if (op == "Abs") { - const auto* data = get_input(0); - if (!data) return false; - result.resize(data->size()); - for (size_t i = 0; i < data->size(); i++) result[i] = std::abs((*data)[i]); - return true; - } - - // Binary element-wise ops: Add, Sub, Mul, Div - if (op == "Add" || op == "Sub" || op == "Mul" || op == "Div") { - const auto* a = get_input(0); - const auto* b = get_input(1); - if (!a || !b) return false; - - // Broadcasting: if one is scalar (size 1), broadcast to the other's size - size_t size = std::max(a->size(), b->size()); - result.resize(size); - for (size_t i = 0; i < size; i++) { - int64_t va = (*a)[a->size() == 1 ? 0 : i]; - int64_t vb = (*b)[b->size() == 1 ? 0 : i]; - if (op == "Add") result[i] = va + vb; - else if (op == "Sub") result[i] = va - vb; - else if (op == "Mul") result[i] = va * vb; - else if (op == "Div") { - if (vb == 0) return false; - result[i] = va / vb; - } - } - return true; - } - - if (op == "Equal") { - const auto* a = get_input(0); - const auto* b = get_input(1); - if (!a || !b) return false; - - size_t size = std::max(a->size(), b->size()); - result.resize(size); - for (size_t i = 0; i < size; i++) { - int64_t va = (*a)[a->size() == 1 ? 0 : i]; - int64_t vb = (*b)[b->size() == 1 ? 0 : i]; - result[i] = (va == vb) ? 1 : 0; - } - return true; - } - - if (op == "Where") { - const auto* cond = get_input(0); - const auto* x = get_input(1); - const auto* y = get_input(2); - if (!cond || !x || !y) return false; - - size_t size = std::max({cond->size(), x->size(), y->size()}); - result.resize(size); - for (size_t i = 0; i < size; i++) { - int64_t c = (*cond)[cond->size() == 1 ? 0 : i]; - int64_t vx = (*x)[x->size() == 1 ? 0 : i]; - int64_t vy = (*y)[y->size() == 1 ? 0 : i]; - result[i] = c ? vx : vy; - } - return true; - } - - if (op == "Range") { - const auto* start_v = get_input(0); - const auto* limit_v = get_input(1); - const auto* delta_v = get_input(2); - if (!start_v || !limit_v || !delta_v) return false; - if (start_v->empty() || limit_v->empty() || delta_v->empty()) return false; - - int64_t start = (*start_v)[0]; - int64_t limit = (*limit_v)[0]; - int64_t delta = (*delta_v)[0]; - if (delta == 0) return false; - - result.clear(); - if (delta > 0) { - for (int64_t v = start; v < limit; v += delta) result.push_back(v); - } else { - for (int64_t v = start; v > limit; v += delta) result.push_back(v); - } - return true; - } - - if (op == "ConstantOfShape") { - const auto* shape_input = get_input(0); - if (!shape_input) return false; - - // Get the fill value from attribute - int64_t fill_value = 0; - const auto& attrs = node.GetAttributes(); - if (attrs.count("value")) { - const auto& tensor = attrs.at("value").t(); - if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT64) { - if (!tensor.int64_data().empty()) fill_value = tensor.int64_data(0); - else if (!tensor.raw_data().empty()) - fill_value = *reinterpret_cast(tensor.raw_data().data()); - } else if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT32) { - if (!tensor.int32_data().empty()) fill_value = tensor.int32_data(0); - else if (!tensor.raw_data().empty()) - fill_value = *reinterpret_cast(tensor.raw_data().data()); - } else if (tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { - float fv = 0.0f; - if (!tensor.float_data().empty()) fv = tensor.float_data(0); - else if (!tensor.raw_data().empty()) - fv = *reinterpret_cast(tensor.raw_data().data()); - fill_value = static_cast(fv); - } - } - - // Compute total size from shape - int64_t total = 1; - for (int64_t d : *shape_input) total *= d; - if (total < 0 || total > 1000000) return false; // Safety limit - - result.assign(static_cast(total), fill_value); - return true; - } - - if (op == "Slice") { - const auto* data = get_input(0); - const auto* starts = get_input(1); - const auto* ends = get_input(2); - if (!data || !starts || !ends) return false; - - // For 1-D shape tensors - int64_t start = (*starts)[0]; - int64_t end = (*ends)[0]; - int64_t dim_size = static_cast(data->size()); - - if (start < 0) start += dim_size; - if (end < 0) end += dim_size; - start = std::max(int64_t(0), std::min(start, dim_size)); - end = std::max(int64_t(0), std::min(end, dim_size)); - - int64_t step = 1; - if (inputs.size() > 4) { - const auto* steps_v = get_input(4); - if (steps_v && !steps_v->empty()) step = (*steps_v)[0]; - } - - result.clear(); - if (step > 0) { - for (int64_t i = start; i < end; i += step) result.push_back((*data)[static_cast(i)]); - } else if (step < 0) { - for (int64_t i = start; i > end; i += step) result.push_back((*data)[static_cast(i)]); - } - return true; - } - - if (op == "Reshape") { - const auto* data = get_input(0); - if (!data) return false; - // For shape subgraphs, Reshape just passes data through (reshaping a 1-D vector) - result = *data; - return true; - } - - if (op == "Expand") { - const auto* data = get_input(0); - const auto* shape = get_input(1); - if (!data || !shape) return false; - // For shape subgraphs, Expand broadcasts scalar/small tensor - if (data->size() == 1 && !shape->empty()) { - int64_t total = 1; - for (int64_t d : *shape) total *= d; - if (total < 0 || total > 1000000) return false; - result.assign(static_cast(total), (*data)[0]); - } else { - result = *data; - } - return true; - } - - // Unsupported op - return false; -} - -Status ShapeSubgraphFolder::Run() { - // Find all shape-consuming input slots and try to fold them - const auto& nodes = graph_viewer_.GetNodesInTopologicalOrder(); - - for (auto node_idx : nodes) { - const auto* node = graph_viewer_.GetNode(node_idx); - if (!node) continue; - - const auto& input_defs = node->InputDefs(); - for (size_t i = 0; i < input_defs.size(); i++) { - if (!IsShapeConsumingSlot(*node, i)) continue; - - const auto* shape_arg = input_defs[i]; - if (!shape_arg || !shape_arg->Exists()) continue; - - // Skip if already a constant initializer (handled normally) - if (graph_viewer_.GetConstantInitializer(shape_arg->Name())) continue; - - // Try to fold this shape input - if (TryFoldShapeSubgraph(shape_arg)) { - LOGS(logger_, VERBOSE) << "ShapeSubgraphFolder: Folded shape input '" - << shape_arg->Name() << "' for " - << node->OpType() << " node '" << node->Name() << "'" - << " -> [" << [&]() { - std::string s; - for (auto v : folded_shapes_[shape_arg->Name()]) { - if (!s.empty()) s += ", "; - s += std::to_string(v); - } - return s; - }() - << "]"; - } - } - } - - LOGS(logger_, VERBOSE) << "ShapeSubgraphFolder: Folded " << folded_shapes_.size() - << " shape subgraphs, " << folded_nodes_.size() << " nodes eliminated."; - - return Status::OK(); -} - -bool ShapeSubgraphFolder::IsFoldedShape(const std::string& name) const { - return folded_shapes_.count(name) > 0; -} - -const std::vector* ShapeSubgraphFolder::GetFoldedShape(const std::string& name) const { - auto it = folded_shapes_.find(name); - return (it != folded_shapes_.end()) ? &it->second : nullptr; -} - -bool ShapeSubgraphFolder::IsFoldedNode(NodeIndex node_index) const { - return folded_nodes_.count(node_index) > 0; -} - -} // namespace webnn -} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h b/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h deleted file mode 100644 index bcdcdecacd9f9..0000000000000 --- a/onnxruntime/core/providers/webnn/builders/shape_subgraph_folder.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Copyright (c) Intel Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once - -#include -#include -#include -#include - -#include "core/common/inlined_containers.h" -#include "core/graph/graph_viewer.h" -#include "core/providers/webnn/builders/helper.h" - -namespace onnxruntime { -namespace webnn { - -// ShapeSubgraphFolder: Pre-evaluates shape-computing subgraphs in the ONNX graph. -// -// In unfused (HuggingFace-Optimum-style) models, Reshape/Expand/ConstantOfShape ops -// have shape inputs produced by chains of shape-domain ops (Shape, Gather, Concat, -// Where, Equal, Range, ConstantOfShape, etc.). Chromium's WebNN ShapeFoldingInterpreter -// can't handle all of these, causing "Graph has been destroyed" errors. -// -// This folder: -// 1. Identifies "shape-consuming" input slots (Reshape[1], Expand[1], etc.) -// 2. Traces each shape input's producer subgraph backward -// 3. If the entire subgraph can be evaluated with known constants + free_dimension_bounds, -// evaluates it to produce a concrete int64 shape tensor -// 4. Makes these folded shapes available as synthetic constant initializers -// -// Runs once at session creation → zero per-inference cost. -class ShapeSubgraphFolder { - public: - ShapeSubgraphFolder(const GraphViewer& graph_viewer, - const FreeDimensionBounds& free_dimension_bounds, - const logging::Logger& logger); - - // Run the folding pass. After this, GetFoldedShape() and IsFoldedNode() are valid. - Status Run(); - - // Check if a NodeArg name has been folded to a constant shape. - bool IsFoldedShape(const std::string& name) const; - - // Get the folded int64 tensor data for a shape NodeArg. - // Returns nullptr if not folded. - const std::vector* GetFoldedShape(const std::string& name) const; - - // Check if a node is part of a folded shape subgraph (should be skipped in AddOperations). - bool IsFoldedNode(NodeIndex node_index) const; - - // Get the set of node indices that were folded (for skipping). - const InlinedHashSet& GetFoldedNodes() const { return folded_nodes_; } - - private: - // Evaluate a shape-producing subgraph rooted at the given NodeArg. - // Returns true if successfully folded, with result stored in folded_shapes_. - bool TryFoldShapeSubgraph(const NodeArg* shape_arg); - - // Mini-interpreter: evaluate a single node given its input values. - // Returns true if the node can be evaluated. - bool EvaluateNode(const Node& node, - const std::unordered_map>& known_values, - std::vector& result); - - // Get the resolved shape of a NodeArg (using free_dimension_bounds for symbolic dims). - bool GetResolvedShape(const NodeArg* arg, std::vector& shape) const; - - // Check if a node is a supported shape-domain op for the mini-interpreter. - static bool IsSupportedShapeOp(const Node& node); - - const GraphViewer& graph_viewer_; - const FreeDimensionBounds& free_dimension_bounds_; - const logging::Logger& logger_; - - // Maps NodeArg name → folded int64 shape values. - std::unordered_map> folded_shapes_; - - // Set of node indices that are part of folded subgraphs (to be skipped). - InlinedHashSet folded_nodes_; -}; - -} // namespace webnn -} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc index 1bc0408571fa1..8e1ce16657be0 100644 --- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc +++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc @@ -31,7 +31,8 @@ constexpr const char* WEBNN = "WEBNN"; WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_flags, const webnn::FreeDimensionBounds& free_dimension_bounds, - bool enable_causal_lm) + bool enable_causal_lm, + bool enable_additive_dim_param) : IExecutionProvider{ onnxruntime::kWebNNExecutionProvider, // If MLTensor is supported, we force all the tensors to be allocated as MLTensor. @@ -42,7 +43,8 @@ WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_f 0)}, wnn_device_type_(webnn::DeviceTypeFromString(webnn_device_flags)), free_dimension_bounds_(free_dimension_bounds), - enable_causal_lm_(enable_causal_lm) { + enable_causal_lm_(enable_causal_lm), + enable_additive_dim_param_(enable_additive_dim_param) { wnn_context_ = emscripten::val::module_property("currentContext"); if (!wnn_context_.as()) { ORT_THROW("Failed to create WebNN context."); @@ -87,20 +89,7 @@ WebNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view const auto supported_nodes = webnn::GetSupportedNodes(graph_viewer, wnn_builder, wnn_device_type_, wnn_limits_, logger); - // Run the shape subgraph folder to identify nodes that will be folded away during graph build. - // These nodes must be claimed as "supported" so they stay in our partition, even if their - // data types (e.g., int64 Equal) aren't natively supported by WebNN — they'll be skipped - // during ModelBuilder::AddOperations(). - webnn::ShapeSubgraphFolder capability_folder(graph_viewer, free_dimension_bounds_, logger); - auto folder_status = capability_folder.Run(); std::unordered_set supported_nodes_with_folded = supported_nodes; - if (folder_status.IsOK()) { - for (const auto& node : graph_viewer.Nodes()) { - if (capability_folder.IsFoldedNode(node.Index())) { - supported_nodes_with_folded.insert(&node); - } - } - } const auto gen_metadef_name = [&]() { HashValue model_hash; @@ -301,7 +290,8 @@ common::Status WebNNExecutionProvider::Compile(const std::vector> @@ -56,6 +57,8 @@ class WebNNExecutionProvider : public IExecutionProvider { webnn::FreeDimensionBounds free_dimension_bounds_; // Controls GQA KV-cache strategy: true = concat (stateful), false = ScatterND (stateless). bool enable_causal_lm_; + // Controls optional additive symbolic dim_param parsing: "a + b". + bool enable_additive_dim_param_; InlinedHashMap> models_; ModelMetadefIdGenerator metadef_id_generator_; }; diff --git a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc index 56a96b43bd11e..46c80d60ae84b 100644 --- a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc +++ b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc @@ -82,10 +82,12 @@ Status ParseFreeDimensionBounds(std::string_view value, webnn::FreeDimensionBoun struct WebNNProviderFactory : IExecutionProviderFactory { explicit WebNNProviderFactory(const std::string& webnn_device_flags, const webnn::FreeDimensionBounds& free_dimension_bounds, - bool enable_causal_lm) + bool enable_causal_lm, + bool enable_additive_dim_param) : webnn_device_flags_(webnn_device_flags), free_dimension_bounds_(free_dimension_bounds), - enable_causal_lm_(enable_causal_lm) {} + enable_causal_lm_(enable_causal_lm), + enable_additive_dim_param_(enable_additive_dim_param) {} ~WebNNProviderFactory() override {} std::unique_ptr CreateProvider() override; @@ -95,10 +97,12 @@ struct WebNNProviderFactory : IExecutionProviderFactory { std::string webnn_device_flags_; webnn::FreeDimensionBounds free_dimension_bounds_; bool enable_causal_lm_; + bool enable_additive_dim_param_; }; std::unique_ptr WebNNProviderFactory::CreateProvider() { - return std::make_unique(webnn_device_flags_, free_dimension_bounds_, enable_causal_lm_); + return std::make_unique(webnn_device_flags_, free_dimension_bounds_, enable_causal_lm_, + enable_additive_dim_param_); } std::unique_ptr WebNNProviderFactory::CreateProvider( @@ -123,7 +127,8 @@ std::unique_ptr WebNNProviderFactory::CreateProvider( std::numeric_limits::max())); merged_bounds[dim_override.dim_identifier] = webnn::FreeDimensionBound{value, value}; } - return std::make_unique(webnn_device_flags_, merged_bounds, enable_causal_lm_); + return std::make_unique(webnn_device_flags_, merged_bounds, enable_causal_lm_, + enable_additive_dim_param_); } std::shared_ptr WebNNProviderFactoryCreator::Create( @@ -145,8 +150,14 @@ std::shared_ptr WebNNProviderFactoryCreator::Create( const bool enable_causal_lm = (enable_causal_lm_it != provider_options.end() && enable_causal_lm_it->second == "true"); + // Optional additive symbolic dim expression parsing: "a + b". Default false. + const auto enable_additive_dim_param_it = provider_options.find("webnn_enable_additive_dim_param"); + const bool enable_additive_dim_param = (enable_additive_dim_param_it != provider_options.end() && + enable_additive_dim_param_it->second == "true"); + return std::make_shared(webnn_device_flags, free_dimension_bounds, - enable_causal_lm); + enable_causal_lm, + enable_additive_dim_param); } } // namespace onnxruntime From 1dbb113120439f2fa635a3229ab90d035da2d4ec Mon Sep 17 00:00:00 2001 From: "Kumar, Arisha" Date: Fri, 5 Jun 2026 12:51:24 -0700 Subject: [PATCH 3/3] Address PR #21 review: remove IsFoldedShape/IsFoldedNode dead code and enable_additive_dim_param option - Remove IsFoldedShape(), GetFoldedShape(), IsFoldedNode() declarations and implementations from model_builder.h/cc (dead code from removed ShapeSubgraphFolder) - Remove IsFoldedShape/IsFoldedNode call sites in expand_op_builder.cc and reshape_op_builder.cc - Remove enable_additive_dim_param constructor param, member variable, and option parsing from webnn_execution_provider.h/cc and webnn_provider_factory.cc - Remove enableAdditiveDimParam session option mapping from session-options.ts - Keep additive dim_param fallback logic guarded by runtime computeShapes check --- js/web/lib/wasm/session-options.ts | 5 ---- .../webnn/builders/impl/expand_op_builder.cc | 16 +++--------- .../webnn/builders/impl/reshape_op_builder.cc | 16 ++---------- .../providers/webnn/builders/model_builder.cc | 26 ------------------- .../providers/webnn/builders/model_builder.h | 5 ---- .../webnn/webnn_execution_provider.cc | 13 +++++----- .../webnn/webnn_execution_provider.h | 5 +--- .../providers/webnn/webnn_provider_factory.cc | 21 ++++----------- 8 files changed, 17 insertions(+), 90 deletions(-) diff --git a/js/web/lib/wasm/session-options.ts b/js/web/lib/wasm/session-options.ts index c026602f73d33..6f83234534f26 100644 --- a/js/web/lib/wasm/session-options.ts +++ b/js/web/lib/wasm/session-options.ts @@ -139,11 +139,6 @@ const setExecutionProviders = async ( if (enableCausalLM) { appendEpOption(epOptions, 'enableCausalLM', 'true', allocs); } - // enableAdditiveDimParam: parse symbolic dim expressions like "a + b" in output shapes. - const enableAdditiveDimParam = (webnnOptions as any)?.enableAdditiveDimParam; - if (enableAdditiveDimParam) { - appendEpOption(epOptions, 'webnn_enable_additive_dim_param', 'true', allocs); - } } break; case 'webgpu': diff --git a/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc index 3748630d4f434..b6d718b5c79ae 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/expand_op_builder.cc @@ -39,13 +39,10 @@ class ExpandOpBuilder : public BaseOpBuilder { void ExpandOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& input_defs = node.InputDefs(); const auto& shape_name = input_defs[1]->Name(); - // Skip the shape input when: - // 1. It was folded by the shape subgraph folder (compile-time constant), OR - // 2. It is a constant initializer AND the input has static shape. + // Skip the shape input when it is a constant initializer AND the input has static shape. // When the input has dynamic shape, we need the shape operand for dynamicExpand even if it's constant. - if (model_builder.IsFoldedShape(shape_name) || - (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) && - !HasDynamicShape(*input_defs[0]))) { + if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) && + !HasDynamicShape(*input_defs[0])) { model_builder.AddInitializerToSkip(shape_name); } } @@ -79,13 +76,6 @@ Status ExpandOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, emscripten::val output_shape_arr = emscripten::val::array(GetNarrowedIntFromInt64(output_shape)); output = model_builder.GetBuilder().call("expand", input, output_shape_arr, options); - } else if (model_builder.IsFoldedShape(input_defs[1]->Name())) { - // Folded shape path: shape subgraph was pre-evaluated to a constant vector. - const auto* folded = model_builder.GetFoldedShape(input_defs[1]->Name()); - ORT_RETURN_IF_NOT(folded != nullptr, "IsFoldedShape true but GetFoldedShape returned null"); - - emscripten::val output_shape_arr = emscripten::val::array(GetNarrowedIntFromInt64(*folded)); - output = model_builder.GetBuilder().call("expand", input, output_shape_arr, options); } else { // Operand shape path: use dynamicExpand with the shape operand. emscripten::val shape_operand = model_builder.GetOperand(input_defs[1]->Name()); diff --git a/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc index 3c8afe37012a1..556b73b5d0fc1 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/reshape_op_builder.cc @@ -38,11 +38,9 @@ class ReshapeOpBuilder : public BaseOpBuilder { void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& shape_name = node.InputDefs()[1]->Name(); - // Only skip the shape input when it is a constant initializer (consumed at build time) - // or when it was folded by the shape subgraph folder. + // Only skip the shape input when it is a constant initializer (consumed at build time). // When it is an operand, we need it as the newShape input for dynamicReshape. - if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name) || - model_builder.IsFoldedShape(shape_name)) { + if (model_builder.GetGraphViewer().GetConstantInitializer(shape_name)) { model_builder.AddInitializerToSkip(shape_name); } } @@ -222,16 +220,6 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, emscripten::val new_shape = emscripten::val::array(); output = model_builder.GetBuilder().call("reshape", input, new_shape, options); } - } else if (model_builder.IsFoldedShape(input_defs[1]->Name())) { - // Folded shape path: the shape subgraph was pre-evaluated to a constant vector. - const auto* folded = model_builder.GetFoldedShape(input_defs[1]->Name()); - ORT_RETURN_IF_NOT(folded != nullptr, "IsFoldedShape true but GetFoldedShape returned null"); - - emscripten::val new_shape = emscripten::val::array(); - for (int64_t dim : *folded) { - new_shape.call("push", static_cast(dim)); - } - output = model_builder.GetBuilder().call("reshape", input, new_shape, options); } else { // Operand shape path: shape is a non-constant operand. Use dynamicReshape. emscripten::val shape_operand = model_builder.GetOperand(input_defs[1]->Name()); diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc index c36d022074d24..c5850cc29b84e 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc @@ -85,12 +85,6 @@ void ModelBuilder::PreprocessInitializers() { for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer_.GetNode(node_indices[i])); - // Skip nodes that are part of a folded shape subgraph — their initializer inputs - // don't need to be registered as WebNN constants. - if (IsFoldedNode(node->Index())) { - continue; - } - // find all initializers consumed. AddInitializersToSkip will potentially decrement the usage count. for (const auto* input : node->InputDefs()) { if (input->Exists() && Contains(initializers, input->Name())) { @@ -408,11 +402,6 @@ Status ModelBuilder::AddOperations() { for (size_t i = 0; i < node_indices.size(); i++) { const auto* node(graph_viewer_.GetNode(node_indices[i])); - // Skip nodes that are part of a folded shape subgraph. - if (IsFoldedNode(node->Index())) { - continue; - } - if (const auto* op_builder = GetOpBuilder(*node)) { ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, *node, logger_)); } else { @@ -514,20 +503,5 @@ const ModelBuilder::DimProvenance* ModelBuilder::GetDimProvenance(const std::str return it != dim_provenance_.end() ? &it->second : nullptr; } -bool ModelBuilder::IsFoldedShape(const std::string& name) const { - ORT_UNUSED_PARAMETER(name); - return false; -} - -const std::vector* ModelBuilder::GetFoldedShape(const std::string& name) const { - ORT_UNUSED_PARAMETER(name); - return nullptr; -} - -bool ModelBuilder::IsFoldedNode(NodeIndex node_index) const { - ORT_UNUSED_PARAMETER(node_index); - return false; -} - } // namespace webnn } // namespace onnxruntime diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h index 4fd812bda5126..f7535d19e4e1e 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.h +++ b/onnxruntime/core/providers/webnn/builders/model_builder.h @@ -58,11 +58,6 @@ class ModelBuilder { // Returns true when GQA should use concat-based (stateful) KV-cache; false for ScatterND (stateless). bool IsCausalLMEnabled() const { return enable_causal_lm_; } - // Shape subgraph folder: check if a NodeArg name has been folded to a constant shape. - bool IsFoldedShape(const std::string& name) const; - const std::vector* GetFoldedShape(const std::string& name) const; - bool IsFoldedNode(NodeIndex node_index) const; - // The initializer will be processed separately, skip it as an initializer. void AddInitializerToSkip(const std::string& tensor_name); diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc index 8e1ce16657be0..4a8363199e365 100644 --- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc +++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc @@ -31,8 +31,7 @@ constexpr const char* WEBNN = "WEBNN"; WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_flags, const webnn::FreeDimensionBounds& free_dimension_bounds, - bool enable_causal_lm, - bool enable_additive_dim_param) + bool enable_causal_lm) : IExecutionProvider{ onnxruntime::kWebNNExecutionProvider, // If MLTensor is supported, we force all the tensors to be allocated as MLTensor. @@ -43,8 +42,7 @@ WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_f 0)}, wnn_device_type_(webnn::DeviceTypeFromString(webnn_device_flags)), free_dimension_bounds_(free_dimension_bounds), - enable_causal_lm_(enable_causal_lm), - enable_additive_dim_param_(enable_additive_dim_param) { + enable_causal_lm_(enable_causal_lm) { wnn_context_ = emscripten::val::module_property("currentContext"); if (!wnn_context_.as()) { ORT_THROW("Failed to create WebNN context."); @@ -290,8 +288,9 @@ common::Status WebNNExecutionProvider::Compile(const std::vector> @@ -57,8 +56,6 @@ class WebNNExecutionProvider : public IExecutionProvider { webnn::FreeDimensionBounds free_dimension_bounds_; // Controls GQA KV-cache strategy: true = concat (stateful), false = ScatterND (stateless). bool enable_causal_lm_; - // Controls optional additive symbolic dim_param parsing: "a + b". - bool enable_additive_dim_param_; InlinedHashMap> models_; ModelMetadefIdGenerator metadef_id_generator_; }; diff --git a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc index 46c80d60ae84b..56a96b43bd11e 100644 --- a/onnxruntime/core/providers/webnn/webnn_provider_factory.cc +++ b/onnxruntime/core/providers/webnn/webnn_provider_factory.cc @@ -82,12 +82,10 @@ Status ParseFreeDimensionBounds(std::string_view value, webnn::FreeDimensionBoun struct WebNNProviderFactory : IExecutionProviderFactory { explicit WebNNProviderFactory(const std::string& webnn_device_flags, const webnn::FreeDimensionBounds& free_dimension_bounds, - bool enable_causal_lm, - bool enable_additive_dim_param) + bool enable_causal_lm) : webnn_device_flags_(webnn_device_flags), free_dimension_bounds_(free_dimension_bounds), - enable_causal_lm_(enable_causal_lm), - enable_additive_dim_param_(enable_additive_dim_param) {} + enable_causal_lm_(enable_causal_lm) {} ~WebNNProviderFactory() override {} std::unique_ptr CreateProvider() override; @@ -97,12 +95,10 @@ struct WebNNProviderFactory : IExecutionProviderFactory { std::string webnn_device_flags_; webnn::FreeDimensionBounds free_dimension_bounds_; bool enable_causal_lm_; - bool enable_additive_dim_param_; }; std::unique_ptr WebNNProviderFactory::CreateProvider() { - return std::make_unique(webnn_device_flags_, free_dimension_bounds_, enable_causal_lm_, - enable_additive_dim_param_); + return std::make_unique(webnn_device_flags_, free_dimension_bounds_, enable_causal_lm_); } std::unique_ptr WebNNProviderFactory::CreateProvider( @@ -127,8 +123,7 @@ std::unique_ptr WebNNProviderFactory::CreateProvider( std::numeric_limits::max())); merged_bounds[dim_override.dim_identifier] = webnn::FreeDimensionBound{value, value}; } - return std::make_unique(webnn_device_flags_, merged_bounds, enable_causal_lm_, - enable_additive_dim_param_); + return std::make_unique(webnn_device_flags_, merged_bounds, enable_causal_lm_); } std::shared_ptr WebNNProviderFactoryCreator::Create( @@ -150,14 +145,8 @@ std::shared_ptr WebNNProviderFactoryCreator::Create( const bool enable_causal_lm = (enable_causal_lm_it != provider_options.end() && enable_causal_lm_it->second == "true"); - // Optional additive symbolic dim expression parsing: "a + b". Default false. - const auto enable_additive_dim_param_it = provider_options.find("webnn_enable_additive_dim_param"); - const bool enable_additive_dim_param = (enable_additive_dim_param_it != provider_options.end() && - enable_additive_dim_param_it->second == "true"); - return std::make_shared(webnn_device_flags, free_dimension_bounds, - enable_causal_lm, - enable_additive_dim_param); + enable_causal_lm); } } // namespace onnxruntime