From 206e92b14c5a72de5de29996b95b2fe771088bb6 Mon Sep 17 00:00:00 2001 From: Saoirse Stewart Date: Wed, 10 Dec 2025 13:57:23 +0000 Subject: [PATCH] Arm backend: Add int16x8 testcases to more supported operators - Fixes issue for grouped conv2d with bias - Adds int16 to U55 operator support Signed-off-by: Saoirse Stewart --- .../_passes/decompose_grouped_conv_pass.py | 15 +++- .../arm/operator_support/ethos_u55_support.py | 4 +- backends/arm/test/ops/test_amax.py | 32 ++++++++- backends/arm/test/ops/test_amin.py | 31 +++++++- backends/arm/test/ops/test_conv2d.py | 49 +++++++++++++ backends/arm/test/ops/test_depthwise_conv.py | 48 +++++++++++++ backends/arm/test/ops/test_matmul.py | 72 ++++++++++++++----- backends/arm/test/ops/test_maximum.py | 44 +++++++++++- backends/arm/test/ops/test_minimum.py | 44 +++++++++++- backends/arm/test/ops/test_neg.py | 47 +++++++++++- backends/arm/test/ops/test_repeat.py | 47 +++++++++++- 11 files changed, 405 insertions(+), 28 deletions(-) diff --git a/backends/arm/_passes/decompose_grouped_conv_pass.py b/backends/arm/_passes/decompose_grouped_conv_pass.py index a0765b865fc..e64063975d6 100644 --- a/backends/arm/_passes/decompose_grouped_conv_pass.py +++ b/backends/arm/_passes/decompose_grouped_conv_pass.py @@ -1,4 +1,4 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -75,15 +75,24 @@ def _split_per_channel_qparams(qarg, index, output_slice_size): @staticmethod def _get_meta_copy(meta, i, output_slice_size): meta_copy = meta.copy() + if "input_qparams" in meta.data and len(meta.data["input_qparams"]) > 0: # Handle per-channel quantization by splitting quantization params # similarly to how activations/weights/biases are split. new_qparams = meta.data.get("input_qparams").copy() # Get quantization params of the weights and slice them. 
- qarg = new_qparams[1] + w_qarg = new_qparams[1] new_qparams[1] = DecomposeGroupedConvPass._split_per_channel_qparams( - qarg, index=i, output_slice_size=output_slice_size + w_qarg, index=i, output_slice_size=output_slice_size ) + # Special case for int16, grouped conv2d when bias is included. + # As we add bias after in the DecomposeConv2dWithInt16ActivationPass we must + # also split the bias quantization parameters for bias. + if new_qparams[0].dtype == torch.int16 and len(new_qparams) > 2: + b_qarg = new_qparams[2] + new_qparams[2] = DecomposeGroupedConvPass._split_per_channel_qparams( + b_qarg, index=i, output_slice_size=output_slice_size + ) meta_copy.data["input_qparams"] = new_qparams diff --git a/backends/arm/operator_support/ethos_u55_support.py b/backends/arm/operator_support/ethos_u55_support.py index bbffa8efa33..65d4a3cc1a0 100644 --- a/backends/arm/operator_support/ethos_u55_support.py +++ b/backends/arm/operator_support/ethos_u55_support.py @@ -153,10 +153,10 @@ def is_node_supported( # noqa: C901 ): for input_node in node.all_input_nodes: dtype = _try_determine_dtype(input_node) - if dtype is not None and dtype != torch.int8: + if dtype is not None and dtype not in (torch.int8, torch.int16): self.reporter.report_reject( input_node, - f"Input {input_node.name} has unsupported dtype {dtype} (Supports i8).", + f"Input {input_node.name} has unsupported dtype {dtype} (Supports i8, i16).", ) return False diff --git a/backends/arm/test/ops/test_amax.py b/backends/arm/test/ops/test_amax.py index 7109def6aeb..2c5b2888b94 100644 --- a/backends/arm/test/ops/test_amax.py +++ b/backends/arm/test/ops/test_amax.py @@ -1,4 +1,4 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -189,3 +189,33 @@ def test_max_dim_vgf_quant_to_amax(test_data: Max.input_t): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Amax.test_data) +def test_amax_tosa_INT_a16w8(test_data: Amax.input_t): + """Test amax with 16A8W quantization for TOSA INT.""" + data, dim, keep_dims = test_data() + module = Amax(dim, keep_dims) + pipeline = TosaPipelineINT[Max.input_t]( + module, + data, + "torch.ops.aten.amax", + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Amax.test_data) +@common.XfailIfNoCorstone320 +def test_amax_u85_INT_a16w8(test_data: Amax.input_t): + """Test amax with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + data, dim, keep_dims = test_data() + module = Amax(dim, keep_dims) + pipeline = EthosU85PipelineINT[Max.input_t]( + module, + data, + "torch.ops.aten.amax", + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_amin.py b/backends/arm/test/ops/test_amin.py index c90980fc25d..cbd8bd684b7 100644 --- a/backends/arm/test/ops/test_amin.py +++ b/backends/arm/test/ops/test_amin.py @@ -1,4 +1,4 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -203,3 +203,32 @@ def test_min_dim_vgf_quant_to_amin(test_data: Min.input_t): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Amin.test_data) +def test_amin_tosa_INT_a16w8(test_data: Amin.input_t): + """Test amin with 16A8W quantization for TOSA INT.""" + data, dim, keep_dims = test_data() + pipeline = TosaPipelineINT[Amin.input_t]( + Amin(dim, keep_dims), + data, + Amin.aten_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Amin.test_data) +@common.XfailIfNoCorstone320 +def test_amin_u85_INT_a16w8(test_data: Min.input_t): + """Test amin with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + data, dim, keep_dims = test_data() + pipeline = EthosU85PipelineINT[Amin.input_t]( + Amin(dim, keep_dims), + data, + Amin.aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py index 55eee293f95..1b05b30a6ab 100644 --- a/backends/arm/test/ops/test_conv2d.py +++ b/backends/arm/test/ops/test_conv2d.py @@ -590,3 +590,52 @@ def test_convolution_2d_u55_INT_not_delegated(module: Conv2d): quantize=True, u55_subset=True, ).run() + + +@common.parametrize("test_data", test_data_INT) +def test_conv2d_tosa_INT_a16w8(test_data: input_t): + """Test conv2d with 16A8W quantization for TOSA INT.""" + model, per_channel_quantization = test_data() + pipeline = TosaPipelineINT[input_t]( + model, + model.get_inputs(), + aten_op, + exir_op, + tosa_extensions=["int16"], + per_channel_quantization=per_channel_quantization, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_INT) +@common.XfailIfNoCorstone300 +def test_conv2d_u55_INT_a16w8(test_data: input_t): + """Test conv2d with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + model, per_channel_quantization = test_data() + pipeline = EthosU55PipelineINT[input_t]( + 
+        model,
+        model.get_inputs(),
+        aten_op,
+        exir_op,
+        a16w8_quantization=True,
+        use_to_edge_transform_and_lower=True,
+        per_channel_quantization=per_channel_quantization,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_INT)
+@common.XfailIfNoCorstone320
+def test_conv2d_u85_INT_a16w8(test_data: input_t):
+    """Test conv2d with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
+    model, per_channel_quantization = test_data()
+    pipeline = EthosU85PipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_op,
+        exir_op,
+        a16w8_quantization=True,
+        use_to_edge_transform_and_lower=True,
+        per_channel_quantization=per_channel_quantization,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_depthwise_conv.py b/backends/arm/test/ops/test_depthwise_conv.py
index b4289f922ce..80c6cc73d47 100644
--- a/backends/arm/test/ops/test_depthwise_conv.py
+++ b/backends/arm/test/ops/test_depthwise_conv.py
@@ -402,3 +402,51 @@ def test_convolution_1d_u85_INT_a8w4_depthwise(test_data):
         get_symmetric_a8w4_quantization_config(is_per_channel=per_channel_quantization)
     )
     pipeline.run()
+
+
+@common.parametrize("test_data", test_data_conv2d_INT)
+def test_convolution_2d_tosa_INT_a16w8_depthwise(test_data: input_t):
+    """Test depthwise_conv with 16A8W quantization for TOSA INT."""
+    model, per_channel_quantization = test_data()
+    pipeline = TosaPipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_op=[],
+        exir_op=exir_op,
+        tosa_extensions=["int16"],
+        per_channel_quantization=per_channel_quantization,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_conv2d_INT)
+@common.XfailIfNoCorstone300
+def test_convolution_2d_u55_INT_a16w8_depthwise(test_data: input_t):
+    """Test depthwise_conv with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
+    model, per_channel_quantization = test_data()
+    pipeline = EthosU55PipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_ops=[],
+        exir_ops=exir_op,
+        per_channel_quantization=per_channel_quantization,
+        a16w8_quantization=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_conv2d_INT)
+@common.XfailIfNoCorstone320
+def test_convolution_2d_u85_INT_a16w8_depthwise(test_data: input_t):
+    """Test depthwise_conv with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
+    model, per_channel_quantization = test_data()
+    pipeline = EthosU85PipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_ops=[],
+        exir_ops=exir_op,
+        a16w8_quantization=True,
+    )
+
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_matmul.py b/backends/arm/test/ops/test_matmul.py
index c9d5665f84c..6993c243073 100644
--- a/backends/arm/test/ops/test_matmul.py
+++ b/backends/arm/test/ops/test_matmul.py
@@ -1,10 +1,12 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
from typing import Tuple +import pytest + import torch from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.test_pipeline import ( @@ -134,10 +136,7 @@ def test_matmul_u55_INT(test_data: input_t1): pipeline.run() -@common.parametrize( - "test_data", - MatMulSingleInput.test_data_generators, -) +@common.parametrize("test_data", MatMulSingleInput.test_data_generators) @common.XfailIfNoCorstone300 def test_matmul_u55_INT_single_input(test_data: input_t1): pipeline = EthosU55PipelineINT[input_t1]( @@ -150,10 +149,7 @@ def test_matmul_u55_INT_single_input(test_data: input_t1): pipeline.run() -@common.parametrize( - "test_data", - MatMulCombo.test_data_generators, -) +@common.parametrize("test_data", MatMulCombo.test_data_generators) @common.XfailIfNoCorstone300 def test_matmul_u55_INT_combo(test_data: input_t1): pipeline = EthosU55PipelineINT[input_t1]( @@ -179,10 +175,7 @@ def test_matmul_u85_INT(test_data: input_t1): pipeline.run() -@common.parametrize( - "test_data", - MatMulSingleInput.test_data_generators, -) +@common.parametrize("test_data", MatMulSingleInput.test_data_generators) @common.XfailIfNoCorstone320 def test_matmul_u85_INT_single_input(test_data: input_t1): pipeline = EthosU85PipelineINT[input_t1]( @@ -195,10 +188,7 @@ def test_matmul_u85_INT_single_input(test_data: input_t1): pipeline.run() -@common.parametrize( - "test_data", - MatMulCombo.test_data_generators, -) +@common.parametrize("test_data", MatMulCombo.test_data_generators) @common.XfailIfNoCorstone320 def test_matmul_u85_INT_combo(test_data: input_t1): pipeline = EthosU85PipelineINT[input_t1]( @@ -287,3 +277,51 @@ def test_matmul_vgf_quant_combo(test_data: input_t1): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", MatMulCombo.test_data_generators) +def test_matmul_tosa_INT_a16w8(test_data: input_t1): + """Test matmul with 16A8W quantization for TOSA INT.""" + pipeline = TosaPipelineINT[Tuple[torch.Tensor]]( + MatMulCombo(), + 
test_data(), + aten_op_mm, + exir_op_mm, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", MatMulCombo.test_data_generators) +@pytest.mark.xfail( + reason="Vela compilation fails with 'Non-passthrough operation' for int16 matmul operations" +) +@common.XfailIfNoCorstone300 +def test_matmul_u55_INT_a16w8(test_data: input_t1): + """Test matmul with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + pipeline = EthosU55PipelineINT[Tuple[torch.Tensor]]( + MatMulCombo(), + test_data(), + aten_op_mm, + exir_op_mm, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", MatMulCombo.test_data_generators) +@common.XfailIfNoCorstone320 +def test_matmul_u85_INT_a16w8(test_data: input_t1): + """Test matmul with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + pipeline = EthosU85PipelineINT[Tuple[torch.Tensor]]( + MatMulCombo(), + test_data(), + aten_op_mm, + exir_op_mm, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_maximum.py b/backends/arm/test/ops/test_maximum.py index e213842494f..2c6cce1cfb4 100644 --- a/backends/arm/test/ops/test_maximum.py +++ b/backends/arm/test/ops/test_maximum.py @@ -1,6 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. -# Copyright 2024-2025 Arm Limited and/or its affiliates. +# Copyright 2024-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -96,3 +96,45 @@ def test_maximum_vgf_quant(test_data: Tuple): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Maximum.test_parameters) +def test_maximum_tosa_INT_a16w8(test_data: test_t): + """Test maximum with 16A8W quantization for TOSA INT.""" + pipeline = TosaPipelineINT[test_t]( + Maximum(), + test_data(), + aten_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Maximum.test_parameters) +@common.XfailIfNoCorstone300 +def test_maximum_u55_INT_a16w8(test_data: test_t): + """Test maximum with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + pipeline = EthosU55PipelineINT[test_t]( + Maximum(), + test_data(), + aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Maximum.test_parameters) +@common.XfailIfNoCorstone320 +def test_maximum_u85_INT_a16w8(test_data: test_t): + """Test maximum with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + pipeline = EthosU85PipelineINT[test_t]( + Maximum(), + test_data(), + aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_minimum.py b/backends/arm/test/ops/test_minimum.py index ff706f7261e..6b680c4edb6 100644 --- a/backends/arm/test/ops/test_minimum.py +++ b/backends/arm/test/ops/test_minimum.py @@ -1,6 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. -# Copyright 2024-2025 Arm Limited and/or its affiliates. +# Copyright 2024-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -96,3 +96,45 @@ def test_minimum_vgf_quant(test_data: test_t): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Minimum.test_parameters) +def test_minimum_tosa_INT_a16w8(test_data: test_t): + """Test minimum with 16A8W quantization for TOSA INT.""" + pipeline = TosaPipelineINT[test_t]( + Minimum(), + test_data(), + aten_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Minimum.test_parameters) +@common.XfailIfNoCorstone300 +def test_minimum_u55_INT_a16w8(test_data: test_t): + """Test minimum with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + pipeline = EthosU55PipelineINT[test_t]( + Minimum(), + test_data(), + aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Minimum.test_parameters) +@common.XfailIfNoCorstone320 +def test_minimum_u85_INT_a16w8(test_data: test_t): + """Test minimum with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + pipeline = EthosU85PipelineINT[test_t]( + Minimum(), + test_data(), + aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_neg.py b/backends/arm/test/ops/test_neg.py index 11d1153a171..bfed7d29f6c 100644 --- a/backends/arm/test/ops/test_neg.py +++ b/backends/arm/test/ops/test_neg.py @@ -1,4 +1,4 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -97,3 +97,48 @@ def test_neg_vgf_quant(test_data: input_t1): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Neg.test_data) +def test_neg_tosa_INT_a16w8(test_data: input_t1): + """Test neg with 16A8W quantization for TOSA INT.""" + pipeline = TosaPipelineINT[Tuple[torch.Tensor]]( + Neg(), + test_data, + Neg.aten_op, + Neg.exir_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Neg.test_data) +@common.XfailIfNoCorstone300 +def test_neg_u55_INT_a16w8(test_data: input_t1): + """Test neg with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + pipeline = EthosU55PipelineINT[Tuple[torch.Tensor]]( + Neg(), + test_data, + Neg.aten_op, + Neg.exir_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Neg.test_data) +@common.XfailIfNoCorstone320 +def test_neg_u85_INT_a16w8(test_data: input_t1): + """Test neg with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + pipeline = EthosU85PipelineINT[Tuple[torch.Tensor]]( + Neg(), + test_data, + Neg.aten_op, + Neg.exir_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py index 0b3de3b72df..7a41e0b7b7f 100644 --- a/backends/arm/test/ops/test_repeat.py +++ b/backends/arm/test/ops/test_repeat.py @@ -1,4 +1,4 @@ -# Copyright 2024-2025 Arm Limited and/or its affiliates. +# Copyright 2024-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -160,3 +160,48 @@ def test_repeat_vgf_quant(test_data: Tuple): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_repeat_tosa_INT_a16w8(test_data): + """Test repeat with 16A8W quantization for TOSA INT.""" + module, args = test_data() + pipeline = TosaPipelineINT[Tuple[torch.Tensor]]( + module, + args, + module.aten_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite_u55) +@common.XfailIfNoCorstone300 +def test_repeat_u55_INT_a16w8(test_data): + """Test repeat with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + module, args = test_data() + pipeline = EthosU55PipelineINT[Tuple[torch.Tensor]]( + module, + args, + module.aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_repeat_u85_INT_a16w8(test_data): + """Test repeat with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + module, args = test_data() + pipeline = EthosU85PipelineINT[Tuple[torch.Tensor]]( + module, + args, + module.aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run()