From 206e92b14c5a72de5de29996b95b2fe771088bb6 Mon Sep 17 00:00:00 2001 From: Saoirse Stewart Date: Wed, 10 Dec 2025 13:57:23 +0000 Subject: [PATCH] Arm backend: Add int16x8 testcases to more supported operators - Fixes issue for grouped conv2d with bias - Adds int16 to U55 operator support Signed-off-by: Saoirse Stewart --- .../_passes/decompose_grouped_conv_pass.py | 15 +++- .../arm/operator_support/ethos_u55_support.py | 4 +- backends/arm/test/ops/test_amax.py | 32 ++++++++- backends/arm/test/ops/test_amin.py | 31 +++++++- backends/arm/test/ops/test_conv2d.py | 49 +++++++++++++ backends/arm/test/ops/test_depthwise_conv.py | 48 +++++++++++++ backends/arm/test/ops/test_matmul.py | 72 ++++++++++++++----- backends/arm/test/ops/test_maximum.py | 44 +++++++++++- backends/arm/test/ops/test_minimum.py | 44 +++++++++++- backends/arm/test/ops/test_neg.py | 47 +++++++++++- backends/arm/test/ops/test_repeat.py | 47 +++++++++++- 11 files changed, 405 insertions(+), 28 deletions(-) diff --git a/backends/arm/_passes/decompose_grouped_conv_pass.py b/backends/arm/_passes/decompose_grouped_conv_pass.py index a0765b865fc..e64063975d6 100644 --- a/backends/arm/_passes/decompose_grouped_conv_pass.py +++ b/backends/arm/_passes/decompose_grouped_conv_pass.py @@ -1,4 +1,4 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -75,15 +75,24 @@ def _split_per_channel_qparams(qarg, index, output_slice_size): @staticmethod def _get_meta_copy(meta, i, output_slice_size): meta_copy = meta.copy() + if "input_qparams" in meta.data and len(meta.data["input_qparams"]) > 0: # Handle per-channel quantization by splitting quantization params # similarly to how activations/weights/biases are split. new_qparams = meta.data.get("input_qparams").copy() # Get quantization params of the weights and slice them. 
- qarg = new_qparams[1] + w_qarg = new_qparams[1] new_qparams[1] = DecomposeGroupedConvPass._split_per_channel_qparams( - qarg, index=i, output_slice_size=output_slice_size + w_qarg, index=i, output_slice_size=output_slice_size ) + # Special case for int16, grouped conv2d when bias is included. + # As we add bias after in the DecomposeConv2dWithInt16ActivationPass we must + # also split the bias quantization parameters for bias. + if new_qparams[0].dtype == torch.int16 and len(new_qparams) > 2: + b_qarg = new_qparams[2] + new_qparams[2] = DecomposeGroupedConvPass._split_per_channel_qparams( + b_qarg, index=i, output_slice_size=output_slice_size + ) meta_copy.data["input_qparams"] = new_qparams diff --git a/backends/arm/operator_support/ethos_u55_support.py b/backends/arm/operator_support/ethos_u55_support.py index bbffa8efa33..65d4a3cc1a0 100644 --- a/backends/arm/operator_support/ethos_u55_support.py +++ b/backends/arm/operator_support/ethos_u55_support.py @@ -153,10 +153,10 @@ def is_node_supported( # noqa: C901 ): for input_node in node.all_input_nodes: dtype = _try_determine_dtype(input_node) - if dtype is not None and dtype != torch.int8: + if dtype is not None and dtype not in (torch.int8, torch.int16): self.reporter.report_reject( input_node, - f"Input {input_node.name} has unsupported dtype {dtype} (Supports i8).", + f"Input {input_node.name} has unsupported dtype {dtype} (Supports i8, i16).", ) return False diff --git a/backends/arm/test/ops/test_amax.py b/backends/arm/test/ops/test_amax.py index 7109def6aeb..2c5b2888b94 100644 --- a/backends/arm/test/ops/test_amax.py +++ b/backends/arm/test/ops/test_amax.py @@ -1,4 +1,4 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -189,3 +189,33 @@ def test_max_dim_vgf_quant_to_amax(test_data: Max.input_t): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Amax.test_data) +def test_amax_tosa_INT_a16w8(test_data: Amax.input_t): + """Test amax with 16A8W quantization for TOSA INT.""" + data, dim, keep_dims = test_data() + module = Amax(dim, keep_dims) + pipeline = TosaPipelineINT[Max.input_t]( + module, + data, + "torch.ops.aten.amax", + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Amax.test_data) +@common.XfailIfNoCorstone320 +def test_amax_u85_INT_a16w8(test_data: Amax.input_t): + """Test amax with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + data, dim, keep_dims = test_data() + module = Amax(dim, keep_dims) + pipeline = EthosU85PipelineINT[Max.input_t]( + module, + data, + "torch.ops.aten.amax", + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_amin.py b/backends/arm/test/ops/test_amin.py index c90980fc25d..cbd8bd684b7 100644 --- a/backends/arm/test/ops/test_amin.py +++ b/backends/arm/test/ops/test_amin.py @@ -1,4 +1,4 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -203,3 +203,32 @@ def test_min_dim_vgf_quant_to_amin(test_data: Min.input_t): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Amin.test_data) +def test_amin_tosa_INT_a16w8(test_data: Amin.input_t): + """Test amin with 16A8W quantization for TOSA INT.""" + data, dim, keep_dims = test_data() + pipeline = TosaPipelineINT[Amin.input_t]( + Amin(dim, keep_dims), + data, + Amin.aten_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Amin.test_data) +@common.XfailIfNoCorstone320 +def test_amin_u85_INT_a16w8(test_data: Min.input_t): + """Test amin with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + data, dim, keep_dims = test_data() + pipeline = EthosU85PipelineINT[Amin.input_t]( + Amin(dim, keep_dims), + data, + Amin.aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py index 55eee293f95..1b05b30a6ab 100644 --- a/backends/arm/test/ops/test_conv2d.py +++ b/backends/arm/test/ops/test_conv2d.py @@ -590,3 +590,52 @@ def test_convolution_2d_u55_INT_not_delegated(module: Conv2d): quantize=True, u55_subset=True, ).run() + + +@common.parametrize("test_data", test_data_INT) +def test_conv2d_tosa_INT_a16w8(test_data: input_t): + """Test conv2d with 16A8W quantization for TOSA INT.""" + model, per_channel_quantization = test_data() + pipeline = TosaPipelineINT[input_t]( + model, + model.get_inputs(), + aten_op, + exir_op, + tosa_extensions=["int16"], + per_channel_quantization=per_channel_quantization, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_INT) +@common.XfailIfNoCorstone300 +def test_conv2d_u55_INT_a16w8(test_data: input_t): + """Test conv2d with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + model, per_channel_quantization = test_data() + pipeline = EthosU55PipelineINT[input_t]( + 
+        model,
+        model.get_inputs(),
+        aten_op,
+        exir_op,
+        a16w8_quantization=True,
+        use_to_edge_transform_and_lower=True,
+        per_channel_quantization=per_channel_quantization,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_INT)
+@common.XfailIfNoCorstone320
+def test_conv2d_u85_INT_a16w8(test_data: input_t):
+    """Test conv2d with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
+    model, per_channel_quantization = test_data()
+    pipeline = EthosU85PipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_op,
+        exir_op,
+        a16w8_quantization=True,
+        use_to_edge_transform_and_lower=True,
+        per_channel_quantization=per_channel_quantization,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_depthwise_conv.py b/backends/arm/test/ops/test_depthwise_conv.py
index b4289f922ce..80c6cc73d47 100644
--- a/backends/arm/test/ops/test_depthwise_conv.py
+++ b/backends/arm/test/ops/test_depthwise_conv.py
@@ -402,3 +402,51 @@ def test_convolution_1d_u85_INT_a8w4_depthwise(test_data):
         get_symmetric_a8w4_quantization_config(is_per_channel=per_channel_quantization)
     )
     pipeline.run()
+
+
+@common.parametrize("test_data", test_data_conv2d_INT)
+def test_convolution_2d_tosa_INT_a16w8_depthwise(test_data: input_t):
+    """Test depthwise_conv with 16A8W quantization for TOSA INT."""
+    model, per_channel_quantization = test_data()
+    pipeline = TosaPipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_op=[],
+        exir_op=exir_op,
+        tosa_extensions=["int16"],
+        per_channel_quantization=per_channel_quantization,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_conv2d_INT)
+@common.XfailIfNoCorstone300
+def test_convolution_2d_u55_INT_a16w8_depthwise(test_data: input_t):
+    """Test depthwise_conv with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
+    model, per_channel_quantization = test_data()
+    pipeline = EthosU55PipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_ops=[],
+        exir_ops=exir_op,
+        per_channel_quantization=per_channel_quantization,
+        a16w8_quantization=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_conv2d_INT)
+@common.XfailIfNoCorstone320
+def test_convolution_2d_u85_INT_a16w8_depthwise(test_data: input_t):
+    """Test depthwise_conv with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
+    model, per_channel_quantization = test_data()
+    pipeline = EthosU85PipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_ops=[],
+        exir_ops=exir_op,
+        a16w8_quantization=True,
+    )
+
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_matmul.py b/backends/arm/test/ops/test_matmul.py
index c9d5665f84c..6993c243073 100644
--- a/backends/arm/test/ops/test_matmul.py
+++ b/backends/arm/test/ops/test_matmul.py
@@ -1,10 +1,12 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
from typing import Tuple +import pytest + import torch from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.test_pipeline import ( @@ -134,10 +136,7 @@ def test_matmul_u55_INT(test_data: input_t1): pipeline.run() -@common.parametrize( - "test_data", - MatMulSingleInput.test_data_generators, -) +@common.parametrize("test_data", MatMulSingleInput.test_data_generators) @common.XfailIfNoCorstone300 def test_matmul_u55_INT_single_input(test_data: input_t1): pipeline = EthosU55PipelineINT[input_t1]( @@ -150,10 +149,7 @@ def test_matmul_u55_INT_single_input(test_data: input_t1): pipeline.run() -@common.parametrize( - "test_data", - MatMulCombo.test_data_generators, -) +@common.parametrize("test_data", MatMulCombo.test_data_generators) @common.XfailIfNoCorstone300 def test_matmul_u55_INT_combo(test_data: input_t1): pipeline = EthosU55PipelineINT[input_t1]( @@ -179,10 +175,7 @@ def test_matmul_u85_INT(test_data: input_t1): pipeline.run() -@common.parametrize( - "test_data", - MatMulSingleInput.test_data_generators, -) +@common.parametrize("test_data", MatMulSingleInput.test_data_generators) @common.XfailIfNoCorstone320 def test_matmul_u85_INT_single_input(test_data: input_t1): pipeline = EthosU85PipelineINT[input_t1]( @@ -195,10 +188,7 @@ def test_matmul_u85_INT_single_input(test_data: input_t1): pipeline.run() -@common.parametrize( - "test_data", - MatMulCombo.test_data_generators, -) +@common.parametrize("test_data", MatMulCombo.test_data_generators) @common.XfailIfNoCorstone320 def test_matmul_u85_INT_combo(test_data: input_t1): pipeline = EthosU85PipelineINT[input_t1]( @@ -287,3 +277,51 @@ def test_matmul_vgf_quant_combo(test_data: input_t1): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", MatMulCombo.test_data_generators) +def test_matmul_tosa_INT_a16w8(test_data: input_t1): + """Test matmul with 16A8W quantization for TOSA INT.""" + pipeline = TosaPipelineINT[Tuple[torch.Tensor]]( + MatMulCombo(), + 
test_data(), + aten_op_mm, + exir_op_mm, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", MatMulCombo.test_data_generators) +@pytest.mark.xfail( + reason="Vela compilation fails with 'Non-passthrough operation' for int16 matmul operations" +) +@common.XfailIfNoCorstone300 +def test_matmul_u55_INT_a16w8(test_data: input_t1): + """Test matmul with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + pipeline = EthosU55PipelineINT[Tuple[torch.Tensor]]( + MatMulCombo(), + test_data(), + aten_op_mm, + exir_op_mm, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", MatMulCombo.test_data_generators) +@common.XfailIfNoCorstone320 +def test_matmul_u85_INT_a16w8(test_data: input_t1): + """Test matmul with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + pipeline = EthosU85PipelineINT[Tuple[torch.Tensor]]( + MatMulCombo(), + test_data(), + aten_op_mm, + exir_op_mm, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_maximum.py b/backends/arm/test/ops/test_maximum.py index e213842494f..2c6cce1cfb4 100644 --- a/backends/arm/test/ops/test_maximum.py +++ b/backends/arm/test/ops/test_maximum.py @@ -1,6 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. -# Copyright 2024-2025 Arm Limited and/or its affiliates. +# Copyright 2024-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -96,3 +96,45 @@ def test_maximum_vgf_quant(test_data: Tuple): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Maximum.test_parameters) +def test_maximum_tosa_INT_a16w8(test_data: test_t): + """Test maximum with 16A8W quantization for TOSA INT.""" + pipeline = TosaPipelineINT[test_t]( + Maximum(), + test_data(), + aten_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Maximum.test_parameters) +@common.XfailIfNoCorstone300 +def test_maximum_u55_INT_a16w8(test_data: test_t): + """Test maximum with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + pipeline = EthosU55PipelineINT[test_t]( + Maximum(), + test_data(), + aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Maximum.test_parameters) +@common.XfailIfNoCorstone320 +def test_maximum_u85_INT_a16w8(test_data: test_t): + """Test maximum with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + pipeline = EthosU85PipelineINT[test_t]( + Maximum(), + test_data(), + aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_minimum.py b/backends/arm/test/ops/test_minimum.py index ff706f7261e..6b680c4edb6 100644 --- a/backends/arm/test/ops/test_minimum.py +++ b/backends/arm/test/ops/test_minimum.py @@ -1,6 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. -# Copyright 2024-2025 Arm Limited and/or its affiliates. +# Copyright 2024-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -96,3 +96,45 @@ def test_minimum_vgf_quant(test_data: test_t): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Minimum.test_parameters) +def test_minimum_tosa_INT_a16w8(test_data: test_t): + """Test minimum with 16A8W quantization for TOSA INT.""" + pipeline = TosaPipelineINT[test_t]( + Minimum(), + test_data(), + aten_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Minimum.test_parameters) +@common.XfailIfNoCorstone300 +def test_minimum_u55_INT_a16w8(test_data: test_t): + """Test minimum with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + pipeline = EthosU55PipelineINT[test_t]( + Minimum(), + test_data(), + aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Minimum.test_parameters) +@common.XfailIfNoCorstone320 +def test_minimum_u85_INT_a16w8(test_data: test_t): + """Test minimum with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + pipeline = EthosU85PipelineINT[test_t]( + Minimum(), + test_data(), + aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_neg.py b/backends/arm/test/ops/test_neg.py index 11d1153a171..bfed7d29f6c 100644 --- a/backends/arm/test/ops/test_neg.py +++ b/backends/arm/test/ops/test_neg.py @@ -1,4 +1,4 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -97,3 +97,48 @@ def test_neg_vgf_quant(test_data: input_t1): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", Neg.test_data) +def test_neg_tosa_INT_a16w8(test_data: input_t1): + """Test neg with 16A8W quantization for TOSA INT.""" + pipeline = TosaPipelineINT[Tuple[torch.Tensor]]( + Neg(), + test_data, + Neg.aten_op, + Neg.exir_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", Neg.test_data) +@common.XfailIfNoCorstone300 +def test_neg_u55_INT_a16w8(test_data: input_t1): + """Test neg with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + pipeline = EthosU55PipelineINT[Tuple[torch.Tensor]]( + Neg(), + test_data, + Neg.aten_op, + Neg.exir_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Neg.test_data) +@common.XfailIfNoCorstone320 +def test_neg_u85_INT_a16w8(test_data: input_t1): + """Test neg with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + pipeline = EthosU85PipelineINT[Tuple[torch.Tensor]]( + Neg(), + test_data, + Neg.aten_op, + Neg.exir_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py index 0b3de3b72df..7a41e0b7b7f 100644 --- a/backends/arm/test/ops/test_repeat.py +++ b/backends/arm/test/ops/test_repeat.py @@ -1,4 +1,4 @@ -# Copyright 2024-2025 Arm Limited and/or its affiliates. +# Copyright 2024-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -160,3 +160,48 @@ def test_repeat_vgf_quant(test_data: Tuple): quantize=True, ) pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_repeat_tosa_INT_a16w8(test_data): + """Test repeat with 16A8W quantization for TOSA INT.""" + module, args = test_data() + pipeline = TosaPipelineINT[Tuple[torch.Tensor]]( + module, + args, + module.aten_op, + tosa_extensions=["int16"], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite_u55) +@common.XfailIfNoCorstone300 +def test_repeat_u55_INT_a16w8(test_data): + """Test repeat with 16A8W quantization on U55 (16-bit activations, 8-bit weights)""" + module, args = test_data() + pipeline = EthosU55PipelineINT[Tuple[torch.Tensor]]( + module, + args, + module.aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_repeat_u85_INT_a16w8(test_data): + """Test repeat with 16A8W quantization on U85 (16-bit activations, 8-bit weights)""" + module, args = test_data() + pipeline = EthosU85PipelineINT[Tuple[torch.Tensor]]( + module, + args, + module.aten_op, + per_channel_quantization=False, + a16w8_quantization=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run()