94 changes: 94 additions & 0 deletions backends/aoti/slim/c10/core/WrapDimMinimal.h
@@ -0,0 +1,94 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <cstdint>
#include <utility>

#include <executorch/backends/aoti/slim/c10/macros/Macros.h>
#include <executorch/runtime/platform/assert.h>

namespace executorch::backends::aoti::slim::c10 {

namespace detail {

/// Slow path for maybe_wrap_dim when dimension needs validation.
template <typename T>
inline T maybe_wrap_dim_slow(T dim, T dim_post_expr, bool wrap_scalar) {
ET_CHECK_MSG(
dim_post_expr >= 0,
"Rank cannot be negative but got %ld",
static_cast<long>(dim_post_expr));

if (dim_post_expr == 0) {
ET_CHECK_MSG(
wrap_scalar,
"Dimension specified as %ld but tensor has no dimensions",
static_cast<long>(dim));
// Inline the wrap for dim_post_expr=1 (treat the scalar as 1-dimensional)
if (dim >= 0 && dim < 1) {
return dim;
} else if (dim >= -1 && dim < 0) {
return dim + 1;
}
ET_CHECK_MSG(
false,
"Dimension out of range (expected to be in range of [-1, 0], but got %ld)",
static_cast<long>(dim));
}

T min = dim_post_expr * -1;
T max = dim_post_expr - 1;
ET_CHECK_MSG(
min <= dim && dim <= max,
"Dimension out of range (expected to be in range of [%ld, %ld], but got %ld)",
static_cast<long>(min),
static_cast<long>(max),
static_cast<long>(dim));

// Reachable only when the range check passes (e.g. when called directly;
// _maybe_wrap_dim's fast path already handles in-range dims)
return dim < 0 ? dim + dim_post_expr : dim;
}

} // namespace detail

/// Wraps a dimension index to handle negative indexing.
/// For example, dim=-1 with dim_post_expr=3 returns 2.
///
/// @param dim The dimension index (may be negative).
/// @param dim_post_expr The number of dimensions.
/// @param wrap_scalar If true, allows wrapping for 0-dimensional tensors.
/// @return The wrapped dimension index (always non-negative).
template <typename T>
inline T _maybe_wrap_dim(T dim, T dim_post_expr, bool wrap_scalar = true) {
// Inline the fast paths
if (SLIMTENSOR_LIKELY(dim_post_expr * -1 <= dim && dim < dim_post_expr)) {
if (dim < 0) {
return dim + dim_post_expr;
}
return dim;
}
// Check edge-cases out-of-line
return detail::maybe_wrap_dim_slow<T>(
std::move(dim), std::move(dim_post_expr), wrap_scalar);
}

/// Wraps a dimension index when the dimension count is given as int64_t.
inline int64_t
maybe_wrap_dim(int64_t dim, int64_t dim_post_expr, bool wrap_scalar = true) {
return _maybe_wrap_dim(dim, dim_post_expr, wrap_scalar);
}

/// Wraps a dimension index when the dimension count is given as size_t.
inline int64_t
maybe_wrap_dim(int64_t dim, size_t dim_post_expr, bool wrap_scalar = true) {
return _maybe_wrap_dim(dim, static_cast<int64_t>(dim_post_expr), wrap_scalar);
}

} // namespace executorch::backends::aoti::slim::c10
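
For illustration, a minimal usage sketch of the new `maybe_wrap_dim` helper (not part of the diff). It assumes only what `WrapDimMinimal.h` above declares: the `executorch::backends::aoti::slim::c10` namespace and `ET_CHECK_MSG` aborting on out-of-range dims; the helper function name here is hypothetical.

```cpp
#include <cstdint>
#include <executorch/backends/aoti/slim/c10/core/WrapDimMinimal.h>

namespace slimc10 = executorch::backends::aoti::slim::c10;

void wrap_dim_examples() {
  const int64_t rank = 3;
  // In-range, non-negative dims pass through unchanged.
  int64_t a = slimc10::maybe_wrap_dim(int64_t{1}, rank); // == 1
  // Negative dims are wrapped: -1 means the last dimension.
  int64_t b = slimc10::maybe_wrap_dim(int64_t{-1}, rank); // == 2
  // Rank-0 tensors: with wrap_scalar=true (the default), -1 and 0 both map to 0.
  int64_t c = slimc10::maybe_wrap_dim(int64_t{-1}, int64_t{0}); // == 0
  // Out-of-range dims (e.g. dim=3 for rank 3) abort via ET_CHECK_MSG.
  (void)a;
  (void)b;
  (void)c;
}
```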
14 changes: 14 additions & 0 deletions backends/aoti/slim/c10/core/targets.bzl
@@ -67,6 +67,19 @@ def define_common_targets():
],
)

# Header-only library for WrapDimMinimal
runtime.cxx_library(
name = "wrap_dim_minimal",
headers = [
"WrapDimMinimal.h",
],
visibility = ["@EXECUTORCH_CLIENTS"],
exported_deps = [
"//executorch/backends/aoti/slim/c10/macros:macros",
"//executorch/runtime/platform:platform",
],
)

# Combined c10 core library
runtime.cxx_library(
name = "core",
@@ -77,5 +90,6 @@ def define_common_targets():
":device_type",
":scalar_type",
":sizes_and_strides",
":wrap_dim_minimal",
],
)
114 changes: 106 additions & 8 deletions backends/aoti/slim/core/SlimTensor.h
@@ -10,9 +10,12 @@

#include <cstdint>
#include <cstring>
#include <optional>
#include <utility>
#include <vector>

#include <c10/util/safe_numerics.h>

#include <executorch/backends/aoti/slim/c10/core/Contiguity.h>
#include <executorch/backends/aoti/slim/c10/core/Device.h>
#include <executorch/backends/aoti/slim/c10/core/ScalarType.h>
@@ -254,22 +257,113 @@ class SlimTensor {
}

/**
* Set sizes and strides together.
* Set sizes, strides, and storage offset together.
*/
void set_sizes_and_strides(IntArrayRef sizes, IntArrayRef strides) {
void set_sizes_and_strides(
IntArrayRef sizes,
IntArrayRef strides,
std::optional<int64_t> storage_offset = std::nullopt) {
const size_t new_dim = sizes.size();
ET_CHECK_MSG(
sizes.size() == strides.size(),
"sizes (%zu) and strides (%zu) must have the same length",
sizes.size(),
new_dim == strides.size(),
"dimensionality of sizes (%zu) must match dimensionality of strides (%zu)",
new_dim,
strides.size());

sizes_and_strides_.set_sizes(sizes);
sizes_and_strides_.set_strides(strides);
std::vector<int64_t> new_sizes = toVec(sizes);
std::vector<int64_t> new_strides = toVec(strides);

// Stride handling: non-negative strides are kept as given; negative
// strides are re-derived as contiguous (the innermost dim gets stride 1,
// each outer stride is the inner stride times max(inner size, 1))
bool overflowed = false;
if (new_dim > 0) {
for (int64_t dim = new_dim - 1; dim >= 0; dim--) {
if (strides[dim] >= 0) {
new_strides[dim] = strides[dim];
} else {
// for negative strides
if (dim == new_dim - 1) {
new_strides[dim] = 1;
} else {
overflowed |= ::c10::mul_overflows(
new_strides[dim + 1],
std::max<int64_t>(new_sizes[dim + 1], 1),
&new_strides[dim]);
}
}
}
}
ET_CHECK_MSG(!overflowed, "Stride calculation overflowed");

sizes_and_strides_.set_sizes(makeArrayRef(new_sizes));
sizes_and_strides_.set_strides(makeArrayRef(new_strides));
if (storage_offset.has_value()) {
storage_offset_ = *storage_offset;
}

refresh_numel();
refresh_contiguous();
}

/**
* Set sizes to a contiguous layout (computes strides automatically).
*/
void set_sizes_contiguous(IntArrayRef sizes) {
std::vector<int64_t> contig_strides = compute_contiguous_strides(sizes);
set_sizes_and_strides(sizes, makeArrayRef(contig_strides));
}

// =========================================================================
// View Operations
// =========================================================================

/**
* Returns a view of the tensor with the specified sizes, strides, and
* storage offset. The returned tensor shares the same underlying storage.
*
* @param sizes The sizes of the view.
* @param strides The strides of the view.
* @param storage_offset Offset into storage in number of elements.
* @return A new SlimTensor that is a view of this tensor.
*/
inline SlimTensor as_strided(
IntArrayRef sizes,
IntArrayRef strides,
int64_t storage_offset) const;

/**
* Overload for initializer lists.
*/
inline SlimTensor as_strided(
std::initializer_list<int64_t> sizes,
std::initializer_list<int64_t> strides,
int64_t storage_offset) const {
return as_strided(
makeArrayRef(sizes), makeArrayRef(strides), storage_offset);
}

/**
* Modifies this tensor in-place to have the specified sizes, strides, and
* storage offset. The underlying storage remains unchanged.
*
* @param sizes The new sizes.
* @param strides The new strides.
* @param storage_offset New offset into storage in number of elements.
* @return Reference to this tensor.
*/
inline SlimTensor&
as_strided_(IntArrayRef sizes, IntArrayRef strides, int64_t storage_offset);

/**
* Overload for initializer lists.
*/
inline SlimTensor& as_strided_(
std::initializer_list<int64_t> sizes,
std::initializer_list<int64_t> strides,
int64_t storage_offset) {
return as_strided_(
makeArrayRef(sizes), makeArrayRef(strides), storage_offset);
}

// =========================================================================
// Copy Operation
// =========================================================================
@@ -278,7 +372,7 @@
* Copy data from another tensor to this tensor.
*
* Both tensors must have the same numel and dtype.
* Supports CPU-to-CPU and cross-device copies (CPU↔CUDA, CUDA↔CUDA).
* Currently only supports CPU-to-CPU copy (contiguous tensors only).
*
* @param other The source tensor to copy from
* @return Reference to this tensor
@@ -371,3 +465,7 @@
};

} // namespace executorch::backends::aoti::slim

// Include the view operation implementations (must come after the
// SlimTensor class definition)
#include <executorch/backends/aoti/slim/core/SlimTensorView-incl.h>
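
For illustration, a sketch of how the reworked `set_sizes_and_strides` treats negative strides and how `set_sizes_contiguous` wraps it (not part of the diff). It assumes a tensor `t` whose storage already holds at least 6 elements; `restride_example` is a hypothetical helper, and `makeArrayRef` is assumed to be reachable from the `executorch::backends::aoti::slim` namespace as it is in the patch above.

```cpp
#include <cstdint>
#include <vector>

#include <executorch/backends/aoti/slim/core/SlimTensor.h>

using namespace executorch::backends::aoti::slim;

// `t` is assumed to be e.g. a contiguous 2x3 tensor (6 elements of storage).
void restride_example(SlimTensor& t) {
  std::vector<int64_t> sizes = {3, 2};
  std::vector<int64_t> negative = {-1, -1};

  // Non-negative strides are taken verbatim; negative strides are
  // re-derived as contiguous, so the resulting strides here are {2, 1}.
  t.set_sizes_and_strides(
      makeArrayRef(sizes), makeArrayRef(negative), /*storage_offset=*/0);

  // Convenience wrapper: contiguous strides are computed automatically.
  std::vector<int64_t> new_sizes = {2, 3};
  t.set_sizes_contiguous(makeArrayRef(new_sizes));
}
```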
52 changes: 52 additions & 0 deletions backends/aoti/slim/core/SlimTensorView-incl.h
@@ -0,0 +1,52 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/aoti/slim/c10/core/WrapDimMinimal.h>
#include <executorch/backends/aoti/slim/util/ArrayRefUtil.h>

namespace executorch::backends::aoti::slim {

inline SlimTensor SlimTensor::as_strided(
IntArrayRef sizes,
IntArrayRef strides,
int64_t storage_offset) const {
SlimTensor result = *this;
result.as_strided_(sizes, strides, storage_offset);
return result;
}

inline SlimTensor& SlimTensor::as_strided_(
IntArrayRef sizes,
IntArrayRef strides,
int64_t storage_offset) {
ET_CHECK_MSG(
sizes.size() == strides.size(),
"as_strided: number of sizes (%zu) must equal number of strides (%zu)",
sizes.size(),
strides.size());

for (size_t i = 0; i < sizes.size(); ++i) {
ET_CHECK_MSG(
sizes[i] >= 0,
"as_strided: size at dimension %zu is negative: %ld",
i,
static_cast<long>(sizes[i]));
}

ET_CHECK_MSG(
storage_offset >= 0,
"as_strided: storage_offset must be non-negative, got: %ld",
static_cast<long>(storage_offset));

this->set_sizes_and_strides(sizes, strides, storage_offset);
return *this;
}

} // namespace executorch::backends::aoti::slim
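
For illustration, a sketch of the new `as_strided` / `as_strided_` views using the initializer-list overloads declared in `SlimTensor.h` above (not part of the diff). It assumes `t` is a contiguous 2x3 tensor with strides {3, 1}; `transpose_view` and `transpose_inplace` are hypothetical helpers.

```cpp
#include <executorch/backends/aoti/slim/core/SlimTensor.h>

using executorch::backends::aoti::slim::SlimTensor;

// Returns a 3x2 transposed view: sizes and strides are swapped, the
// storage offset stays 0, and storage is shared, so writes through the
// view are visible in `t`.
SlimTensor transpose_view(const SlimTensor& t) {
  return t.as_strided({3, 2}, {1, 3}, /*storage_offset=*/0);
}

// In-place variant: restrides `t` itself; the underlying storage is
// left untouched.
void transpose_inplace(SlimTensor& t) {
  t.as_strided_({3, 2}, {1, 3}, /*storage_offset=*/0);
}
```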
4 changes: 3 additions & 1 deletion backends/aoti/slim/core/targets.bzl
@@ -26,6 +26,7 @@ def define_common_targets():
name = "slimtensor",
headers = [
"SlimTensor.h",
"SlimTensorView-incl.h",
],
visibility = ["@EXECUTORCH_CLIENTS"],
exported_deps = [
@@ -34,9 +35,10 @@
"//executorch/backends/aoti/slim/c10/core:device",
"//executorch/backends/aoti/slim/c10/core:scalar_type",
"//executorch/backends/aoti/slim/c10/core:sizes_and_strides",
"//executorch/backends/aoti/slim/c10/core:wrap_dim_minimal",
"//executorch/backends/aoti/slim/util:array_ref_util",
"//executorch/backends/aoti/slim/util:size_util",
"//executorch/backends/aoti/slim/c10/cuda:exception",
"//executorch/runtime/platform:platform",
"//executorch/backends/aoti/slim/c10/cuda:exception",
],
)
31 changes: 21 additions & 10 deletions backends/aoti/slim/core/test/targets.bzl
@@ -7,8 +7,17 @@ def get_backend_mode():

def define_common_targets():
"""Define test targets for SlimTensor core module."""
runtime.cxx_test(
name = "test_slimtensor_dtypes",
srcs = [
"test_slimtensor_dtypes.cpp",
],
deps = [
"//executorch/backends/aoti/slim/factory:empty",
],
)

# GPU storage test with CUDA support
# Backend mode specific tests
for backend_mode in get_backend_mode():
backend_suffix = "_" + backend_mode if backend_mode == "cuda" else ""

@@ -57,12 +66,14 @@
**backend_kwargs
)

runtime.cxx_test(
name = "test_slimtensor_dtypes",
srcs = [
"test_slimtensor_dtypes.cpp",
],
deps = [
"//executorch/backends/aoti/slim/factory:empty",
],
)
runtime.cxx_test(
name = "test_as_strided" + backend_suffix,
srcs = [
"test_as_strided.cpp",
],
deps = [
"//executorch/backends/aoti/slim/core:slimtensor",
"//executorch/backends/aoti/slim/factory:empty",
],
**backend_kwargs
)