Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 39 additions & 19 deletions backends/aoti/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,37 +25,57 @@ endif()
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
find_package_torch()

# Common AOTI functionality - combines all AOTI common components
set(_aoti_common_sources common_shims.cpp)
add_library(aoti_common STATIC ${_aoti_common_sources})
# ==============================================================================
# AOTI common shims using ETensor (for Metal backend)
# TODO(gasoonjia): Remove this after metal migration
# ==============================================================================
add_library(aoti_common_shims STATIC common_shims.cpp)
target_include_directories(
aoti_common
aoti_common_shims
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}> $<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
)
target_compile_options(
aoti_common
aoti_common_shims
PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/EHsc /GR>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-fexceptions -frtti -fPIC>
)
target_compile_definitions(
aoti_common PRIVATE $<$<PLATFORM_ID:Windows>:EXPORT_AOTI_FUNCTIONS>
)
# Ensure symbols are exported properly
if(APPLE)
target_link_options(aoti_common PUBLIC -Wl,-export_dynamic)
else()
target_link_options(
aoti_common PUBLIC $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wl,--export-dynamic>
)
endif()
aoti_common_shims PUBLIC $<$<PLATFORM_ID:Windows>:EXPORT_AOTI_FUNCTIONS>
)
target_link_libraries(aoti_common_shims PUBLIC extension_tensor ${CMAKE_DL_LIBS})

# Link against ExecuTorch libraries and standard libraries
target_link_libraries(aoti_common PUBLIC extension_tensor ${CMAKE_DL_LIBS})
executorch_target_link_options_shared_lib(aoti_common)
install(
TARGETS aoti_common_shims
EXPORT ExecuTorchTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}
)

# ==============================================================================
# AOTI common shims using SlimTensor (for CUDA backend)
# Uses SlimTensor for all tensor operations
# TODO(gasoonjia): Replace aoti_common_shims with this one after metal migration
# ==============================================================================
add_library(aoti_common_shims_slim STATIC common_shims_slim.cpp)
target_include_directories(
aoti_common_shims_slim
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}> $<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
)
target_compile_options(
aoti_common_shims_slim
PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/EHsc /GR>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-fexceptions -frtti -fPIC>
)
target_compile_definitions(
aoti_common_shims_slim PUBLIC $<$<PLATFORM_ID:Windows>:EXPORT_AOTI_FUNCTIONS>
)
target_link_libraries(
aoti_common_shims_slim PUBLIC slimtensor extension_tensor ${CMAKE_DL_LIBS}
)

install(
TARGETS aoti_common
TARGETS aoti_common_shims_slim
EXPORT ExecuTorchTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
21 changes: 20 additions & 1 deletion backends/aoti/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def define_common_targets():
],
)

# AOTI common shims functionality
# AOTI common shims functionality using ETensor
# TODO(gasoonjia): Remove this after metal migration
runtime.cxx_library(
name = "common_shims",
srcs = [
Expand Down Expand Up @@ -89,6 +90,7 @@ def define_common_targets():

# SlimTensor-based common shims library
# Uses SlimTensor for all tensor operations
# TODO(gasoonjia): Replace common_shims with this one after metal migration
runtime.cxx_library(
name = "common_shims_slim",
srcs = [
Expand All @@ -97,10 +99,27 @@ def define_common_targets():
headers = [
"common_shims_slim.h",
"export.h",
"utils.h",
],
visibility = ["@EXECUTORCH_CLIENTS"],
exported_deps = [
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/backends/aoti/slim/core:slimtensor",
],
)

# Common AOTI functionality for SlimTensor-based backends (combining common_shims_slim and delegate_handle)
# All CUDA backend code should depend on this target
# TODO(gasoonjia): Replace aoti_common with this one after metal migration
runtime.cxx_library(
name = "aoti_common_slim",
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
link_whole = True,
supports_python_dlopen = True,
visibility = ["PUBLIC"],
exported_deps = [
":common_shims_slim",
":delegate_handle",
],
)
14 changes: 9 additions & 5 deletions backends/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,18 @@ install(

# CUDA-specific AOTI shim symbols (dynamically linked) Uses
# common_shims_slim.cpp for SlimTensor-based shim implementations
set(_aoti_cuda_shim_sources
runtime/shims/memory.cpp runtime/guard.cpp runtime/shims/cuda_guard.cpp
runtime/shims/int4mm.cu ${EXECUTORCH_ROOT}/backends/aoti/common_shims.cpp
${EXECUTORCH_ROOT}/backends/aoti/common_shims_slim.cpp
)
set(_aoti_cuda_shim_sources
runtime/shims/memory.cpp runtime/shims/cuda_guard.cpp
runtime/shims/int4mm.cu
${EXECUTORCH_ROOT}/backends/aoti/common_shims_slim.cpp
${EXECUTORCH_ROOT}/backends/aoti/slim/cuda/guard.cpp
)

add_library(aoti_cuda_shims SHARED ${_aoti_cuda_shim_sources})

# Define CUDA_AVAILABLE to use SlimTensor on GPU in common_shims_slim.h
target_compile_definitions(aoti_cuda_shims PRIVATE CUDA_AVAILABLE=1)

# Define export macros for shared library
if(MSVC)
target_compile_definitions(aoti_cuda_shims PRIVATE EXPORT_AOTI_FUNCTIONS)
Expand Down
67 changes: 12 additions & 55 deletions backends/cuda/runtime/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,6 @@ load("//tools/build/buck:nvcc_flags.bzl", "get_nvcc_arch_args")

oncall("executorch")

runtime.cxx_library(
name = "guard",
srcs = [
"guard.cpp",
],
headers = [
"guard.h",
"utils.h",
],
visibility = ["PUBLIC"],
deps = [
"//executorch/runtime/platform:platform",
],
exported_deps = [
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
],
external_deps = [
("cuda", None, "cuda-lazy"),
],
)

runtime.cxx_library(
name = "cuda_platform",
srcs = [
Expand Down Expand Up @@ -71,14 +49,12 @@ runtime.cxx_library(
runtime.cxx_library(
name = "runtime_shims",
srcs = [
"guard.cpp",
"shims/cuda_guard.cpp",
"shims/int4mm.cu",
"shims/memory.cpp",
"shims/tensor_attribute.cpp",
],
headers = [
"guard.h",
"shims/cuda_guard.h",
"shims/int4mm.cuh",
"shims/int4mm.h",
Expand All @@ -91,43 +67,18 @@ runtime.cxx_library(
supports_python_dlopen = True,
# Constructor needed for backend registration.
compiler_flags = ["-Wno-global-constructors"],
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
visibility = ["PUBLIC"],
deps = [
":tensor_maker",
"//executorch/backends/aoti:common_shims",
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/runtime/platform:platform",
"//executorch/backends/cuda/runtime:cuda_platform",
],
nvcc_flags = get_nvcc_arch_args() + [
"-_NVCC_HOST_COMPILER_FLAG_",
"gcc",
],
external_deps = [
("cuda", None, "cuda-lazy"),
],
)

runtime.cxx_library(
name = "runtime_shims_slim",
srcs = [
"shims/memory_slim.cpp",
],
headers = [
"shims/memory_slim.h",
],
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
link_whole = True,
supports_python_dlopen = True,
visibility = ["@EXECUTORCH_CLIENTS"],
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
deps = [
"//executorch/backends/aoti:aoti_common_slim",
"//executorch/backends/aoti/slim/core:slimtensor",
"//executorch/backends/aoti/slim/factory:empty",
"//executorch/backends/aoti/slim/factory:from_blob",
"//executorch/backends/aoti:common_shims",
"//executorch/backends/aoti/slim/cuda:guard",
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/runtime/core/exec_aten/util:tensor_util",
"//executorch/runtime/platform:platform",
],
nvcc_flags = get_nvcc_arch_args() + [
Expand All @@ -149,10 +100,16 @@ runtime.cxx_library(
supports_python_dlopen = True,
# Constructor needed for backend registration.
compiler_flags = ["-Wno-global-constructors"],
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
visibility = ["PUBLIC"],
deps = [
":runtime_shims",
"//executorch/backends/aoti:aoti_common",
"//executorch/backends/aoti:aoti_common_slim",
"//executorch/backends/aoti/slim/core:slimtensor",
"//executorch/backends/aoti/slim/factory:empty",
"//executorch/backends/aoti/slim/factory:from_blob",
"//executorch/backends/aoti/slim/factory:from_etensor",
"//executorch/extension/tensor:tensor",
"//executorch/runtime/backend:interface",
"//executorch/runtime/core/exec_aten/util:tensor_util",
],
Expand Down
Loading
Loading