diff --git a/include/infinicore.h b/include/infinicore.h index 49654937e..b01bae141 100644 --- a/include/infinicore.h +++ b/include/infinicore.h @@ -47,6 +47,7 @@ typedef enum { INFINI_DEVICE_KUNLUN = 7, INFINI_DEVICE_HYGON = 8, INFINI_DEVICE_QY = 9, + INFINI_DEVICE_ALI = 10, INFINI_DEVICE_TYPE_COUNT } infiniDevice_t; diff --git a/include/infinicore/device.hpp b/include/infinicore/device.hpp index 0c2562391..4bdab1088 100644 --- a/include/infinicore/device.hpp +++ b/include/infinicore/device.hpp @@ -22,6 +22,7 @@ class Device { KUNLUN = INFINI_DEVICE_KUNLUN, HYGON = INFINI_DEVICE_HYGON, QY = INFINI_DEVICE_QY, + ALI = INFINI_DEVICE_ALI, COUNT = INFINI_DEVICE_TYPE_COUNT, }; diff --git a/python/infinicore/device.py b/python/infinicore/device.py index 06198501f..8858a1235 100644 --- a/python/infinicore/device.py +++ b/python/infinicore/device.py @@ -82,6 +82,7 @@ def _from_infinicore_device(infinicore_device: _infinicore.Device): _infinicore.Device.Type.KUNLUN: "cuda", _infinicore.Device.Type.HYGON: "cuda", _infinicore.Device.Type.QY: "cuda", + _infinicore.Device.Type.ALI: "cuda", } diff --git a/src/infiniccl-test/main.cpp b/src/infiniccl-test/main.cpp index 065371d08..8126aa15a 100644 --- a/src/infiniccl-test/main.cpp +++ b/src/infiniccl-test/main.cpp @@ -12,7 +12,7 @@ void printUsage() { std::cout << "infiniccl-test --" << std::endl << std::endl; std::cout << " --" << std::endl; - std::cout << " Specify the device type --(nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon)." << std::endl + std::cout << " Specify the device type --(nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon|ali)." << std::endl << std::endl; std::cout << "The program will run tests on all visible devices of the specified device type." << " Use Environmental Variables such as CUDA_VSIBLE_DEVICES to limit visible device IDs."; @@ -46,6 +46,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) { else PARSE_DEVICE("--qy", INFINI_DEVICE_QY) else PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN) else PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON) + else PARSE_DEVICE("--ali", INFINI_DEVICE_ALI) else { printUsage(); } diff --git a/src/infiniccl/cuda/infiniccl_cuda.h b/src/infiniccl/cuda/infiniccl_cuda.h index 3eb14cd29..9bba02345 100644 --- a/src/infiniccl/cuda/infiniccl_cuda.h +++ b/src/infiniccl/cuda/infiniccl_cuda.h @@ -4,7 +4,7 @@ #include "../infiniccl_impl.h" // Windows does not support CUDA -#if (defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)) && defined(ENABLE_CCL) && !defined(_WIN32) +#if (defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API)) && defined(ENABLE_CCL) && !defined(_WIN32) INFINICCL_DEVICE_API_IMPL(cuda) #else INFINICCL_DEVICE_API_NOOP(cuda) diff --git a/src/infiniccl/infiniccl.cc b/src/infiniccl/infiniccl.cc index 02ee9097d..25c2702a6 100644 --- a/src/infiniccl/infiniccl.cc +++ b/src/infiniccl/infiniccl.cc @@ -27,6 +27,7 @@ __C infiniStatus_t infinicclCommInitAll( COMM_INIT_ALL(INFINI_DEVICE_METAX, metax); COMM_INIT_ALL(INFINI_DEVICE_MOORE, moore); COMM_INIT_ALL(INFINI_DEVICE_KUNLUN, kunlun); + COMM_INIT_ALL(INFINI_DEVICE_ALI, cuda); default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; } @@ -53,6 +54,7 @@ __C infiniStatus_t infinicclCommDestroy(infinicclComm_t comm) { COMM_DESTROY(INFINI_DEVICE_METAX, metax); COMM_DESTROY(INFINI_DEVICE_MOORE, moore); COMM_DESTROY(INFINI_DEVICE_KUNLUN, kunlun); + COMM_DESTROY(INFINI_DEVICE_ALI, cuda); default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; } @@ -86,6 +88,7 @@ __C infiniStatus_t infinicclAllReduce( ALL_REDUCE(INFINI_DEVICE_METAX, metax); ALL_REDUCE(INFINI_DEVICE_MOORE, moore); ALL_REDUCE(INFINI_DEVICE_KUNLUN, kunlun); + ALL_REDUCE(INFINI_DEVICE_ALI, cuda); default: return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; diff --git a/src/infinicore-test/README.md b/src/infinicore-test/README.md index 7035ee11f..a6349349d 100644 --- a/src/infinicore-test/README.md +++ b/src/infinicore-test/README.md @@ -66,6 +66,7 @@ xmake build infinicore-test ./infinicore-test --qy ./infinicore-test --kunlun ./infinicore-test --hygon +./infinicore-test --ali ``` ### Customize Test Parameters diff --git a/src/infinicore-test/main.cc b/src/infinicore-test/main.cc index c743bd20c..9410c8096 100644 --- a/src/infinicore-test/main.cc +++ b/src/infinicore-test/main.cc @@ -42,6 +42,7 @@ void printUsage() { << " qy" << std::endl << " kunlun" << std::endl << " hygon" << std::endl + << " ali" << std::endl << std::endl << "Available tests:" << std::endl << " basic - Basic memory allocation and deallocation tests" << std::endl @@ -84,6 +85,8 @@ ParsedArgs parseArgs(int argc, char *argv[]) { args.device_type = INFINI_DEVICE_KUNLUN; } else if (arg == "--hygon") { args.device_type = INFINI_DEVICE_HYGON; + } else if (arg == "--ali") { + args.device_type = INFINI_DEVICE_ALI; } else if (arg == "--test") { if (i + 1 >= argc) { std::cerr << "Error: --test requires a test name" << std::endl; diff --git a/src/infinicore/device.cc b/src/infinicore/device.cc index 30050f3ec..77c1f4986 100644 --- a/src/infinicore/device.cc +++ b/src/infinicore/device.cc @@ -41,6 +41,8 @@ std::string Device::toString(const Type &type) { return "KUNLUN"; case Type::HYGON: return "HYGON"; + case Type::ALI: + return "ALI"; case Type::COUNT: return "COUNT"; default: diff --git a/src/infinicore/nn/embedding.cc b/src/infinicore/nn/embedding.cc index 75475b410..9aadc537c 100644 --- a/src/infinicore/nn/embedding.cc +++ b/src/infinicore/nn/embedding.cc @@ -45,7 +45,7 @@ Embedding::Embedding(size_t num_embeddings, Tensor Embedding::forward(const Tensor &indices) const { // TODO: Implement on-device embedding for all devices, then remove the condition and the classic approach auto device_type = device_.getType(); - if (device_type == Device::Type::NVIDIA || device_type == Device::Type::ILUVATAR || device_type == Device::Type::METAX || device_type == Device::Type::MOORE) { + if (device_type == Device::Type::NVIDIA || device_type == Device::Type::ILUVATAR || device_type == Device::Type::METAX || device_type == Device::Type::MOORE || device_type == Device::Type::KUNLUN || device_type == Device::Type::HYGON || device_type == Device::Type::QY || device_type == Device::Type::ALI) { // Use op::embedding which supports device-side input and batch dimension return op::embedding(indices->contiguous()->to(device_), weight_); } diff --git a/src/infinicore/nn/rmsnorm.cc b/src/infinicore/nn/rmsnorm.cc index 107dac44a..bc703300f 100644 --- a/src/infinicore/nn/rmsnorm.cc +++ b/src/infinicore/nn/rmsnorm.cc @@ -30,7 +30,8 @@ void RMSNorm::forward_inplace(Tensor &x, Tensor &residual) const { || device_.getType() == Device::Type::NVIDIA || device_.getType() == Device::Type::ILUVATAR || device_.getType() == Device::Type::METAX - || device_.getType() == Device::Type::MOORE) { + || device_.getType() == Device::Type::MOORE + || device_.getType() == Device::Type::ALI) { op::add_rms_norm_inplace(x, residual, weight_, static_cast(eps_)); } else { op::add_(residual, x, residual); diff --git a/src/infinicore/pybind11/device.hpp b/src/infinicore/pybind11/device.hpp index 666d5cb19..74cb0a23b 100644 --- a/src/infinicore/pybind11/device.hpp +++ b/src/infinicore/pybind11/device.hpp @@ -22,6 +22,7 @@ inline void bind(py::module &m) { .value("QY", Device::Type::QY) .value("KUNLUN", Device::Type::KUNLUN) .value("HYGON", Device::Type::HYGON) + .value("ALI", Device::Type::ALI) .value("COUNT", Device::Type::COUNT); device diff --git a/src/infiniop-test/src/main.cpp b/src/infiniop-test/src/main.cpp index aa1257d82..00e16cc99 100644 --- a/src/infiniop-test/src/main.cpp +++ b/src/infiniop-test/src/main.cpp @@ -22,7 +22,7 @@ void printUsage() { std::cout << " Path to the test gguf file" << std::endl << std::endl; std::cout << " --[:id]" << std::endl; - std::cout << " (Optional) Specify the device type --(cpu|nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon) and device ID (optional). CPU by default." << std::endl + std::cout << " (Optional) Specify the device type --(cpu|nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon|ali) and device ID (optional). CPU by default." << std::endl << std::endl; std::cout << " --warmup " << std::endl; std::cout << " (Optional) Number of warmups to perform before timing. Default to 0." << std::endl @@ -80,6 +80,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) { PARSE_DEVICE("--qy", INFINI_DEVICE_QY) PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN) PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON) + PARSE_DEVICE("--ali", INFINI_DEVICE_ALI) else if (arg == "--warmup" && i + 1 < argc) { args.warmups = std::stoi(argv[++i]); } diff --git a/src/infiniop/devices/handle.cc b/src/infiniop/devices/handle.cc index 6b036e553..3011900b5 100644 --- a/src/infiniop/devices/handle.cc +++ b/src/infiniop/devices/handle.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/cpu_handle.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/nvidia_handle.h" #endif #ifdef ENABLE_CAMBRICON_API @@ -47,6 +47,9 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) { #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, iluvatar); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, ali); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, qy); #endif @@ -93,6 +96,9 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) { #ifdef ENABLE_ILUVATAR_API DELETE(INFINI_DEVICE_ILUVATAR, iluvatar); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, ali); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, qy); #endif diff --git a/src/infiniop/devices/nvidia/nvidia_common.cu b/src/infiniop/devices/nvidia/nvidia_common.cu index 7c2369f1c..eca122dce 100644 --- a/src/infiniop/devices/nvidia/nvidia_common.cu +++ b/src/infiniop/devices/nvidia/nvidia_common.cu @@ -110,6 +110,18 @@ infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) { } // namespace iluvatar +namespace ali { + +Handle::Handle(int device_id) + : nvidia::Handle(INFINI_DEVICE_ALI, device_id) {} + +infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) { + *handle_ptr = new Handle(device_id); + return INFINI_STATUS_SUCCESS; +} + +} // namespace ali + namespace qy { Handle::Handle(int device_id) diff --git a/src/infiniop/devices/nvidia/nvidia_handle.h b/src/infiniop/devices/nvidia/nvidia_handle.h index f27d1c553..b644266e8 100644 --- a/src/infiniop/devices/nvidia/nvidia_handle.h +++ b/src/infiniop/devices/nvidia/nvidia_handle.h @@ -35,6 +35,17 @@ struct Handle : public nvidia::Handle { } // namespace iluvatar +namespace ali { + +struct Handle : public nvidia::Handle { + Handle(int device_id); + +public: + static infiniStatus_t create(InfiniopHandle **handle_ptr, int device_id); +}; + +} // namespace ali + namespace qy { struct Handle : public nvidia::Handle { diff --git a/src/infiniop/devices/nvidia/nvidia_kernel_common.cuh b/src/infiniop/devices/nvidia/nvidia_kernel_common.cuh index 02cee1ebf..9fc25432b 100644 --- a/src/infiniop/devices/nvidia/nvidia_kernel_common.cuh +++ b/src/infiniop/devices/nvidia/nvidia_kernel_common.cuh @@ -53,7 +53,7 @@ exp_(const float val) { return expf(val); } -#if !defined(ENABLE_ILUVATAR_API) && !defined(ENABLE_QY_API) && !defined(ENABLE_HYGON_API) +#if !defined(ENABLE_ILUVATAR_API) && !defined(ENABLE_QY_API) && !defined(ENABLE_HYGON_API) && !defined(ENABLE_ALI_API) __forceinline__ __device__ long double exp_(const long double val) { return expl(val); diff --git a/src/infiniop/ops/add/operator.cc b/src/infiniop/ops/add/operator.cc index eba226421..99d128851 100644 --- a/src/infiniop/ops/add/operator.cc +++ b/src/infiniop/ops/add/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/add_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_ALI_API) #include "nvidia/add_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -48,6 +48,9 @@ __C infiniStatus_t infiniopCreateAddDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -88,6 +91,9 @@ __C infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, siz #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -136,6 +142,9 @@ __C infiniStatus_t infiniopAdd( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -178,6 +187,9 @@ infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc) { #ifdef ENABLE_ILUVATAR_API DELETE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/add_rms_norm/operator.cc b/src/infiniop/ops/add_rms_norm/operator.cc index 62187cf34..43ae9d731 100644 --- a/src/infiniop/ops/add_rms_norm/operator.cc +++ b/src/infiniop/ops/add_rms_norm/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/add_rms_norm_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/add_rms_norm_nvidia.cuh" #endif #ifdef ENABLE_ASCEND_API @@ -59,6 +59,9 @@ __C infiniStatus_t infiniopCreateAddRMSNormDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_MOORE_API CREATE(INFINI_DEVICE_MOORE, moore); #endif @@ -98,6 +101,9 @@ __C infiniStatus_t infiniopGetAddRMSNormWorkspaceSize(infiniopAddRMSNormDescript #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_MOORE_API GET(INFINI_DEVICE_MOORE, moore); #endif @@ -148,6 +154,9 @@ __C infiniStatus_t infiniopAddRMSNorm( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_MOORE_API CALCULATE(INFINI_DEVICE_MOORE, moore); #endif @@ -189,6 +198,9 @@ __C infiniStatus_t infiniopDestroyAddRMSNormDescriptor(infiniopAddRMSNormDescrip #ifdef ENABLE_ILUVATAR_API DESTROY(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DESTROY(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_MOORE_API DESTROY(INFINI_DEVICE_MOORE, moore); #endif diff --git a/src/infiniop/ops/causal_softmax/operator.cc b/src/infiniop/ops/causal_softmax/operator.cc index b1be4c075..972111080 100644 --- a/src/infiniop/ops/causal_softmax/operator.cc +++ b/src/infiniop/ops/causal_softmax/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/causal_softmax_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/causal_softmax_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -48,6 +48,9 @@ __C infiniStatus_t infiniopCreateCausalSoftmaxDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -90,6 +93,9 @@ __C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDe #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -137,6 +143,9 @@ __C infiniStatus_t infiniopCausalSoftmax( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -179,6 +188,9 @@ __C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxD #ifdef ENABLE_ILUVATAR_API DESTROY(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DESTROY(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DESTROY(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/clip/operator.cc b/src/infiniop/ops/clip/operator.cc index 88a5ac719..611ad21c7 100644 --- a/src/infiniop/ops/clip/operator.cc +++ b/src/infiniop/ops/clip/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/clip_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_ALI_API) #include "nvidia/clip_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -42,6 +42,9 @@ __C infiniStatus_t infiniopCreateClipDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -76,6 +79,9 @@ __C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, s #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -118,6 +124,9 @@ __C infiniStatus_t infiniopClip( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -154,6 +163,9 @@ infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc) { #ifdef ENABLE_ILUVATAR_API DELETE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/embedding/operator.cc b/src/infiniop/ops/embedding/operator.cc index 09cd1f737..f91bde335 100644 --- a/src/infiniop/ops/embedding/operator.cc +++ b/src/infiniop/ops/embedding/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/embedding_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/embedding_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -42,6 +42,9 @@ __C infiniStatus_t infiniopCreateEmbeddingDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -85,6 +88,9 @@ __C infiniStatus_t infiniopEmbedding( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -122,6 +128,9 @@ __C infiniStatus_t infiniopDestroyEmbeddingDescriptor(infiniopEmbeddingDescripto #ifdef ENABLE_ILUVATAR_API DESTROY(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DESTROY(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DESTROY(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/gemm/nvidia/gemm_nvidia.cu b/src/infiniop/ops/gemm/nvidia/gemm_nvidia.cu index 0e0c65f2b..31ea50bc3 100644 --- a/src/infiniop/ops/gemm/nvidia/gemm_nvidia.cu +++ b/src/infiniop/ops/gemm/nvidia/gemm_nvidia.cu @@ -43,7 +43,7 @@ infiniStatus_t Descriptor::calculate( void *stream) const { cudaDataType a_type, b_type, c_type; -#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) cudaDataType compute_type; #else cublasComputeType_t compute_type; @@ -52,7 +52,7 @@ infiniStatus_t Descriptor::calculate( switch (_dtype) { case INFINI_DTYPE_F16: a_type = b_type = c_type = CUDA_R_16F; -#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) compute_type = CUDA_R_32F; #else compute_type = CUBLAS_COMPUTE_32F; @@ -60,7 +60,7 @@ infiniStatus_t Descriptor::calculate( break; case INFINI_DTYPE_BF16: a_type = b_type = c_type = CUDA_R_16BF; -#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) compute_type = CUDA_R_32F; #else compute_type = CUBLAS_COMPUTE_32F; @@ -68,7 +68,7 @@ infiniStatus_t Descriptor::calculate( break; case INFINI_DTYPE_F32: a_type = b_type = c_type = CUDA_R_32F; -#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) compute_type = CUDA_R_32F; #else compute_type = CUBLAS_COMPUTE_32F_FAST_TF32; diff --git a/src/infiniop/ops/gemm/operator.cc b/src/infiniop/ops/gemm/operator.cc index 0a0995e8e..590947280 100644 --- a/src/infiniop/ops/gemm/operator.cc +++ b/src/infiniop/ops/gemm/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/gemm_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/gemm_nvidia.cuh" #endif #ifdef ENABLE_CAMBRICON_API @@ -51,6 +51,9 @@ __C infiniStatus_t infiniopCreateGemmDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -102,6 +105,9 @@ infiniopGetGemmWorkspaceSize( #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -160,6 +166,9 @@ __C infiniStatus_t infiniopGemm( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -208,6 +217,9 @@ infiniopDestroyGemmDescriptor(infiniopGemmDescriptor_t desc) { #ifdef ENABLE_ILUVATAR_API DELETE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/layer_norm/operator.cc b/src/infiniop/ops/layer_norm/operator.cc index 3dbbdcb21..1554e8b3b 100644 --- a/src/infiniop/ops/layer_norm/operator.cc +++ b/src/infiniop/ops/layer_norm/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/layer_norm_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_ALI_API) #include "nvidia/layer_norm_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -46,6 +46,9 @@ __C infiniStatus_t infiniopCreateLayerNormDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -76,6 +79,9 @@ __C infiniStatus_t infiniopGetLayerNormWorkspaceSize(infiniopLayerNormDescriptor #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -126,6 +132,9 @@ __C infiniStatus_t infiniopLayerNorm( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -156,6 +165,9 @@ infiniopDestroyLayerNormDescriptor(infiniopLayerNormDescriptor_t desc) { #ifdef ENABLE_NVIDIA_API DELETE(INFINI_DEVICE_NVIDIA, nvidia); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/paged_attention/operator.cc b/src/infiniop/ops/paged_attention/operator.cc index 8bb603cdb..011a530a1 100644 --- a/src/infiniop/ops/paged_attention/operator.cc +++ b/src/infiniop/ops/paged_attention/operator.cc @@ -2,7 +2,7 @@ #include "../../handle.h" #include "infiniop/ops/paged_attention.h" -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API) #include "nvidia/paged_attention_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -37,6 +37,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionDescriptor( #ifdef ENABLE_METAX_API CREATE(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -61,6 +64,9 @@ __C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize( #ifdef ENABLE_METAX_API GET(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -89,6 +95,9 @@ __C infiniStatus_t infiniopPagedAttention( #ifdef ENABLE_METAX_API CALCULATE(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -112,6 +121,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionDescriptor( #ifdef ENABLE_METAX_API DESTROY(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + DESTROY(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API DESTROY(INFINI_DEVICE_ILUVATAR, nvidia) #endif diff --git a/src/infiniop/ops/paged_attention_prefill/cuda/kernel_v2.cuh b/src/infiniop/ops/paged_attention_prefill/cuda/kernel_v2.cuh index 28bcccaeb..281f918ea 100644 --- a/src/infiniop/ops/paged_attention_prefill/cuda/kernel_v2.cuh +++ b/src/infiniop/ops/paged_attention_prefill/cuda/kernel_v2.cuh @@ -1,7 +1,7 @@ #ifndef __PAGED_ATTENTION_PREFILL_KERNEL_V2_CUH__ #define __PAGED_ATTENTION_PREFILL_KERNEL_V2_CUH__ -#ifdef ENABLE_NVIDIA_API +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) #include #include #include diff --git a/src/infiniop/ops/paged_attention_prefill/operator.cc b/src/infiniop/ops/paged_attention_prefill/operator.cc index 207157b22..5f770eb26 100644 --- a/src/infiniop/ops/paged_attention_prefill/operator.cc +++ b/src/infiniop/ops/paged_attention_prefill/operator.cc @@ -2,7 +2,7 @@ #include "../../handle.h" #include "infiniop/ops/paged_attention_prefill.h" -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API) #include "nvidia/paged_attention_prefill_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -39,6 +39,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor( #ifdef ENABLE_METAX_API CREATE(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -63,6 +66,9 @@ __C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize( #ifdef ENABLE_METAX_API GET(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -94,6 +100,9 @@ __C infiniStatus_t infiniopPagedAttentionPrefill( #ifdef ENABLE_METAX_API CALCULATE(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -117,6 +126,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionPrefillDescriptor( #ifdef ENABLE_METAX_API DESTROY(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + DESTROY(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API DESTROY(INFINI_DEVICE_ILUVATAR, nvidia) #endif diff --git a/src/infiniop/ops/paged_caching/operator.cc b/src/infiniop/ops/paged_caching/operator.cc index 3afc7a84b..a2290b2a6 100644 --- a/src/infiniop/ops/paged_caching/operator.cc +++ b/src/infiniop/ops/paged_caching/operator.cc @@ -2,7 +2,7 @@ #include "../../handle.h" #include "infiniop/ops/paged_caching.h" -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ALI_API) || defined(ENABLE_ILUVATAR_API) #include "nvidia/paged_caching_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -32,6 +32,9 @@ __C infiniStatus_t infiniopCreatePagedCachingDescriptor( #ifdef ENABLE_METAX_API CREATE(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -56,6 +59,9 @@ __C infiniStatus_t infiniopGetPagedCachingWorkspaceSize( #ifdef ENABLE_METAX_API GET(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -84,6 +90,9 @@ __C infiniStatus_t infiniopPagedCaching( #ifdef ENABLE_METAX_API CALCULATE(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia) #endif @@ -107,6 +116,9 @@ __C infiniStatus_t infiniopDestroyPagedCachingDescriptor( #ifdef ENABLE_METAX_API DESTROY(INFINI_DEVICE_METAX, metax) #endif +#ifdef ENABLE_ALI_API + DESTROY(INFINI_DEVICE_ALI, nvidia) +#endif #ifdef ENABLE_ILUVATAR_API DESTROY(INFINI_DEVICE_ILUVATAR, nvidia) #endif diff --git a/src/infiniop/ops/random_sample/operator.cc b/src/infiniop/ops/random_sample/operator.cc index 8239d97c5..e7a2d4e3a 100644 --- a/src/infiniop/ops/random_sample/operator.cc +++ b/src/infiniop/ops/random_sample/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/random_sample_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/random_sample_nvidia.cuh" #endif #ifdef ENABLE_CAMBRICON_API @@ -50,6 +50,9 @@ infiniopCreateRandomSampleDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -101,6 +104,9 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize( #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -165,6 +171,9 @@ __C infiniStatus_t infiniopRandomSample( #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_HYGON_API CALCULATE(INFINI_DEVICE_HYGON, nvidia); #endif @@ -210,6 +219,9 @@ __C infiniStatus_t infiniopDestroyRandomSampleDescriptor( #ifdef ENABLE_ILUVATAR_API DELETE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/rearrange/operator.cc b/src/infiniop/ops/rearrange/operator.cc index c7a309033..f9081b212 100644 --- a/src/infiniop/ops/rearrange/operator.cc +++ b/src/infiniop/ops/rearrange/operator.cc @@ -8,7 +8,7 @@ #ifdef ENABLE_ASCEND_API #include "ascend/rearrange_ascend.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/rearrange_nvidia.cuh" #endif #ifdef ENABLE_CAMBRICON_API @@ -52,6 +52,9 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -102,6 +105,9 @@ __C infiniStatus_t infiniopRearrange( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -150,6 +156,9 @@ __C infiniStatus_t infiniopDestroyRearrangeDescriptor( #ifdef ENABLE_ILUVATAR_API DELETE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/rms_norm/operator.cc b/src/infiniop/ops/rms_norm/operator.cc index 4311f516a..43bbb752f 100644 --- a/src/infiniop/ops/rms_norm/operator.cc +++ b/src/infiniop/ops/rms_norm/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/rms_norm_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/rms_norm_nvidia.cuh" #endif #ifdef ENABLE_ASCEND_API @@ -52,6 +52,9 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -97,6 +100,9 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -143,6 +149,9 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -188,6 +197,9 @@ __C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t #ifdef ENABLE_ILUVATAR_API DESTROY(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DESTROY(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DESTROY(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/rope/operator.cc b/src/infiniop/ops/rope/operator.cc index d24ec4090..f5b3a346f 100644 --- a/src/infiniop/ops/rope/operator.cc +++ b/src/infiniop/ops/rope/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/rope_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #include "nvidia/rope_nvidia.cuh" #endif #ifdef ENABLE_ASCEND_API @@ -56,6 +56,9 @@ __C infiniStatus_t infiniopCreateRoPEDescriptor( #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -101,6 +104,9 @@ __C infiniStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc, #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -155,6 +161,9 @@ __C infiniStatus_t infiniopRoPE( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -201,6 +210,9 @@ infiniopDestroyRoPEDescriptor(infiniopRoPEDescriptor_t desc) { #ifdef ENABLE_ILUVATAR_API DELETE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/swiglu/operator.cc b/src/infiniop/ops/swiglu/operator.cc index b3fabba32..581e9900e 100644 --- a/src/infiniop/ops/swiglu/operator.cc +++ b/src/infiniop/ops/swiglu/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/swiglu_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API) || defined(ENABLE_ALI_API) #if defined(ENABLE_NINETOOTHED) #include "ninetoothed/swiglu.h" #else @@ -67,6 +67,9 @@ __C infiniStatus_t infiniopCreateSwiGLUDescriptor( CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CREATE(INFINI_DEVICE_QY, nvidia); #endif @@ -125,6 +128,9 @@ __C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t des GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -190,6 +196,9 @@ __C infiniStatus_t infiniopSwiGLU( CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -250,6 +259,9 @@ infiniopDestroySwiGLUDescriptor(infiniopSwiGLUDescriptor_t desc) { DELETE(INFINI_DEVICE_ILUVATAR, nvidia); #endif #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infiniop/ops/zeros/operator.cc b/src/infiniop/ops/zeros/operator.cc index 02ece44c2..037a1859b 100644 --- a/src/infiniop/ops/zeros/operator.cc +++ b/src/infiniop/ops/zeros/operator.cc @@ -5,7 +5,7 @@ #ifdef ENABLE_CPU_API #include "cpu/zeros_cpu.h" #endif -#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) +#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_ALI_API) #include "nvidia/zeros_nvidia.cuh" #endif #ifdef ENABLE_METAX_API @@ -37,6 +37,9 @@ __C infiniStatus_t infiniopCreateZerosDescriptor( #ifdef ENABLE_NVIDIA_API CREATE(INFINI_DEVICE_NVIDIA, nvidia); #endif +#ifdef ENABLE_ALI_API + CREATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_ILUVATAR_API CREATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif @@ -73,6 +76,9 @@ __C infiniStatus_t infiniopGetZerosWorkspaceSize(infiniopZerosDescriptor_t desc, #ifdef ENABLE_ILUVATAR_API GET(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + GET(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API GET(INFINI_DEVICE_QY, nvidia); #endif @@ -114,6 +120,9 @@ __C infiniStatus_t infiniopZeros( #ifdef ENABLE_ILUVATAR_API CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + CALCULATE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API CALCULATE(INFINI_DEVICE_QY, nvidia); #endif @@ -149,6 +158,9 @@ infiniopDestroyZerosDescriptor(infiniopZerosDescriptor_t desc) { #ifdef ENABLE_ILUVATAR_API DELETE(INFINI_DEVICE_ILUVATAR, nvidia); #endif +#ifdef ENABLE_ALI_API + DELETE(INFINI_DEVICE_ALI, nvidia); +#endif #ifdef ENABLE_QY_API DELETE(INFINI_DEVICE_QY, nvidia); #endif diff --git a/src/infinirt-test/main.cc b/src/infinirt-test/main.cc index e6613a262..5c3e97db8 100644 --- a/src/infinirt-test/main.cc +++ b/src/infinirt-test/main.cc @@ -23,6 +23,7 @@ void printUsage() { << " qy" << std::endl << " kunlun" << std::endl << " hygon" << std::endl + << " ali" << std::endl << std::endl; exit(EXIT_FAILURE); } @@ -55,6 +56,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) { else PARSE_DEVICE("--qy", INFINI_DEVICE_QY) else PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN) else PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON) + else PARSE_DEVICE("--ali", INFINI_DEVICE_ALI) else { printUsage(); } diff --git a/src/infinirt/cuda/infinirt_cuda.cu b/src/infinirt/cuda/infinirt_cuda.cu index 697e47646..dedbd6551 100644 --- a/src/infinirt/cuda/infinirt_cuda.cu +++ b/src/infinirt/cuda/infinirt_cuda.cu @@ -21,6 +21,8 @@ namespace infinirt::iluvatar { namespace infinirt::qy { #elif defined(ENABLE_HYGON_API) namespace infinirt::hygon { +#elif defined(ENABLE_ALI_API) +namespace infinirt::ali { #else namespace infinirt::cuda { // 默认回退 #endif diff --git a/src/infinirt/cuda/infinirt_cuda.cuh b/src/infinirt/cuda/infinirt_cuda.cuh index e73912f64..c89e83d83 100644 --- a/src/infinirt/cuda/infinirt_cuda.cuh +++ b/src/infinirt/cuda/infinirt_cuda.cuh @@ -38,4 +38,13 @@ INFINIRT_DEVICE_API_NOOP #endif } // namespace infinirt::hygon +// ALI namespace +namespace infinirt::ali { +#ifdef ENABLE_ALI_API +INFINIRT_DEVICE_API_IMPL +#else +INFINIRT_DEVICE_API_NOOP +#endif +} // namespace infinirt::ali + #endif // __INFINIRT_CUDA_H__ diff --git a/src/infinirt/infinirt.cc b/src/infinirt/infinirt.cc index e16f1c0f4..ad2e640e6 100644 --- a/src/infinirt/infinirt.cc +++ b/src/infinirt/infinirt.cc @@ -81,6 +81,9 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ case INFINI_DEVICE_HYGON: \ _status = infinirt::hygon::API PARAMS; \ break; \ + case INFINI_DEVICE_ALI: \ + _status = infinirt::ali::API PARAMS; \ + break; \ default: \ _status = INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \ } \ diff --git a/test/infinicore/framework/config.py b/test/infinicore/framework/config.py index d14c003e9..058fda90c 100644 --- a/test/infinicore/framework/config.py +++ b/test/infinicore/framework/config.py @@ -24,6 +24,7 @@ def get_supported_hardware_platforms(): ("--kunlun", "Kunlun XPUs (requires torch_xmlir)"), ("--hygon", "Hygon DCUs"), ("--qy", "QY GPUs"), + ("--ali", "Ali PPU accelerators"), ] @@ -230,13 +231,21 @@ def get_test_devices(args): if args.qy: try: - # Iluvatar GPU detection + # QY GPU detection import torch devices_to_test.append(InfiniDeviceEnum.QY) except ImportError: print("Warning: QY GPU support not available") + if args.ali: + try: + import torch + + devices_to_test.append(InfiniDeviceEnum.ALI) + except ImportError: + print("Warning: Ali PPU support not available") + # Default to CPU if no devices specified if not devices_to_test: devices_to_test = [InfiniDeviceEnum.CPU] diff --git a/test/infinicore/framework/devices.py b/test/infinicore/framework/devices.py index e2a95ee71..59b0ad558 100644 --- a/test/infinicore/framework/devices.py +++ b/test/infinicore/framework/devices.py @@ -9,6 +9,7 @@ class InfiniDeviceEnum: KUNLUN = 7 HYGON = 8 QY = 9 + ALI = 10 InfiniDeviceNames = { @@ -22,6 +23,7 @@ class InfiniDeviceEnum: InfiniDeviceEnum.QY: "Qy", InfiniDeviceEnum.KUNLUN: "Kunlun", InfiniDeviceEnum.HYGON: "Hygon", + InfiniDeviceEnum.ALI: "Ali", } torch_device_map = { @@ -35,4 +37,5 @@ class InfiniDeviceEnum: InfiniDeviceEnum.KUNLUN: "cuda", InfiniDeviceEnum.HYGON: "cuda", InfiniDeviceEnum.QY: "cuda", + InfiniDeviceEnum.ALI: "cuda", } diff --git a/test/infinicore/test.py b/test/infinicore/test.py index 36aeffe4e..f69843d78 100644 --- a/test/infinicore/test.py +++ b/test/infinicore/test.py @@ -183,6 +183,7 @@ def func6_initialize_device_relationship(): _infinicore.Device.Type.QY, # 9 "cuda" _infinicore.Device.Type.KUNLUN, # 7 "cuda" _infinicore.Device.Type.HYGON, # 8 "cuda" + _infinicore.Device.Type.ALI, # 10 "cuda" ] if True: print("\n ---------- 测试 CPU") diff --git a/test/infiniop/libinfiniop/devices.py b/test/infiniop/libinfiniop/devices.py index db2e8ae4d..bb776e242 100644 --- a/test/infiniop/libinfiniop/devices.py +++ b/test/infiniop/libinfiniop/devices.py @@ -9,6 +9,7 @@ class InfiniDeviceEnum: KUNLUN = 7 HYGON = 8 QY = 9 + ALI = 10 InfiniDeviceNames = { @@ -22,6 +23,7 @@ class InfiniDeviceEnum: InfiniDeviceEnum.KUNLUN: "Kunlun", InfiniDeviceEnum.HYGON: "Hygon", InfiniDeviceEnum.QY: "QY", + InfiniDeviceEnum.ALI: "Ali", } # Mapping that maps InfiniDeviceEnum to torch device string @@ -36,4 +38,5 @@ class InfiniDeviceEnum: InfiniDeviceEnum.KUNLUN: "cuda", InfiniDeviceEnum.HYGON: "cuda", InfiniDeviceEnum.QY: "cuda", + InfiniDeviceEnum.ALI: "cuda", } diff --git a/test/infiniop/libinfiniop/utils.py b/test/infiniop/libinfiniop/utils.py index 9b43c47c5..ec8763a4e 100644 --- a/test/infiniop/libinfiniop/utils.py +++ b/test/infiniop/libinfiniop/utils.py @@ -433,6 +433,11 @@ def get_args(): action="store_true", help="Run HYGON DCU test", ) + parser.add_argument( + "--ali", + action="store_true", + help="Run ALI PPU test", + ) return parser.parse_args() @@ -487,6 +492,7 @@ def filter_tensor_dtypes_by_device(device, tensor_dtypes): InfiniDeviceEnum.ASCEND, InfiniDeviceEnum.ILUVATAR, InfiniDeviceEnum.CAMBRICON, + InfiniDeviceEnum.ALI, ): return tensor_dtypes else: @@ -757,6 +763,10 @@ def get_test_devices(args): import torch devices_to_test.append(InfiniDeviceEnum.HYGON) + if args.ali: + import torch + + devices_to_test.append(InfiniDeviceEnum.ALI) if not devices_to_test: devices_to_test = [InfiniDeviceEnum.CPU] diff --git a/xmake.lua b/xmake.lua index a4d311a7d..e211347cb 100644 --- a/xmake.lua +++ b/xmake.lua @@ -125,6 +125,18 @@ if has_config("iluvatar-gpu") then includes("xmake/iluvatar.lua") end +-- ali +option("ali-ppu") + set_default(false) + set_showmenu(true) + set_description("Whether to compile implementations for Ali PPU") +option_end() + +if has_config("ali-ppu") then + add_defines("ENABLE_ALI_API") + includes("xmake/ali.lua") +end + -- qy option("qy-gpu") set_default(false) @@ -276,6 +288,9 @@ target("infinirt") if has_config("iluvatar-gpu") then add_deps("infinirt-iluvatar") end + if has_config("ali-ppu") then + add_deps("infinirt-ali") + end if has_config("qy-gpu") then add_deps("infinirt-qy") add_files("build/.objs/infinirt-qy/rules/qy.cuda/src/infinirt/cuda/*.cu.o", {public = true}) @@ -309,6 +324,9 @@ target("infiniop") if has_config("iluvatar-gpu") then add_deps("infiniop-iluvatar") end + if has_config("ali-ppu") then + add_deps("infiniop-ali") + end if has_config("qy-gpu") then add_deps("infiniop-qy") add_files("build/.objs/infiniop-qy/rules/qy.cuda/src/infiniop/ops/*/nvidia/*.cu.o", {public = true}) @@ -364,6 +382,9 @@ target("infiniccl") if has_config("iluvatar-gpu") then add_deps("infiniccl-iluvatar") end + if has_config("ali-ppu") then + add_deps("infiniccl-ali") + end if has_config("qy-gpu") then add_deps("infiniccl-qy") add_files("build/.objs/infiniccl-qy/rules/qy.cuda/src/infiniccl/cuda/*.cu.o", {public = true}) diff --git a/xmake/ali.lua b/xmake/ali.lua new file mode 100644 index 000000000..940650d67 --- /dev/null +++ b/xmake/ali.lua @@ -0,0 +1,135 @@ +local CUDNN_ROOT = os.getenv("CUDNN_ROOT") or os.getenv("CUDNN_HOME") or os.getenv("CUDNN_PATH") +if CUDNN_ROOT ~= nil then + add_includedirs(CUDNN_ROOT .. "/include") +end + +local CUTLASS_ROOT = os.getenv("CUTLASS_ROOT") or os.getenv("CUTLASS_HOME") or os.getenv("CUTLASS_PATH") + +if CUTLASS_ROOT ~= nil then + add_includedirs(CUTLASS_ROOT) +end + +target("infiniop-ali") + set_kind("static") + add_deps("infini-utils") + on_install(function (target) end) + + set_policy("build.cuda.devlink", true) + set_toolchains("cuda") + add_links("cudart", "cublas") + if has_config("cudnn") then + add_links("cudnn") + end + + on_load(function (target) + import("lib.detect.find_tool") + local nvcc = find_tool("nvcc") + if nvcc ~= nil then + if is_plat("windows") then + nvcc_path = os.iorun("where nvcc"):match("(.-)\r?\n") + else + nvcc_path = nvcc.program + end + + target:add("linkdirs", path.directory(path.directory(nvcc_path)) .. "/lib64/stubs") + target:add("links", "cuda") + end + end) + + if is_plat("windows") then + add_cuflags("-Xcompiler=/utf-8", "--expt-relaxed-constexpr", "--allow-unsupported-compiler") + add_cuflags("-Xcompiler=/W3", "-Xcompiler=/WX") + add_cxxflags("/FS") + if CUDNN_ROOT ~= nil then + add_linkdirs(CUDNN_ROOT .. "\\lib\\x64") + end + else + add_cuflags("-Xcompiler=-Wall", "-Xcompiler=-Werror") + add_cuflags("-Xcompiler=-fPIC") + add_cuflags("--extended-lambda") + add_culdflags("-Xcompiler=-fPIC") + add_cxflags("-fPIC") + add_cxxflags("-fPIC") + add_cflags("-fPIC") + add_cuflags("--expt-relaxed-constexpr") + if CUDNN_ROOT ~= nil then + add_linkdirs(CUDNN_ROOT .. "/lib") + end + end + + add_cuflags("-Xcompiler=-Wno-error=deprecated-declarations", "-Xcompiler=-Wno-error=unused-function") + + local arch_opt = get_config("cuda_arch") + if arch_opt and type(arch_opt) == "string" then + for _, arch in ipairs(arch_opt:split(",")) do + arch = arch:trim() + local compute = arch:gsub("sm_", "compute_") + add_cuflags("-gencode=arch=" .. compute .. ",code=" .. arch) + end + else + add_cugencodes("native") + end + + set_languages("cxx17") + add_files("../src/infiniop/devices/nvidia/*.cu", "../src/infiniop/ops/*/nvidia/*.cu") + + if has_config("ninetoothed") then + add_files("../build/ninetoothed/*.c", "../build/ninetoothed/*.cpp") + end +target_end() + +target("infinirt-ali") + set_kind("static") + add_deps("infini-utils") + on_install(function (target) end) + + set_policy("build.cuda.devlink", true) + set_toolchains("cuda") + add_links("cudart") + + if is_plat("windows") then + add_cuflags("-Xcompiler=/utf-8", "--expt-relaxed-constexpr", "--allow-unsupported-compiler") + add_cxxflags("/FS") + else + add_cuflags("-Xcompiler=-fPIC", "-Xcompiler=-shared") + add_culdflags("-Xcompiler=-fPIC", "-Xcompiler=-shared") + add_cxflags("-fPIC", "-shared") + add_cxxflags("-fPIC", "-shared") + add_shflags("-fPIC") + end + + set_languages("cxx17") + add_files("../src/infinirt/cuda/*.cu") +target_end() + +target("infiniccl-ali") + set_kind("static") + add_deps("infinirt") + on_install(function (target) end) + if has_config("ccl") then + set_policy("build.cuda.devlink", true) + set_toolchains("cuda") + add_links("cudart") + + if not is_plat("windows") then + add_cuflags("-Xcompiler=-fPIC") + add_culdflags("-Xcompiler=-fPIC") + add_cxflags("-fPIC") + add_cxxflags("-fPIC") + + local nccl_root = os.getenv("NCCL_ROOT") + if nccl_root then + add_includedirs(nccl_root .. "/include") + add_links(nccl_root .. "/lib/libnccl.so") + else + add_links("nccl") -- Fall back to default nccl linking + end + + add_files("../src/infiniccl/cuda/*.cu") + else + print("[Warning] NCCL is not supported on Windows") + end + end + set_languages("cxx17") + +target_end()