From 4b5c79c07b41abd17ebfc9367633019e2698f031 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Thu, 5 Jun 2025 14:20:03 +0900 Subject: [PATCH] Make sure things work on GPU for truncation to hardware types --- pass/Raptor.cpp | 12 ++----- pass/RaptorLogic.h | 15 +++++--- test/Integration/Truncate/Cpp/openmp-cpu.cpp | 28 +++++++++------ test/Integration/Truncate/Cpp/openmp-gpu.cpp | 29 ++++++++++++++++ .../Integration/Truncate/Cpp/truncate-all.cpp | 10 +++--- .../Truncate/Cpp/truncate-count.cpp | 2 +- test/Unit/Truncate/cmp.ll | 2 +- test/Unit/Truncate/intrinsic.ll | 2 +- test/Unit/Truncate/simple.ll | 4 +-- test/lit.site.cfg.py.in | 34 ++++++++++++++++--- 10 files changed, 100 insertions(+), 38 deletions(-) create mode 100644 test/Integration/Truncate/Cpp/openmp-gpu.cpp diff --git a/pass/Raptor.cpp b/pass/Raptor.cpp index 9174051b..d9922510 100644 --- a/pass/Raptor.cpp +++ b/pass/Raptor.cpp @@ -88,18 +88,12 @@ llvm::cl::opt RaptorPostOpt("raptor-postopt", cl::init(false), cl::Hidden, cl::desc("Run raptorpostprocessing optimizations")); -llvm::cl::opt RaptorAttributor("raptor-attributor", cl::init(false), - cl::Hidden, - cl::desc("Run attributor post Raptor")); - llvm::cl::opt RaptorOMPOpt("raptor-omp-opt", cl::init(false), cl::Hidden, cl::desc("Whether to enable openmp opt")); -llvm::cl::opt RaptorTruncateAll( - "raptor-truncate-all", cl::init(""), cl::Hidden, - cl::desc( - "Truncate all floating point operations. " - "E.g. \"64to32\" or \"64to-\".")); +llvm::cl::opt + RaptorTruncateAll("raptor-truncate-all", cl::init(""), cl::Hidden, + cl::desc("Truncate all floating point operations.")); llvm::cl::opt RaptorTruncateCount( "raptor-truncate-count", cl::init(false), cl::Hidden, diff --git a/pass/RaptorLogic.h b/pass/RaptorLogic.h index b0b2c367..9c79fea4 100644 --- a/pass/RaptorLogic.h +++ b/pass/RaptorLogic.h @@ -315,14 +315,19 @@ class TruncationConfiguration { static TruncationConfiguration getInitial(FloatTruncation Truncation, TruncateMode Mode) { - if (Mode == TruncOpMode) - return TruncationConfiguration{Truncation, Mode, true, true, false}; - else if (Mode == TruncMemMode) + if (Mode == TruncOpMode) { + if (Truncation.isToFPRT()) + return TruncationConfiguration{Truncation, Mode, true, true, false}; + else + return TruncationConfiguration{Truncation, Mode, false, false, false}; + } else if (Mode == TruncMemMode) { + assert(Truncation.isToFPRT()); return TruncationConfiguration{Truncation, Mode, false, false, false}; - else if (Mode == TruncOpFullModuleMode) + } else if (Mode == TruncOpFullModuleMode) { return TruncationConfiguration{Truncation, Mode, true, false, false}; - else + } else { llvm_unreachable(""); + } } }; diff --git a/test/Integration/Truncate/Cpp/openmp-cpu.cpp b/test/Integration/Truncate/Cpp/openmp-cpu.cpp index 448b0bbf..4a3433a6 100644 --- a/test/Integration/Truncate/Cpp/openmp-cpu.cpp +++ b/test/Integration/Truncate/Cpp/openmp-cpu.cpp @@ -36,15 +36,6 @@ double teams(double a, double b) { double teams_par(double a, double b) { double c = 0; -#pragma omp teams parallel - { - c = a + b; - } - return c; -} - -double teams__par(double a, double b) { - double c = 0; #pragma omp teams { #pragma omp parallel @@ -64,6 +55,21 @@ double par(double a, double b) { return c; } +double task(double a, double b) { + double c = 0; +#pragma omp parallel + { +#pragma omp single + { +#pragma omp task + { + c = a + b; + } + } + } + return c; +} + int main() { double a = 1; double b = 1000; @@ -84,9 +90,11 @@ int main() { printf("%f + %f = %f\n", a, b, c); APPROX_EQ(c, 1000, 1e-5); - c = __raptor_truncate_op_func(teams__par, FROM, TO)(a, b); +#if 0 + c = __raptor_truncate_op_func(task, FROM, TO)(a, b); printf("%f + %f = %f\n", a, b, c); APPROX_EQ(c, 1000, 1e-5); +#endif return 0; } diff --git a/test/Integration/Truncate/Cpp/openmp-gpu.cpp b/test/Integration/Truncate/Cpp/openmp-gpu.cpp new file mode 100644 index 00000000..85bdde39 --- /dev/null +++ b/test/Integration/Truncate/Cpp/openmp-gpu.cpp @@ -0,0 +1,29 @@ +// clang-format off +// RUN: %clang -O3 %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out +// RUN: %clang -O3 -fopenmp %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out +// RUN: if [ "%hasOpenMPGPU" == "1" ]; then %clang -O3 -fopenmp --offload-arch=native %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out; fi +// clang-format on + +#include "../../test_utils.h" +#include + +#define FROM 64 +#define TO 0, 16 + +template fty *__raptor_truncate_op_func(fty *, int, int, int); + +double kernel(double a, double b) { return a + b; } + +int main() { + double a = 1; + double b = 10000; + double c; +#pragma omp target map(tofrom : c) + { + c = __raptor_truncate_op_func(kernel, FROM, TO)(a, b); + } + + printf("%f + %f = %f\n", a, b, c); + APPROX_EQ(c, b, 1e-5); + return 0; +} diff --git a/test/Integration/Truncate/Cpp/truncate-all.cpp b/test/Integration/Truncate/Cpp/truncate-all.cpp index 62d34fa8..d6c863aa 100644 --- a/test/Integration/Truncate/Cpp/truncate-all.cpp +++ b/test/Integration/Truncate/Cpp/truncate-all.cpp @@ -1,23 +1,23 @@ // clang-format off // Baseline -// RUN: %clang -O3 %s -S -emit-llvm -o - %linkRaptorRT %loadClangRaptor -S -mllvm --raptor-truncate-count=false -mllvm --raptor-truncate-all="" | %lli - | FileCheck --check-prefix BASELINE %s +// RUN: %clang -O3 %s -S -emit-llvm -o - %linkRaptorRT %loadClangPluginRaptor -S -mllvm --raptor-truncate-count=false -mllvm --raptor-truncate-all="" | %lli - | FileCheck --check-prefix BASELINE %s // BASELINE: 900000000.560000 // Truncated -// RUN: %clang -mllvm --raptor-truncate-count=false -O3 %s -o %t.a.out %linkRaptorRT %loadClangRaptor -mllvm --raptor-truncate-all="ieee(64)-ieee(32)" -lmpfr -lm && %t.a.out | FileCheck --check-prefix TO_32 %s +// RUN: %clang -mllvm --raptor-truncate-count=false -O3 %s -o %t.a.out %linkRaptorRT %loadClangPluginRaptor -mllvm --raptor-truncate-all="ieee(64)-ieee(32)" -lmpfr -lm && %t.a.out | FileCheck --check-prefix TO_32 %s // TO_32: 900000000.000000 -// RUN: %clang -mllvm --raptor-truncate-count=false -O3 %s -o %t.a.out %linkRaptorRT %loadClangRaptor -mllvm --raptor-truncate-all="ieee(64)-mpfr(8,23)" -lmpfr -lm && %t.a.out | FileCheck --check-prefix TO_28_23 %s +// RUN: %clang -mllvm --raptor-truncate-count=false -O3 %s -o %t.a.out %linkRaptorRT %loadClangPluginRaptor -mllvm --raptor-truncate-all="ieee(64)-mpfr(8,23)" -lmpfr -lm && %t.a.out | FileCheck --check-prefix TO_28_23 %s // TO_28_23: 900000000.000000 -// RUN: %clang -mllvm --raptor-truncate-count=false -O3 %s -o %t.a.out %linkRaptorRT %loadClangRaptor -mllvm --raptor-truncate-all="ieee(64)-mpfr(8,7)" -lmpfr -lm && %t.a.out | FileCheck --check-prefix TO_3_7 %s +// RUN: %clang -mllvm --raptor-truncate-count=false -O3 %s -o %t.a.out %linkRaptorRT %loadClangPluginRaptor -mllvm --raptor-truncate-all="ieee(64)-mpfr(8,7)" -lmpfr -lm && %t.a.out | FileCheck --check-prefix TO_3_7 %s // TO_3_7: 897581056.000000 // TODO revive the location check -// COM: %clang -mllvm --raptor-truncate-count=false -g -O3 %s -o %t.a.out %linkRaptorRT %loadClangRaptor -mllvm --raptor-truncate-all="ieee(64)-mpfr(3,7)" -lmpfr -lm && %t.a.out | FileCheck --check-prefix CHECK-LOCS %s +// COM: %clang -mllvm --raptor-truncate-count=false -g -O3 %s -o %t.a.out %linkRaptorRT %loadClangPluginRaptor -mllvm --raptor-truncate-all="ieee(64)-mpfr(3,7)" -lmpfr -lm && %t.a.out | FileCheck --check-prefix CHECK-LOCS %s // CHECK-LOCS: 0x[[op1:[0-9a-f]*]], {{.*}}truncate-all.cpp:[[op1loc:.*]] // CHECK-LOCS-NEXT: 0x[[op2:[0-9a-f]*]], {{.*}}truncate-all.cpp:[[op2loc:.*]] // CHECK-LOCS-NEXT: 0x[[op3:[0-9a-f]*]], {{.*}}truncate-all.cpp:[[op3loc:.*]] diff --git a/test/Integration/Truncate/Cpp/truncate-count.cpp b/test/Integration/Truncate/Cpp/truncate-count.cpp index 7697cc2f..567e51fd 100644 --- a/test/Integration/Truncate/Cpp/truncate-count.cpp +++ b/test/Integration/Truncate/Cpp/truncate-count.cpp @@ -1,5 +1,5 @@ // clang-format off -// RUN: %clang -O2 %s -o %t.a.out %linkRaptorRT %loadClangRaptor -mllvm --raptor-truncate-count -lm && %t.a.out | FileCheck %s +// RUN: %clang -O2 %s -o %t.a.out %linkRaptorRT %loadClangPluginRaptor -mllvm --raptor-truncate-count -lm && %t.a.out | FileCheck %s #include #include diff --git a/test/Unit/Truncate/cmp.ll b/test/Unit/Truncate/cmp.ll index f251318d..587ba405 100644 --- a/test/Unit/Truncate/cmp.ll +++ b/test/Unit/Truncate/cmp.ll @@ -31,7 +31,7 @@ entry: ; CHECK: define internal i1 @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_0_0_0_f( ; CHECK: call i1 @__raptor_fprt_ieee_64_fcmp_olt -; CHECK: define internal i1 @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_1_1_0_f( +; CHECK: define internal i1 @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_0_0_0_f( ; CHECK: fcmp olt double ; CHECK: define internal i1 @__raptor_done_truncate_op_func_ieee_64_to_mpfr_3_7_1_1_0_f( diff --git a/test/Unit/Truncate/intrinsic.ll b/test/Unit/Truncate/intrinsic.ll index 2a66d17f..6d59328d 100644 --- a/test/Unit/Truncate/intrinsic.ll +++ b/test/Unit/Truncate/intrinsic.ll @@ -47,7 +47,7 @@ entry: ; CHECK-DAG: call double @__raptor_fprt_ieee_64_binop_fadd( ; CHECK-DAG: call void @llvm.nvvm.barrier0() -; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_1_1_0_f( +; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_0_0_0_f( ; CHECK-DAG: fptrunc ; CHECK-DAG: call float @llvm.pow.f32( ; CHECK-DAG: fpext float diff --git a/test/Unit/Truncate/simple.ll b/test/Unit/Truncate/simple.ll index 4cd822cf..12325085 100644 --- a/test/Unit/Truncate/simple.ll +++ b/test/Unit/Truncate/simple.ll @@ -48,7 +48,7 @@ entry: ; CHECK: define void @tester_op(ptr %data) { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_1_1_0_f(ptr %data) +; CHECK-NEXT: call void @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_0_0_0_f(ptr %data) ; CHECK-NEXT: ret void ; CHECK-NEXT: } @@ -71,7 +71,7 @@ entry: ; CHECK-NEXT: ret double %2 ; CHECK-NEXT: } -; CHECK: define internal void @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_1_1_0_f(ptr %x) { +; CHECK: define internal void @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_0_0_0_f(ptr %x) { ; CHECK-NEXT: %y = load double, ptr %x, align 8 ; CHECK-NEXT: %raptor_trunc = fptrunc double %y to float ; CHECK-NEXT: %raptor_trunc1 = fptrunc double %y to float diff --git a/test/lit.site.cfg.py.in b/test/lit.site.cfg.py.in index 92a3646c..47597b65 100644 --- a/test/lit.site.cfg.py.in +++ b/test/lit.site.cfg.py.in @@ -92,11 +92,12 @@ config.substitutions.append(('%BClibdir', '@RAPTOR_SOURCE_DIR@/bclib/')) assert len("@RAPTOR_BINARY_DIR@") != 0 -newPM = ('-Xclang -load -Xclang @RAPTOR_BINARY_DIR@/pass/ClangRaptor-' + config.llvm_ver + config.llvm_shlib_ext) -config.substitutions.append(('%loadClangRaptor', newPM)) +passPlugin = ('-fpass-plugin=@RAPTOR_BINARY_DIR@/pass/LLVMRaptor-' + config.llvm_ver + config.llvm_shlib_ext) -newPM = ('-fpass-plugin=@RAPTOR_BINARY_DIR@/pass/LLVMRaptor-' + config.llvm_ver + config.llvm_shlib_ext) -config.substitutions.append(('%loadFlangRaptor', newPM)) +newPM = ('-Xclang -load -Xclang @RAPTOR_BINARY_DIR@/pass/ClangRaptor-' + config.llvm_ver + config.llvm_shlib_ext) +config.substitutions.append(('%loadClangPluginRaptor', newPM)) +config.substitutions.append(('%loadClangRaptor', passPlugin)) +config.substitutions.append(('%loadFlangRaptor', passPlugin)) newPM = ('-Wl,--load-pass-plugin=@RAPTOR_BINARY_DIR@/pass/LLDRaptor-' + config.llvm_ver + config.llvm_shlib_ext) config.substitutions.append(('%loadLLDRaptor', newPM)) @@ -111,3 +112,28 @@ cfgfile = "@RAPTOR_SOURCE_DIR@/test/lit.cfg.py" if len("@RAPTOR_SOURCE_DIR@") == 0: cfgfile = os.path.dirname(os.path.abspath(__file__)) + "/lit.cfg.py" lit_config.load_config(config, cfgfile) + + +import subprocess + +def has_openmp_gpu(): + try: + result = subprocess.run( + ["llvm-offload-device-info"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + + if result.returncode != 0: + return False + output = result.stdout + return "GPU" in output + + except FileNotFoundError: + return False + + +has_openmp_gpu = has_openmp_gpu() + +config.substitutions.append(('%hasOpenMPGPU', '1' if has_openmp_gpu else '0' ))