From 4d1eeb361441d834a687d1df59f165debeffc9f5 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Thu, 10 Jul 2025 17:20:59 +0900 Subject: [PATCH] WIP Histogram logging missing file WIP Refactoring complete WIP Working flop logging test pretty good undef DUmp to file Fixes Log to file Test clang-raptor --- CMakeLists.txt | 4 + pass/Raptor.cpp | 68 +++++- pass/RaptorLogic.cpp | 88 ++++---- pass/RaptorLogic.h | 120 ++++++++++- runtime/CMakeLists.txt | 23 +- runtime/include/private/raptor/Common.h | 32 +-- .../{private => public}/raptor/FloatTypes.def | 2 + runtime/include/public/raptor/raptor.h | 22 +- runtime/ir/Flops.def | 8 +- runtime/ir/Log.cpp | 201 ++++++++++++++++++ runtime/ir/Mpfr.cpp | 165 +++++++------- runtime/obj/GarbageCollection.cpp | 1 - scripts/raptor_plot_float_histogram.py | 107 ++++++++++ test/Integration/Truncate/Cpp/log.cpp | 31 +++ test/Integration/Truncate/Cpp/openmp-gpu.cpp | 2 +- test/lit.site.cfg.py.in | 6 +- wrappers/raptor-clang++.in | 2 +- wrappers/raptor-clang.in | 2 +- 18 files changed, 704 insertions(+), 180 deletions(-) rename runtime/include/{private => public}/raptor/FloatTypes.def (80%) create mode 100644 runtime/ir/Log.cpp create mode 100755 scripts/raptor_plot_float_histogram.py create mode 100644 test/Integration/Truncate/Cpp/log.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4133f37b..06bb3f7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -365,6 +365,10 @@ install(EXPORT RaptorTargets DESTINATION "${INSTALL_CMAKE_DIR}" COMPONENT dev) +install(PROGRAMS + "${CMAKE_CURRENT_SOURCE_DIR}/scripts/raptor_plot_float_histogram.py" + DESTINATION bin) + add_subdirectory(runtime) add_subdirectory(test) add_subdirectory(wrappers) diff --git a/pass/Raptor.cpp b/pass/Raptor.cpp index 532b215f..e0b78fb6 100644 --- a/pass/Raptor.cpp +++ b/pass/Raptor.cpp @@ -30,6 +30,7 @@ #include #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/Instrumentation.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/ADT/ArrayRef.h" @@ -567,6 +568,43 @@ class RaptorBase { llvm_unreachable("Unknown float type"); } + bool HandleLogFlops(CallInst *CI) { + IRBuilder<> Builder(CI); + Function *F = parseFunctionParameter(CI); + if (!F) + return false; + unsigned ArgNum = CI->arg_size(); + if (ArgNum != 1) { + EmitFailure("TooManyArgs", CI->getDebugLoc(), CI, + "Had incorrect number of args ", *CI, " - expected 1"); + return false; + } + + RequestContext context(CI, &Builder); + for (auto FR : + {/* FloatRepresentation::getIEEE(16), */ FloatRepresentation::getIEEE( + 32), + FloatRepresentation::getIEEE(64)}) { + FunctionType *Ty = FunctionType::get( + Builder.getVoidTy(), FR.getMustBeBuiltinType(Builder.getContext()), + false); + FunctionCallee FlopLogger = CI->getModule()->getOrInsertFunction( + std::string(RaptorPrefix) + "log_flops_" + FR.getMangling(), Ty); + llvm::Value *Res = Logic.CreateTruncateFunc( + context, F, + TruncationConfiguration::getInitialLogFlops( + FR, *cast(FlopLogger.getCallee()))); + if (!Res) + return false; + F = cast(Res); + } + llvm::Value *Res = Builder.CreatePointerCast(F, CI->getType()); + + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + return true; + } + bool HandleTruncateFunc(CallInst *CI, TruncateMode Mode) { IRBuilder<> Builder(CI); Function *F = parseFunctionParameter(CI); @@ -583,7 +621,8 @@ class RaptorBase { RequestContext context(CI, &Builder); llvm::Value *res = Logic.CreateTruncateFunc( - context, F, TruncationConfiguration::getInitial(Truncation, Mode)); + context, F, + 
TruncationConfiguration::getInitial(Truncation, Builder.getContext())); if (!res) return false; res = Builder.CreatePointerCast(res, CI->getType()); @@ -718,7 +757,7 @@ class RaptorBase { Function *TruncatedFunc = Logic.CreateTruncateFunc(context, &F, TruncationConfiguration::getInitial( - Truncation, TruncOpFullModuleMode)); + Truncation, Builder.getContext())); ValueToValueMapTy Mapping; for (auto &&[Arg, TArg] : llvm::zip(F.args(), TruncatedFunc->args())) @@ -790,6 +829,7 @@ class RaptorBase { Changed = true; } + SmallVector toLogFlops; SmallVector toTruncateFuncMem; SmallVector toTruncateFuncOp; SmallVector toTruncateValue; @@ -1017,11 +1057,15 @@ class RaptorBase { } bool enableRaptor = false; + bool logFlops = false; bool truncateFuncOp = false; bool truncateFuncMem = false; bool truncateValue = false; bool expandValue = false; if (false) { + } else if (Fn->getName().contains("__raptor_log_flops")) { + enableRaptor = true; + logFlops = true; } else if (Fn->getName().contains("__raptor_truncate_mem_func")) { enableRaptor = true; truncateFuncMem = true; @@ -1073,7 +1117,11 @@ class RaptorBase { } goto retry; } - if (truncateFuncOp) + if (false) + abort(); + else if (logFlops) + toLogFlops.push_back(CI); + else if (truncateFuncOp) toTruncateFuncOp.push_back(CI); else if (truncateFuncMem) toTruncateFuncMem.push_back(CI); @@ -1095,18 +1143,16 @@ class RaptorBase { } } - for (auto call : toTruncateFuncMem) { + for (auto call : toLogFlops) + HandleLogFlops(call); + for (auto call : toTruncateFuncMem) HandleTruncateFunc(call, TruncMemMode); - } - for (auto call : toTruncateFuncOp) { + for (auto call : toTruncateFuncOp) HandleTruncateFunc(call, TruncOpMode); - } - for (auto call : toTruncateValue) { + for (auto call : toTruncateValue) HandleTruncateValue(call, true); - } - for (auto call : toExpandValue) { + for (auto call : toExpandValue) HandleTruncateValue(call, false); - } return Changed; } diff --git a/pass/RaptorLogic.cpp b/pass/RaptorLogic.cpp index 62e27d3b..2d9d230a 100644 --- a/pass/RaptorLogic.cpp +++ b/pass/RaptorLogic.cpp @@ -62,7 +62,7 @@ using namespace llvm; static Value *floatValTruncate(IRBuilderBase &B, Value *v, - FloatTruncation truncation) { + TruncationConfiguration truncation) { if (truncation.isToFPRT()) return v; @@ -73,7 +73,7 @@ static Value *floatValTruncate(IRBuilderBase &B, Value *v, } static Value *floatValExpand(IRBuilderBase &B, Value *v, - FloatTruncation truncation) { + TruncationConfiguration truncation) { if (truncation.isToFPRT()) return v; @@ -84,26 +84,18 @@ static Value *floatValExpand(IRBuilderBase &B, Value *v, } static Value *floatMemTruncate(IRBuilderBase &B, Value *v, - FloatTruncation truncation) { - if (isa(v->getType())) - report_fatal_error("vector operations not allowed in mem trunc mode"); - - Type *toTy = truncation.getToType(B.getContext()); - return B.CreateBitCast(v, toTy); + TruncationConfiguration truncation) { + return v; } static Value *floatMemExpand(IRBuilderBase &B, Value *v, - FloatTruncation truncation) { - if (isa(v->getType())) - report_fatal_error("vector operations not allowed in mem trunc mode"); - - Type *fromTy = truncation.getFromType(B.getContext()); - return B.CreateBitCast(v, fromTy); + TruncationConfiguration truncation) { + return v; } class TruncateUtils { protected: - FloatTruncation truncation; + TruncationConfiguration TC; llvm::Module *M; Type *fromType; Type *toType; @@ -111,14 +103,17 @@ class TruncateUtils { RaptorLogic &Logic; Value *UnknownLoc; Value *scratch = nullptr; + CustomArgsTy CustomArgs; + 
std::string RTName; private: std::string getOriginalFPRTName(std::string Name) { - return std::string(RaptorFPRTOriginalPrefix) + truncation.mangleFrom() + + return std::string(RaptorPrefix) + RTName + "_original_" + TC.mangleFrom() + "_" + Name; } std::string getFPRTName(std::string Name) { - return std::string(RaptorFPRTPrefix) + truncation.mangleFrom() + "_" + Name; + return std::string(RaptorPrefix) + RTName + "_" + TC.mangleFrom() + "_" + + Name; } // Creates a function which contains the original floating point operation. @@ -169,9 +164,7 @@ class TruncateUtils { const SmallVectorImpl &ArgsIn, llvm::Type *RetTy, Value *LocStr) { SmallVector Args(ArgsIn.begin(), ArgsIn.end()); - Args.push_back(B.getInt64(truncation.getTo().getExponentWidth())); - Args.push_back(B.getInt64(truncation.getTo().getSignificandWidth())); - Args.push_back(B.getInt64(truncation.getMode())); + Args.append(CustomArgs); Args.push_back(LocStr); Args.push_back(scratch); @@ -189,11 +182,11 @@ class TruncateUtils { return CI; } - TruncateUtils(FloatTruncation truncation, Module *M, RaptorLogic &Logic) - : truncation(truncation), M(M), ctx(M->getContext()), Logic(Logic) { - fromType = truncation.getFromType(ctx); - toType = truncation.getToType(ctx); - + TruncateUtils(TruncationConfiguration TC, Module *M, RaptorLogic &Logic) + : TC(TC), M(M), ctx(M->getContext()), Logic(Logic), + CustomArgs(TC.CustomArgs), RTName(TC.RTName) { + fromType = TC.getFromType(M->getContext()); + toType = TC.getToType(M->getContext()); UnknownLoc = getUniquedLocStr(nullptr); scratch = ConstantPointerNull::get(PointerType::get(M->getContext(), 0)); } @@ -401,7 +394,7 @@ class TruncateGenerator : public llvm::InstVisitor, public TruncateUtils { private: ValueToValueMapTy &OriginalToNewFn; - FloatTruncation Truncation; + TruncationConfiguration TC; TruncateMode Mode; RaptorLogic &Logic; LLVMContext &Ctx; @@ -410,9 +403,9 @@ class TruncateGenerator : public llvm::InstVisitor, TruncateGenerator(ValueToValueMapTy &originalToNewFn, Function *oldFunc, Function *newFunc, RaptorLogic &Logic, TruncationConfiguration TC) - : TruncateUtils(TC.Truncation, newFunc->getParent(), Logic), - OriginalToNewFn(originalToNewFn), Truncation(TC.Truncation), - Mode(Truncation.getMode()), Logic(Logic), Ctx(newFunc->getContext()) { + : TruncateUtils(TC, newFunc->getParent(), Logic), + OriginalToNewFn(originalToNewFn), TC(TC), Mode(TC.getMode()), + Logic(Logic), Ctx(newFunc->getContext()) { auto AllocScratch = [&]() { // TODO we should check at the end if we never used the scracth we should @@ -444,7 +437,7 @@ class TruncateGenerator : public llvm::InstVisitor, } } }; - if (Truncation.isToFPRT()) { + if (TC.isToFPRT()) { if (Mode == TruncOpMode) { if (TC.NeedTruncChange || TC.NeedNewScratch) AllocScratch(); @@ -503,10 +496,10 @@ class TruncateGenerator : public llvm::InstVisitor, case TruncMemMode: if (isa(v)) return createFPRTConstCall(B, v); - return floatMemTruncate(B, v, Truncation); + return floatMemTruncate(B, v, TC); case TruncOpMode: case TruncOpFullModuleMode: - return floatValTruncate(B, v, Truncation); + return floatValTruncate(B, v, TC); } llvm_unreachable("Unknown trunc mode"); } @@ -514,10 +507,10 @@ class TruncateGenerator : public llvm::InstVisitor, Value *expand(IRBuilder<> &B, Value *v) { switch (Mode) { case TruncMemMode: - return floatMemExpand(B, v, Truncation); + return floatMemExpand(B, v, TC); case TruncOpMode: case TruncOpFullModuleMode: - return floatValExpand(B, v, Truncation); + return floatValExpand(B, v, TC); } llvm_unreachable("Unknown 
trunc mode"); } @@ -527,7 +520,7 @@ class TruncateGenerator : public llvm::InstVisitor, case UnaryOperator::FNeg: { if (I.getOperand(0)->getType() != getFromType()) return; - if (!Truncation.isToFPRT()) + if (!TC.isToFPRT()) return; auto newI = getNewFromOriginal(&I); @@ -565,7 +558,7 @@ class TruncateGenerator : public llvm::InstVisitor, Args.push_back(truncLHS); Args.push_back(truncRHS); Instruction *nres; - if (Truncation.isToFPRT()) + if (TC.isToFPRT()) nres = createFPRTOpCall(B, CI, B.getInt1Ty(), Args); else nres = @@ -685,9 +678,9 @@ class TruncateGenerator : public llvm::InstVisitor, auto newLHS = truncate(B, getNewFromOriginal(oldLHS)); auto newRHS = truncate(B, getNewFromOriginal(oldRHS)); Instruction *nres = nullptr; - if (Truncation.isToFPRT()) { + if (TC.isToFPRT()) { SmallVector Args({newLHS, newRHS}); - nres = createFPRTOpCall(B, BO, Truncation.getToType(Ctx), Args); + nres = createFPRTOpCall(B, BO, getToType(), Args); } else { nres = cast(B.CreateBinOp(BO.getOpcode(), newLHS, newRHS)); } @@ -748,7 +741,7 @@ class TruncateGenerator : public llvm::InstVisitor, Instruction *intr = nullptr; Value *nres = nullptr; - if (Truncation.isToFPRT()) { + if (TC.isToFPRT()) { nres = intr = createFPRTOpCall(B, CI, retTy, new_ops); } else { // TODO check that the intrinsic is overloaded @@ -832,11 +825,13 @@ class TruncateGenerator : public llvm::InstVisitor, } Value *GetShadow(RequestContext &ctx, Value *v, bool WillPassScratch) { - if (auto F = dyn_cast(v)) - return Logic.CreateTruncateFunc( - ctx, F, - TruncationConfiguration{Truncation, Mode, !WillPassScratch, false, - WillPassScratch}); + if (auto F = dyn_cast(v)) { + auto NewTC = TC; + NewTC.NeedNewScratch = !WillPassScratch; + NewTC.NeedTruncChange = false; + NewTC.ScratchFromArgs = WillPassScratch; + return Logic.CreateTruncateFunc(ctx, F, NewTC); + } llvm::errs() << " unknown get truncated func: " << *v << "\n"; llvm_unreachable("unknown get truncated func"); return v; @@ -1006,8 +1001,9 @@ bool RaptorLogic::CreateTruncateValue(RequestContext context, Value *v, IRBuilderBase &B = *context.ip; Value *converted = nullptr; - TruncateUtils TU(Truncation, B.GetInsertBlock()->getParent()->getParent(), - *this); + TruncateUtils TU( + TruncationConfiguration::getInitial(Truncation, v->getContext()), + B.GetInsertBlock()->getParent()->getParent(), *this); if (isTruncate) converted = TU.createFPRTNewCall(B, v); else diff --git a/pass/RaptorLogic.h b/pass/RaptorLogic.h index 9c79fea4..a4efa5d0 100644 --- a/pass/RaptorLogic.h +++ b/pass/RaptorLogic.h @@ -17,8 +17,10 @@ #include #include +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" @@ -34,6 +36,7 @@ extern llvm::cl::opt RaptorPrint; extern llvm::cl::opt RaptorJuliaAddrLoad; } +constexpr char RaptorPrefix[] = "__raptor_"; constexpr char RaptorFPRTPrefix[] = "__raptor_fprt_"; constexpr char RaptorFPRTOriginalPrefix[] = "__raptor_fprt_original_"; @@ -198,6 +201,11 @@ struct FloatRepresentation { (w == F64Width && SignificandWidth == F64Significand); } + llvm::Type *getMustBeBuiltinType(llvm::LLVMContext &ctx) const { + assert(canBeBuiltin()); + return getTypeForWidth(ctx, getWidth(), /*builtinFloat=*/true); + } + llvm::Type *getBuiltinType(llvm::LLVMContext &ctx) const { if (!canBeBuiltin()) return nullptr; @@ -288,22 +296,31 @@ struct FloatTruncation { std::string mangleFrom() const { return From.getMangling(); } }; 
+using CustomArgsTy = llvm::SmallVector; + class TruncationConfiguration { public: - FloatTruncation Truncation; + FloatRepresentation FromRepr; TruncateMode Mode; bool NeedNewScratch; bool NeedTruncChange; bool ScratchFromArgs; + CustomArgsTy CustomArgs; + std::string CustomMangle; + std::string RTName; + + bool IsToFPRT; + std::optional ToRepr; + std::string mangle() { return std::string(truncateModeStr(Mode)) + "_func_" + - Truncation.mangleTruncation() + "_" + + FromRepr.getMangling() + "_" + CustomMangle + "_" + std::to_string(NeedTruncChange) + "_" + std::to_string(NeedNewScratch) + "_" + std::to_string(ScratchFromArgs); } static auto toTuple(const TruncationConfiguration &TC) { - return std::tuple(TC.Truncation, TC.Mode, TC.NeedNewScratch, + return std::tuple(TC.FromRepr, TC.CustomMangle, TC.Mode, TC.NeedNewScratch, TC.NeedTruncChange, TC.ScratchFromArgs); } bool operator==(const TruncationConfiguration &Other) const { @@ -313,18 +330,99 @@ class TruncationConfiguration { return toTuple(*this) < toTuple(Other); } + std::string mangleFrom() { return FromRepr.getMangling(); } + + bool isToFPRT() { return IsToFPRT; } + + TruncateMode getMode() { return Mode; } + + llvm::Type *getFromType(llvm::LLVMContext &Ctx) { + return FromRepr.getBuiltinType(Ctx); + } + + llvm::Type *getToType(llvm::LLVMContext &Ctx) { + if (isToFPRT() || !ToRepr.has_value()) + return getFromType(Ctx); + assert(ToRepr.has_value()); + return ToRepr->getBuiltinType(Ctx); + } + + static TruncationConfiguration getInitialLogFlops(FloatRepresentation FR, + llvm::Function &F) { + llvm::IRBuilder<> B(F.getContext()); + CustomArgsTy Args; + Args.push_back(&F); + return TruncationConfiguration{FR, TruncOpMode, true, false, + false, Args, "log", "fprtlog", + true, std::nullopt}; + } + static TruncationConfiguration getInitial(FloatTruncation Truncation, - TruncateMode Mode) { - if (Mode == TruncOpMode) { + llvm::LLVMContext &Ctx) { + llvm::IRBuilder<> B(Ctx); + CustomArgsTy Args; + Args.push_back(B.getInt64(Truncation.getTo().getExponentWidth())); + Args.push_back(B.getInt64(Truncation.getTo().getSignificandWidth())); + Args.push_back(B.getInt64(Truncation.getMode())); + std::string Mangle = "to_" + Truncation.getTo().getMangling(); + if (Truncation.getMode() == TruncOpMode) { if (Truncation.isToFPRT()) - return TruncationConfiguration{Truncation, Mode, true, true, false}; + return TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + true, + true, + false, + Args, + Mangle, + "fprt", + true, + std::nullopt}; else - return TruncationConfiguration{Truncation, Mode, false, false, false}; - } else if (Mode == TruncMemMode) { + return TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + false, + false, + false, + Args, + Mangle, + "", + false, + Truncation.getTo()}; + } else if (Truncation.getMode() == TruncMemMode) { assert(Truncation.isToFPRT()); - return TruncationConfiguration{Truncation, Mode, false, false, false}; - } else if (Mode == TruncOpFullModuleMode) { - return TruncationConfiguration{Truncation, Mode, true, false, false}; + return TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + false, + false, + false, + Args, + Mangle, + "fprt", + true, + std::nullopt}; + } else if (Truncation.getMode() == TruncOpFullModuleMode) { + if (Truncation.isToFPRT()) + return TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + true, + false, + false, + Args, + Mangle, + "fprt", + true, + std::nullopt}; + else + return 
TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + true, + false, + false, + Args, + Mangle, + "", + false, + Truncation.getTo()}; } else { llvm_unreachable(""); } diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index d0525b43..62de130e 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -5,6 +5,7 @@ add_library( obj/GarbageCollection.cpp ir/Mpfr.cpp ir/Fprt.cpp + ir/Log.cpp ) # add_library( @@ -30,15 +31,23 @@ add_library( # OUTPUT_NAME "Raptor-RT-FP-${LLVM_VERSION_MAJOR}" # ) -set(RAPTOR_ALL_INCLUDE_DIRS - ${CMAKE_CURRENT_SOURCE_DIR}/include/public - ${CMAKE_CURRENT_SOURCE_DIR}/include/private +set(RAPTOR_PRIVATE_INCLUDE_DIR + ${CMAKE_CURRENT_SOURCE_DIR}/include/private/ +) +set(RAPTOR_PUBLIC_INCLUDE_DIR + ${CMAKE_CURRENT_SOURCE_DIR}/include/public/ +) + +target_include_directories(Raptor-RT-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_PRIVATE_INCLUDE_DIR}) +target_include_directories(Raptor-RT-${LLVM_VERSION_MAJOR} PUBLIC + $ + $ ) -target_include_directories(Raptor-RT-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_ALL_INCLUDE_DIRS}) -# target_include_directories(Raptor-RT-GC-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_ALL_INCLUDE_DIRS}) -# target_include_directories(Raptor-RT-FP-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_ALL_INCLUDE_DIRS}) -# target_include_directories(Raptor-RT-Count-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_ALL_INCLUDE_DIRS}) +install( + DIRECTORY ${RAPTOR_PUBLIC_INCLUDE_DIR} + DESTINATION include +) install(TARGETS Raptor-RT-${LLVM_VERSION_MAJOR} LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT Raptor-RT-${LLVM_VERSION_MAJOR} diff --git a/runtime/include/private/raptor/Common.h b/runtime/include/private/raptor/Common.h index 20b3e3b9..3fdfa753 100644 --- a/runtime/include/private/raptor/Common.h +++ b/runtime/include/private/raptor/Common.h @@ -9,7 +9,9 @@ #define MAX_MPFR_OPERANDS 3 #define __RAPTOR_MPFR_ATTRIBUTES extern "C" -#define __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES extern "C" +#define __RAPTOR_MPFR_DECL_ATTRIBUTES extern "C" +#define __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES extern "C" __attribute__((weak)) + #define __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE GMP_RNDN #define __RAPTOR_MPFR_MALLOC_FAILURE_EXIT_STATUS 114 @@ -44,16 +46,16 @@ static inline bool __raptor_fprt_is_full_module_op_mode(int64_t mode) { return mode & 0b0100; } -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES void raptor_fprt_gc_dump_status(); -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES double raptor_fprt_gc_mark_seen(double a); -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES void raptor_fprt_gc_doit(); -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES void raptor_fprt_excl_trunc_start(); -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES void raptor_fprt_excl_trunc_end(); template To raptor_bitcast(From from) { @@ -85,50 +87,48 @@ template To checked_raptor_bitcast(From from) { return checked_raptor_bitcast<__raptor_fp *>(d); \ } #include "raptor/FloatTypes.def" -#undef RAPTOR_FLOAT_TYPE #define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ CPP_TY __raptor_fprt_##FROM_TY##_get(CPP_TY _a, int64_t exponent, \ int64_t significand, int64_t mode, \ const char *loc, void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ CPP_TY __raptor_fprt_##FROM_TY##_new(CPP_TY _a, int64_t exponent, \ int64_t significand, int64_t mode, \ const char *loc, void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ CPP_TY 
__raptor_fprt_##FROM_TY##_const(CPP_TY _a, int64_t exponent, \ int64_t significand, int64_t mode, \ const char *loc, void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ __raptor_fp *__raptor_fprt_##FROM_TY##_new_intermediate( \ int64_t exponent, int64_t significand, int64_t mode, const char *loc, \ void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ void __raptor_fprt_##FROM_TY##_delete(CPP_TY a, int64_t exponent, \ int64_t significand, int64_t mode, \ const char *loc, void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ void *__raptor_fprt_##FROM_TY##_get_scratch(int64_t to_e, int64_t to_m, \ int64_t mode, const char *loc, \ void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ void __raptor_fprt_##FROM_TY##_free_scratch(int64_t to_e, int64_t to_m, \ int64_t mode, const char *loc, \ void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ void __raptor_fprt_##FROM_TY##_trunc_change(int64_t is_push, int64_t to_e, \ int64_t to_m, int64_t mode, \ const char *loc, void *scratch); #include "raptor/FloatTypes.def" -#undef RAPTOR_FLOAT_TYPE #endif // _RAPTOR_COMMON_H_ diff --git a/runtime/include/private/raptor/FloatTypes.def b/runtime/include/public/raptor/FloatTypes.def similarity index 80% rename from runtime/include/private/raptor/FloatTypes.def rename to runtime/include/public/raptor/FloatTypes.def index 940cae05..7e95b2f0 100644 --- a/runtime/include/private/raptor/FloatTypes.def +++ b/runtime/include/public/raptor/FloatTypes.def @@ -2,3 +2,5 @@ RAPTOR_FLOAT_TYPE(double, ieee_64) RAPTOR_FLOAT_TYPE(float, ieee_32) // RAPTOR_FLOAT_TYPE(half, ieee_16) + +#undef RAPTOR_FLOAT_TYPE diff --git a/runtime/include/public/raptor/raptor.h b/runtime/include/public/raptor/raptor.h index 0831d7c1..321e8614 100644 --- a/runtime/include/public/raptor/raptor.h +++ b/runtime/include/public/raptor/raptor.h @@ -1,10 +1,12 @@ #ifndef _RAPTOR_FPRT_FPRT_H_ #define _RAPTOR_FPRT_FPRT_H_ +#include #include #ifdef __cplusplus -template fty *__raptor_truncate_op_func(fty *, int, int, int, int); +template +fty *__raptor_truncate_op_func(fty *, int, int, int, int); template fty *__raptor_truncate_op_func(fty *, int, int, int); #endif @@ -21,9 +23,27 @@ void __raptor_fprt_delete_all(); long long __raptor_get_trunc_flop_count(); long long f_raptor_get_trunc_flop_count(); +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + struct __raptor_logged_flops_##CPP_TY { \ + CPP_TY *vals; \ + size_t num; \ + }; \ + void __raptor_clear_flop_log_##CPP_TY(); \ + void __raptor_set_flop_log_##CPP_TY(const char *path); +#include "FloatTypes.def" #ifdef __cplusplus } #endif +#ifdef __cplusplus +template fty *__raptor_log_flops(fty *); +template +fty *__raptor_truncate_mem_func(fty *, int, int, int, int); +template +fty *__raptor_truncate_op_func(fty *, int, int, int, int); +template double __raptor_truncate_mem_value(Tys...); +template double __raptor_expand_mem_value(Tys...); +#endif + #endif // _RAPTOR_FPRT_FPRT_H_ diff --git a/runtime/ir/Flops.def b/runtime/ir/Flops.def index 20910774..d3908375 100644 --- a/runtime/ir/Flops.def +++ b/runtime/ir/Flops.def @@ -132,11 +132,15 @@ __RAPTOR_MPFR_LROUND(intr, llvm_lround_i32_f32, ieee_32, int32_t, float, d, MPFR_RNDA); // Ternary operation -__RAPTOR_MPFR_FMULADD(llvm_fmuladd, ieee_64, double, d, f64, +__RAPTOR_MPFR_FMULADD(intr, llvm_fmuladd, ieee_64, double, d, f64, __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE); -__RAPTOR_MPFR_FMULADD(llvm_fma, ieee_64, 
double, d, f64, +__RAPTOR_MPFR_FMULADD(intr, llvm_fma, ieee_64, double, d, f64, __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE); +// llvm.is.fpclass +__RAPTOR_MPFR_ISCLASS(ieee_64, double, f64) +__RAPTOR_MPFR_ISCLASS(ieee_32, float, f32) + // Comparisons __RAPTOR_MPFR_FCMP(oeq, 1, == 0); __RAPTOR_MPFR_FCMP(ueq, 0, == 0); diff --git a/runtime/ir/Log.cpp b/runtime/ir/Log.cpp new file mode 100644 index 00000000..1e83f216 --- /dev/null +++ b/runtime/ir/Log.cpp @@ -0,0 +1,201 @@ +#include "raptor/Common.h" +#include "raptor/raptor.h" + +#include +#include +#include +#include +#include +#include +#include + +typedef void (*LogFuncTy_ieee_64)(double); +typedef void (*LogFuncTy_ieee_32)(float); +// typedef void (*LogFuncTy_ieee_16)(half); + +void __raptor_fprt_trunc_change(int64_t is_push, int64_t to_e, int64_t to_m, + int64_t mode, const char *loc, void *scratch) {} + +namespace { +struct FloatLoggerTy { + +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + std::unique_ptr OS_##FROM_TY; +#include "raptor/FloatTypes.def" + + template const char *getTypeStr() { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + if constexpr (std::is_same::value) \ + return #CPP_TY; +#include "raptor/FloatTypes.def" + abort(); + } + + template void clear() { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + if constexpr (std::is_same::value) \ + OS_##FROM_TY.reset(nullptr); +#include "raptor/FloatTypes.def" + } + + template void setLogPath(const std::string Path) { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + if constexpr (std::is_same::value) { \ + std::cerr << "Writing flop log for " #CPP_TY " to '" << Path << "'...\n"; \ + OS_##FROM_TY = std::make_unique( \ + Path, std::ios_base::out | std::ios_base::binary); \ + } +#include "raptor/FloatTypes.def" + } + + template void log(T F) { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + if constexpr (std::is_same::value) \ + if (OS_##FROM_TY) \ + OS_##FROM_TY->write(reinterpret_cast(&F), sizeof(F)); +#include "raptor/FloatTypes.def" + } + + FloatLoggerTy() { + if (char *C = getenv("RAPTOR_FLOP_LOG_PREFIX")) { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + setLogPath(std::string(C) + "." 
#CPP_TY); +#include "raptor/FloatTypes.def" + } + } + + ~FloatLoggerTy() {} + +} FloatLogger; +} // namespace + +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + __RAPTOR_MPFR_ATTRIBUTES \ + CPP_TY __raptor_fprtlog_##FROM_TY##_abs_err(CPP_TY a, CPP_TY b) { \ + return std::abs(a - b); \ + } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_fprtlog_##FROM_TY##_trunc_change( \ + int64_t is_push, int64_t to_e, int64_t to_m, int64_t mode, \ + const char *loc, void *scratch) { \ + __raptor_fprt_trunc_change(is_push, to_e, to_m, mode, loc, scratch); \ + } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void *__raptor_fprtlog_##FROM_TY##_get_scratch( \ + int64_t to_e, int64_t to_m, int64_t mode, const char *loc, \ + void *scratch) { \ + return nullptr; \ + } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_fprtlog_##FROM_TY##_free_scratch( \ + int64_t to_e, int64_t to_m, int64_t mode, const char *loc, \ + void *scratch) {} \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_log_flops_##FROM_TY(CPP_TY a) { FloatLogger.log(a); } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_clear_flop_log_##CPP_TY() { FloatLogger.clear(); } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_set_flop_log_##CPP_TY(const char *path) { \ + FloatLogger.setLogPath(path); \ + } +#include "raptor/FloatTypes.def" + +#define __RAPTOR_MPFR_LROUND(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, RET, ARG1, \ + MPFR_SET_ARG1, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + RET __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a); \ + __RAPTOR_MPFR_ATTRIBUTES \ + RET __raptor_fprtlog_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, LogFuncTy_##FROM_TYPE f, const char *loc, void *scratch) { \ + f(a); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + a); \ + } + +#define __RAPTOR_MPFR_SINGOP(OP_TYPE, LLVM_OP_NAME, MPFR_FUNC_NAME, FROM_TYPE, \ + RET, MPFR_GET, ARG1, MPFR_SET_ARG1, \ + ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + RET __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a); \ + __RAPTOR_MPFR_ATTRIBUTES \ + RET __raptor_fprtlog_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, LogFuncTy_##FROM_TYPE f, const char *loc, void *scratch) { \ + f(a); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + a); \ + } + +#define __RAPTOR_MPFR_BIN_INT(OP_TYPE, LLVM_OP_NAME, MPFR_FUNC_NAME, \ + FROM_TYPE, RET, MPFR_GET, ARG1, MPFR_SET_ARG1, \ + ARG2, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + RET __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, ARG2 b); \ + __RAPTOR_MPFR_ATTRIBUTES \ + RET __raptor_fprtlog_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, ARG2 b, LogFuncTy_##FROM_TYPE f, const char *loc, \ + void *scratch) { \ + f(a); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + a, b); \ + } + +#define __RAPTOR_MPFR_BIN(OP_TYPE, LLVM_OP_NAME, MPFR_FUNC_NAME, FROM_TYPE, \ + RET, MPFR_GET, ARG1, MPFR_SET_ARG1, ARG2, \ + MPFR_SET_ARG2, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + RET __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, ARG2 b); \ + __RAPTOR_MPFR_ATTRIBUTES \ + RET __raptor_fprtlog_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, ARG2 b, LogFuncTy_##FROM_TYPE f, const char *loc, \ + void *scratch) { \ + f(a); \ + f(b); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + a, b); \ + } + +#define __RAPTOR_MPFR_FMULADD(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, TYPE, \ + 
MPFR_TYPE, LLVM_TYPE, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + TYPE \ + __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE a, TYPE b, TYPE c); \ + __RAPTOR_MPFR_ATTRIBUTES \ + TYPE __raptor_fprtlog_##FROM_TYPE##_intr_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE a, TYPE b, TYPE c, LogFuncTy_##FROM_TYPE f, int64_t mode, \ + const char *loc, void *scratch) { \ + f(a); \ + f(b); \ + f(c); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + a, b, c); \ + } + +#define __RAPTOR_MPFR_FCMP_IMPL(NAME, ORDERED, CMP, FROM_TYPE, TYPE, MPFR_GET, \ + ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + bool __raptor_fprtlog_original_##FROM_TYPE##_fcmp_##NAME(TYPE a, TYPE b); \ + __RAPTOR_MPFR_ATTRIBUTES \ + bool __raptor_fprtlog_##FROM_TYPE##_fcmp_##NAME( \ + TYPE a, TYPE b, LogFuncTy_##FROM_TYPE f, const char *loc, \ + void *scratch) { \ + return __raptor_fprtlog_original_##FROM_TYPE##_fcmp_##NAME(a, b); \ + } + +#define __RAPTOR_MPFR_ISCLASS(FROM_TYPE, TYPE, LLVM_TYPE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES bool \ + __raptor_fprtlog_original_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + TYPE a, int32_t tests); \ + __RAPTOR_MPFR_ATTRIBUTES bool \ + __raptor_fprtlog_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + TYPE a, int32_t tests, LogFuncTy_##FROM_TYPE f, const char *loc, \ + void *scratch) { \ + return __raptor_fprtlog_original_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + a, tests); \ + } + +#include "Flops.def" diff --git a/runtime/ir/Mpfr.cpp b/runtime/ir/Mpfr.cpp index fc78b297..3333faa6 100644 --- a/runtime/ir/Mpfr.cpp +++ b/runtime/ir/Mpfr.cpp @@ -167,7 +167,6 @@ void __raptor_fprt_trunc_change(int64_t is_push, int64_t to_e, int64_t to_m, } #include "raptor/FloatTypes.def" -#undef RAPTOR_FLOAT_TYPE __RAPTOR_MPFR_ATTRIBUTES void __raptor_fprt_trunc_count(int64_t exponent, int64_t significand, @@ -441,73 +440,74 @@ void raptor_fprt_op_clear(); } \ } -#define __RAPTOR_MPFR_FMULADD(LLVM_OP_NAME, FROM_TYPE, TYPE, MPFR_TYPE, \ - LLVM_TYPE, ROUNDING_MODE) \ - __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ - TYPE __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ - TYPE a, TYPE b, TYPE c); \ - __RAPTOR_MPFR_ATTRIBUTES \ - TYPE __raptor_fprt_##FROM_TYPE##_intr_##LLVM_OP_NAME##_##LLVM_TYPE( \ - TYPE a, TYPE b, TYPE c, int64_t exponent, int64_t significand, \ - int64_t mode, const char *loc, mpfr_t *scratch) { \ - if (__raptor_fprt_is_op_mode(mode)) { \ - __raptor_fprt_trunc_count(exponent, significand, mode, loc, scratch); \ - mpfr_set_##MPFR_TYPE(scratch[0], a, ROUNDING_MODE); \ - mpfr_set_##MPFR_TYPE(scratch[1], b, ROUNDING_MODE); \ - mpfr_set_##MPFR_TYPE(scratch[2], c, ROUNDING_MODE); \ - mpfr_mul(scratch[0], scratch[0], scratch[1], ROUNDING_MODE); \ - mpfr_add(scratch[0], scratch[0], scratch[2], ROUNDING_MODE); \ - TYPE res = mpfr_get_##MPFR_TYPE(scratch[0], ROUNDING_MODE); \ - return res; \ - } else if (__raptor_fprt_is_mem_mode(mode)) { \ - __raptor_fp *ma = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ - a, exponent, significand, mode, loc, scratch); \ - __raptor_fp *mb = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ - b, exponent, significand, mode, loc, scratch); \ - __raptor_fp *mc = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ - c, exponent, significand, mode, loc, scratch); \ - RAPTOR_DUMP_INPUT(ma, OP_TYPE, LLVM_OP_NAME); \ - RAPTOR_DUMP_INPUT(mb, OP_TYPE, LLVM_OP_NAME); \ - RAPTOR_DUMP_INPUT(mc, OP_TYPE, LLVM_OP_NAME); \ - __raptor_fp *madd = 
__raptor_fprt_##FROM_TYPE##_new_intermediate( \ - exponent, significand, mode, loc, scratch); \ - madd->shadow = \ - __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ - ma->shadow, mb->shadow, mc->shadow); \ - if (excl_trunc) { \ - __raptor_fprt_##FROM_TYPE##_count(exponent, significand, mode, loc, \ - scratch); \ - madd->excl_result = \ - __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ - ma->excl_result, mb->excl_result, mc->excl_result); \ - mpfr_set_##MPFR_TYPE(madd->result, madd->excl_result, ROUNDING_MODE); \ - } else { \ - __raptor_fprt_trunc_count(exponent, significand, mode, loc, scratch); \ - mpfr_t mmul; \ - mpfr_init2(mmul, significand + 1); /* see MPFR_FP_EMULATION */ \ - mpfr_mul(madd->result, ma->result, mb->result, ROUNDING_MODE); \ - mpfr_add(madd->result, madd->result, mc->result, ROUNDING_MODE); \ - mpfr_clear(mmul); \ - madd->excl_result = mpfr_get_##MPFR_TYPE(madd->result, ROUNDING_MODE); \ - } \ - RAPTOR_DUMP_RESULT(__raptor_fprt_##FROM_TYPE##_to_ptr(madd), OP_TYPE, \ - LLVM_OP_NAME); \ - double trunc = mpfr_get_##MPFR_TYPE( \ - madd->result, __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE); \ - double err = __raptor_fprt_##FROM_TYPE##_abs_err(trunc, madd->shadow); \ - if (!opdata[loc].count) \ - opdata[loc].op = #LLVM_OP_NAME; \ - if (trunc != 0 && err / trunc > SHADOW_ERR_REL) { \ - ++opdata[loc].count_thresh; \ - } else if (trunc == 0 && err > SHADOW_ERR_ABS) { \ - ++opdata[loc].count_thresh; \ - } \ - opdata[loc].l1_err += err; \ - ++opdata[loc].count; \ - return __raptor_fprt_ptr_to_##FROM_TYPE(madd); \ - } else { \ - abort(); \ - } \ +#define __RAPTOR_MPFR_FMULADD(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, TYPE, \ + MPFR_TYPE, LLVM_TYPE, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + TYPE \ + __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE a, TYPE b, TYPE c); \ + __RAPTOR_MPFR_ATTRIBUTES \ + TYPE __raptor_fprt_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE a, TYPE b, TYPE c, int64_t exponent, int64_t significand, \ + int64_t mode, const char *loc, mpfr_t *scratch) { \ + if (__raptor_fprt_is_op_mode(mode)) { \ + __raptor_fprt_trunc_count(exponent, significand, mode, loc, scratch); \ + mpfr_set_##MPFR_TYPE(scratch[0], a, ROUNDING_MODE); \ + mpfr_set_##MPFR_TYPE(scratch[1], b, ROUNDING_MODE); \ + mpfr_set_##MPFR_TYPE(scratch[2], c, ROUNDING_MODE); \ + mpfr_mul(scratch[0], scratch[0], scratch[1], ROUNDING_MODE); \ + mpfr_add(scratch[0], scratch[0], scratch[2], ROUNDING_MODE); \ + TYPE res = mpfr_get_##MPFR_TYPE(scratch[0], ROUNDING_MODE); \ + return res; \ + } else if (__raptor_fprt_is_mem_mode(mode)) { \ + __raptor_fp *ma = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ + a, exponent, significand, mode, loc, scratch); \ + __raptor_fp *mb = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ + b, exponent, significand, mode, loc, scratch); \ + __raptor_fp *mc = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ + c, exponent, significand, mode, loc, scratch); \ + RAPTOR_DUMP_INPUT(ma, OP_TYPE, LLVM_OP_NAME); \ + RAPTOR_DUMP_INPUT(mb, OP_TYPE, LLVM_OP_NAME); \ + RAPTOR_DUMP_INPUT(mc, OP_TYPE, LLVM_OP_NAME); \ + __raptor_fp *madd = __raptor_fprt_##FROM_TYPE##_new_intermediate( \ + exponent, significand, mode, loc, scratch); \ + madd->shadow = \ + __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + ma->shadow, mb->shadow, mc->shadow); \ + if (excl_trunc) { \ + __raptor_fprt_##FROM_TYPE##_count(exponent, significand, mode, loc, \ + scratch); \ + madd->excl_result = \ + 
__raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + ma->excl_result, mb->excl_result, mc->excl_result); \ + mpfr_set_##MPFR_TYPE(madd->result, madd->excl_result, ROUNDING_MODE); \ + } else { \ + __raptor_fprt_trunc_count(exponent, significand, mode, loc, scratch); \ + mpfr_t mmul; \ + mpfr_init2(mmul, significand + 1); /* see MPFR_FP_EMULATION */ \ + mpfr_mul(madd->result, ma->result, mb->result, ROUNDING_MODE); \ + mpfr_add(madd->result, madd->result, mc->result, ROUNDING_MODE); \ + mpfr_clear(mmul); \ + madd->excl_result = mpfr_get_##MPFR_TYPE(madd->result, ROUNDING_MODE); \ + } \ + RAPTOR_DUMP_RESULT(__raptor_fprt_##FROM_TYPE##_to_ptr(madd), OP_TYPE, \ + LLVM_OP_NAME); \ + double trunc = mpfr_get_##MPFR_TYPE( \ + madd->result, __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE); \ + double err = __raptor_fprt_##FROM_TYPE##_abs_err(trunc, madd->shadow); \ + if (!opdata[loc].count) \ + opdata[loc].op = #LLVM_OP_NAME; \ + if (trunc != 0 && err / trunc > SHADOW_ERR_REL) { \ + ++opdata[loc].count_thresh; \ + } else if (trunc == 0 && err > SHADOW_ERR_ABS) { \ + ++opdata[loc].count_thresh; \ + } \ + opdata[loc].l1_err += err; \ + ++opdata[loc].count; \ + return __raptor_fprt_ptr_to_##FROM_TYPE(madd); \ + } else { \ + abort(); \ + } \ } // TODO This does not currently make distinctions between ordered/unordered. @@ -645,10 +645,10 @@ void raptor_fprt_op_clear(); } \ } -#define __RAPTOR_MPFR_FMULADD(LLVM_OP_NAME, FROM_TYPE, TYPE, MPFR_TYPE, \ - LLVM_TYPE, ROUNDING_MODE) \ +#define __RAPTOR_MPFR_FMULADD(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, TYPE, \ + MPFR_TYPE, LLVM_TYPE, ROUNDING_MODE) \ __RAPTOR_MPFR_ATTRIBUTES \ - TYPE __raptor_fprt_##FROM_TYPE##_intr_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE __raptor_fprt_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ TYPE a, TYPE b, TYPE c, int64_t exponent, int64_t significand, \ int64_t mode, const char *loc, mpfr_t *scratch) { \ if (__raptor_fprt_is_op_mode(mode)) { \ @@ -712,15 +712,18 @@ void raptor_fprt_op_clear(); } #endif // RAPTOR_FPRT_ENABLE_SHADOW_RESIDUALS -__RAPTOR_MPFR_ORIGINAL_ATTRIBUTES __attribute__((weak)) bool -__raptor_fprt_original_ieee_64_intr_llvm_is_fpclass_f64(double a, - int32_t tests); -__RAPTOR_MPFR_ATTRIBUTES bool __raptor_fprt_ieee_64_intr_llvm_is_fpclass_f64( - double a, int32_t tests, int64_t exponent, int64_t significand, - int64_t mode, const char *loc, mpfr_t *scratch) { - return __raptor_fprt_original_ieee_64_intr_llvm_is_fpclass_f64( - __raptor_fprt_ieee_64_get(a, exponent, significand, mode, loc, scratch), - tests); -} +#define __RAPTOR_MPFR_ISCLASS(FROM_TYPE, TYPE, LLVM_TYPE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES bool \ + __raptor_fprt_original_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + TYPE a, int32_t tests); \ + __RAPTOR_MPFR_ATTRIBUTES bool \ + __raptor_fprt_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + TYPE a, int32_t tests, int64_t exponent, int64_t significand, \ + int64_t mode, const char *loc, mpfr_t *scratch) { \ + return __raptor_fprt_original_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + __raptor_fprt_ieee_64_get(a, exponent, significand, mode, loc, \ + scratch), \ + tests); \ + } #include "Flops.def" diff --git a/runtime/obj/GarbageCollection.cpp b/runtime/obj/GarbageCollection.cpp index ba81f16f..cd5a1354 100644 --- a/runtime/obj/GarbageCollection.cpp +++ b/runtime/obj/GarbageCollection.cpp @@ -105,7 +105,6 @@ struct { /* ignore for now */ \ } #include "raptor/FloatTypes.def" -#undef RAPTOR_FLOAT_TYPE __RAPTOR_MPFR_ATTRIBUTES void raptor_fprt_gc_dump_status() { diff 
--git a/scripts/raptor_plot_float_histogram.py b/scripts/raptor_plot_float_histogram.py new file mode 100755 index 00000000..da72d769 --- /dev/null +++ b/scripts/raptor_plot_float_histogram.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +import argparse +import numpy as np +import matplotlib.pyplot as plt + +def plot_exponent_distribution(filename, dtype='float32', output_file='exponent_hist.png'): + """ + Plot histograms of exponent field usage in a raw binary file of floating-point numbers. + Generates two subplots: + 1. Histogram of used exponent range (min to max of actual data) + 2. Histogram over the full possible exponent range + + Parameters + ---------- + filename : str + Path to the binary file. + dtype : str + Data type of the floats in the file. One of: 'float16', 'float32', 'float64'. + output_file : str + Output filename to save the plot (e.g., 'plot.png' or 'plot.pdf'). + """ + dtype_info = { + 'float16': {'bits': 16, 'exp_bits': 5, 'bias': 15}, + 'float32': {'bits': 32, 'exp_bits': 8, 'bias': 127}, + 'float64': {'bits': 64, 'exp_bits': 11, 'bias': 1023}, + } + + if dtype not in dtype_info: + raise ValueError(f"Unsupported dtype '{dtype}'. Must be one of {list(dtype_info.keys())}") + + info = dtype_info[dtype] + bits = info['bits'] + exp_bits = info['exp_bits'] + bias = info['bias'] + + # Load binary data + data = np.fromfile(filename, dtype=dtype) + if data.size == 0: + raise ValueError("No data found in file or file empty.") + + # View data as unsigned integer + int_view = data.view({16: np.uint16, 32: np.uint32, 64: np.uint64}[bits]) + + # Extract exponent bits + mantissa_bits = bits - exp_bits - 1 + exponent_mask = ((1 << exp_bits) - 1) << mantissa_bits + exponents = ((int_view & exponent_mask) >> mantissa_bits).astype(int) + + # Mask out special exponents (0 or all ones) + normal_mask = (exponents != 0) & (exponents != (1 << exp_bits) - 1) + exponents_normal = exponents[normal_mask] + + # Convert to unbiased exponent values + unbiased_exponents = exponents_normal - bias + + # Prepare plots + fig, axes = plt.subplots(2, 1, figsize=(10, 8), constrained_layout=True) + + # --- Subplot 1: Only used exponent range --- + bins_used = np.arange(unbiased_exponents.min() - 1, unbiased_exponents.max() + 2) + axes[0].hist(unbiased_exponents, bins=bins_used, edgecolor='black', alpha=0.7) + axes[0].set_title(f"Exponent Distribution (Used Range)\n{dtype}, File: {filename}") + axes[0].set_xlabel("Unbiased Exponent Value") + axes[0].set_ylabel("Frequency") + axes[0].grid(True, linestyle='--', alpha=0.5) + + # --- Subplot 2: Full possible exponent range --- + exp_min_possible = 1 - bias # Smallest normal exponent + exp_max_possible = (1 << exp_bits) - 2 - bias # Largest normal exponent + bins_full = np.arange(exp_min_possible - 0.5, exp_max_possible + 1.5) + + axes[1].hist(unbiased_exponents, bins=bins_full, edgecolor='black', alpha=0.7) + axes[1].set_xlim(exp_min_possible - 1, exp_max_possible + 1) + axes[1].set_title(f"Exponent Distribution (Full Range)\n{dtype}") + axes[1].set_xlabel("Unbiased Exponent Value (All Possible)") + axes[1].set_ylabel("Frequency") + axes[1].grid(True, linestyle='--', alpha=0.5) + + # Save to file + plt.savefig(output_file) + plt.close() + print(f"✅ Histogram saved to '{output_file}'") + + +def main(): + parser = argparse.ArgumentParser( + description="Plot histogram of exponent field usage in a raw binary float file." 
+ ) + parser.add_argument("filename", help="Path to the binary input file") + parser.add_argument( + "--dtype", + choices=["float16", "float32", "float64"], + default="float32", + help="Data type of floats in the file (default: float32)", + ) + parser.add_argument( + "--output", + default="exponent_hist.png", + help="Output filename for the plot (default: exponent_hist.png)", + ) + + args = parser.parse_args() + plot_exponent_distribution(args.filename, args.dtype, args.output) + + +if __name__ == "__main__": + main() diff --git a/test/Integration/Truncate/Cpp/log.cpp b/test/Integration/Truncate/Cpp/log.cpp new file mode 100644 index 00000000..d1cc00a1 --- /dev/null +++ b/test/Integration/Truncate/Cpp/log.cpp @@ -0,0 +1,31 @@ +// RUN: %clang -O3 %s -o %t.a.out %loadClangRaptor %linkRaptorRT %includeRaptorRT -lm && RAPTOR_FLOP_LOG_PREFIX=%t.flop_log %t.a.out && xxd %t.flop_log.double | FileCheck %s + +// CHECK: 00000000: 0000 0000 0000 f03f 0000 0000 0000 0040 +// CHECK: 00000010: 0000 0000 0000 0840 0000 0000 0000 0040 +// CHECK: 00000020: 0000 0000 0000 0840 0000 0000 0000 1040 +// CHECK: 00000030: 0000 0000 0000 1c40 0000 0000 0000 0040 + +#include "raptor/raptor.h" +#include + +double simple_add(double a, double b) { + return 2 * (a + b); + // TODO float and half + // return a + b + ((float)a + (float)b); +} + +template fty *__raptor_log_flops(fty *); + +int main() { + double trunc; + + trunc = __raptor_log_flops(simple_add)(1, 2); + printf("A1 %f\n", trunc); + trunc = __raptor_log_flops(simple_add)(3, 4); + printf("A2 %f\n", trunc); + __raptor_clear_flop_log_double(); + trunc = __raptor_log_flops(simple_add)(5, 6); + printf("A3 %f\n", trunc); + + return 0; +} diff --git a/test/Integration/Truncate/Cpp/openmp-gpu.cpp b/test/Integration/Truncate/Cpp/openmp-gpu.cpp index 85bdde39..671b4d93 100644 --- a/test/Integration/Truncate/Cpp/openmp-gpu.cpp +++ b/test/Integration/Truncate/Cpp/openmp-gpu.cpp @@ -1,7 +1,7 @@ // clang-format off // RUN: %clang -O3 %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out // RUN: %clang -O3 -fopenmp %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out -// RUN: if [ "%hasOpenMPGPU" == "1" ]; then %clang -O3 -fopenmp --offload-arch=native %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out; fi +// RUN: if [ "%hasOpenMPGPU" == "1" ]; then %clang -O3 -fopenmp --offload-arch=native -nogpulib %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out; fi // clang-format on #include "../../test_utils.h" diff --git a/test/lit.site.cfg.py.in b/test/lit.site.cfg.py.in index 6c456a51..48ec93a2 100644 --- a/test/lit.site.cfg.py.in +++ b/test/lit.site.cfg.py.in @@ -108,6 +108,10 @@ config.substitutions.append(('%loadLLDRaptor', newPM)) link = "-L@RAPTOR_BINARY_DIR@/runtime/ -lstdc++ -lmpfr -lRaptor-RT-" + config.llvm_ver config.substitutions.append(('%linkRaptorRT', link)) +link = "-L@RAPTOR_BINARY_DIR@/runtime/ -lstdc++ -lmpfr -lRaptor-RT-" + config.llvm_ver + +config.substitutions.append(('%includeRaptorRT', '-I@RAPTOR_SOURCE_DIR@/runtime/include/public')) + config.substitutions.append(('%hasMPFR', has_mpfr)) # Let the main config do the real work. 
@@ -122,7 +126,7 @@ import subprocess def has_openmp_gpu(): try: result = subprocess.run( - ["llvm-offload-device-info"], + [os.path.join(config.llvm_tools_dir, "llvm-offload-device-info")], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True diff --git a/wrappers/raptor-clang++.in b/wrappers/raptor-clang++.in index 7df2ace7..7e90afda 100644 --- a/wrappers/raptor-clang++.in +++ b/wrappers/raptor-clang++.in @@ -3,4 +3,4 @@ CMAKE_INSTALL_PREFIX="@CMAKE_INSTALL_PREFIX@" LLVM_VERSION_MAJOR="@LLVM_VERSION_MAJOR@" CLANGPLUSPLUS_PATH="@RAPTOR_CLANGPLUSPLUS_PATH@" -exec "$CLANGPLUSPLUS_PATH" -fpass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLVMRaptor-$LLVM_VERSION_MAJOR.so" -L"$CMAKE_INSTALL_PREFIX/lib" -lstdc++ -lmpfr -lRaptor-RT-"$LLVM_VERSION_MAJOR" -fuse-ld=lld -Wl,--load-pass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLDRaptor-$LLVM_VERSION_MAJOR.so" "$@" +exec "$CLANGPLUSPLUS_PATH" -I"$CMAKE_INSTALL_PREFIX/include/" -fpass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLVMRaptor-$LLVM_VERSION_MAJOR.so" -L"$CMAKE_INSTALL_PREFIX/lib" -lstdc++ -lmpfr -lRaptor-RT-"$LLVM_VERSION_MAJOR" -fuse-ld=lld -Wl,--load-pass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLDRaptor-$LLVM_VERSION_MAJOR.so" "$@" diff --git a/wrappers/raptor-clang.in b/wrappers/raptor-clang.in index c0cfa465..d084b77b 100644 --- a/wrappers/raptor-clang.in +++ b/wrappers/raptor-clang.in @@ -3,4 +3,4 @@ CMAKE_INSTALL_PREFIX="@CMAKE_INSTALL_PREFIX@" LLVM_VERSION_MAJOR="@LLVM_VERSION_MAJOR@" CLANG_PATH="@RAPTOR_CLANG_PATH@" -exec "$CLANG_PATH" -fpass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLVMRaptor-$LLVM_VERSION_MAJOR.so" -L"$CMAKE_INSTALL_PREFIX/lib" -lstdc++ -lmpfr -lRaptor-RT-"$LLVM_VERSION_MAJOR" -fuse-ld=lld -Wl,--load-pass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLDRaptor-$LLVM_VERSION_MAJOR.so" "$@" +exec "$CLANG_PATH" -I"$CMAKE_INSTALL_PREFIX/include/" -fpass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLVMRaptor-$LLVM_VERSION_MAJOR.so" -L"$CMAKE_INSTALL_PREFIX/lib" -lstdc++ -lmpfr -lRaptor-RT-"$LLVM_VERSION_MAJOR" -fuse-ld=lld -Wl,--load-pass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLDRaptor-$LLVM_VERSION_MAJOR.so" "$@"
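
Usage sketch (illustrative, not part of the patch): the snippet below exercises the flop-logging entry points this patch adds -- __raptor_log_flops, __raptor_set_flop_log_double, __raptor_clear_flop_log_double -- and can be built with the raptor-clang++ wrapper, which now adds the installed include directory. The kernel name `dot` and the paths "flops.bin" / "exponents.png" are placeholders, not names from the patch.

    // Build with the wrapper installed by this patch, e.g.:
    //   raptor-clang++ -O2 example.cpp -o example
    #include "raptor/raptor.h"
    #include <cstdio>

    static double dot(const double *a, const double *b, int n) {
      double s = 0.0;
      for (int i = 0; i < n; ++i)
        s += a[i] * b[i]; // the instrumented clone logs each fmul/fadd operand
      return s;
    }

    int main() {
      double a[4] = {1, 2, 3, 4}, b[4] = {4, 3, 2, 1};
      __raptor_set_flop_log_double("flops.bin");   // open the double-precision log
      double s = __raptor_log_flops(dot)(a, b, 4); // call the instrumented clone
      printf("dot = %f\n", s);
      __raptor_clear_flop_log_double();            // reset the logger, flushing the file
      return 0;
    }

The resulting raw stream of doubles can then be inspected with the installed script, e.g. `raptor_plot_float_histogram.py flops.bin --dtype float64 --output exponents.png`. Alternatively, running the binary with RAPTOR_FLOP_LOG_PREFIX=<prefix> set (as the new log.cpp test does) opens one log per float type, with the type name appended to the prefix.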