From 4d1eeb361441d834a687d1df59f165debeffc9f5 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Thu, 10 Jul 2025 17:20:59 +0900 Subject: [PATCH] WIP Histogram logging missing file WIP Refactoring complete WIP Working flop logging test pretty good undef DUmp to file Fixes Log to file Test clang-raptor --- CMakeLists.txt | 4 + pass/Raptor.cpp | 68 +++++- pass/RaptorLogic.cpp | 88 ++++---- pass/RaptorLogic.h | 120 ++++++++++- runtime/CMakeLists.txt | 23 +- runtime/include/private/raptor/Common.h | 32 +-- .../{private => public}/raptor/FloatTypes.def | 2 + runtime/include/public/raptor/raptor.h | 22 +- runtime/ir/Flops.def | 8 +- runtime/ir/Log.cpp | 201 ++++++++++++++++++ runtime/ir/Mpfr.cpp | 165 +++++++------- runtime/obj/GarbageCollection.cpp | 1 - scripts/raptor_plot_float_histogram.py | 107 ++++++++++ test/Integration/Truncate/Cpp/log.cpp | 31 +++ test/Integration/Truncate/Cpp/openmp-gpu.cpp | 2 +- test/lit.site.cfg.py.in | 6 +- wrappers/raptor-clang++.in | 2 +- wrappers/raptor-clang.in | 2 +- 18 files changed, 704 insertions(+), 180 deletions(-) rename runtime/include/{private => public}/raptor/FloatTypes.def (80%) create mode 100644 runtime/ir/Log.cpp create mode 100755 scripts/raptor_plot_float_histogram.py create mode 100644 test/Integration/Truncate/Cpp/log.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4133f37b..06bb3f7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -365,6 +365,10 @@ install(EXPORT RaptorTargets DESTINATION "${INSTALL_CMAKE_DIR}" COMPONENT dev) +install(PROGRAMS + "${CMAKE_CURRENT_SOURCE_DIR}/scripts/raptor_plot_float_histogram.py" + DESTINATION bin) + add_subdirectory(runtime) add_subdirectory(test) add_subdirectory(wrappers) diff --git a/pass/Raptor.cpp b/pass/Raptor.cpp index 532b215f..e0b78fb6 100644 --- a/pass/Raptor.cpp +++ b/pass/Raptor.cpp @@ -30,6 +30,7 @@ #include #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/Instrumentation.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/ADT/ArrayRef.h" @@ -567,6 +568,43 @@ class RaptorBase { llvm_unreachable("Unknown float type"); } + bool HandleLogFlops(CallInst *CI) { + IRBuilder<> Builder(CI); + Function *F = parseFunctionParameter(CI); + if (!F) + return false; + unsigned ArgNum = CI->arg_size(); + if (ArgNum != 1) { + EmitFailure("TooManyArgs", CI->getDebugLoc(), CI, + "Had incorrect number of args ", *CI, " - expected 1"); + return false; + } + + RequestContext context(CI, &Builder); + for (auto FR : + {/* FloatRepresentation::getIEEE(16), */ FloatRepresentation::getIEEE( + 32), + FloatRepresentation::getIEEE(64)}) { + FunctionType *Ty = FunctionType::get( + Builder.getVoidTy(), FR.getMustBeBuiltinType(Builder.getContext()), + false); + FunctionCallee FlopLogger = CI->getModule()->getOrInsertFunction( + std::string(RaptorPrefix) + "log_flops_" + FR.getMangling(), Ty); + llvm::Value *Res = Logic.CreateTruncateFunc( + context, F, + TruncationConfiguration::getInitialLogFlops( + FR, *cast(FlopLogger.getCallee()))); + if (!Res) + return false; + F = cast(Res); + } + llvm::Value *Res = Builder.CreatePointerCast(F, CI->getType()); + + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + return true; + } + bool HandleTruncateFunc(CallInst *CI, TruncateMode Mode) { IRBuilder<> Builder(CI); Function *F = parseFunctionParameter(CI); @@ -583,7 +621,8 @@ class RaptorBase { RequestContext context(CI, &Builder); llvm::Value *res = Logic.CreateTruncateFunc( - context, F, TruncationConfiguration::getInitial(Truncation, Mode)); + context, F, + 
TruncationConfiguration::getInitial(Truncation, Builder.getContext())); if (!res) return false; res = Builder.CreatePointerCast(res, CI->getType()); @@ -718,7 +757,7 @@ class RaptorBase { Function *TruncatedFunc = Logic.CreateTruncateFunc(context, &F, TruncationConfiguration::getInitial( - Truncation, TruncOpFullModuleMode)); + Truncation, Builder.getContext())); ValueToValueMapTy Mapping; for (auto &&[Arg, TArg] : llvm::zip(F.args(), TruncatedFunc->args())) @@ -790,6 +829,7 @@ class RaptorBase { Changed = true; } + SmallVector toLogFlops; SmallVector toTruncateFuncMem; SmallVector toTruncateFuncOp; SmallVector toTruncateValue; @@ -1017,11 +1057,15 @@ class RaptorBase { } bool enableRaptor = false; + bool logFlops = false; bool truncateFuncOp = false; bool truncateFuncMem = false; bool truncateValue = false; bool expandValue = false; if (false) { + } else if (Fn->getName().contains("__raptor_log_flops")) { + enableRaptor = true; + logFlops = true; } else if (Fn->getName().contains("__raptor_truncate_mem_func")) { enableRaptor = true; truncateFuncMem = true; @@ -1073,7 +1117,11 @@ class RaptorBase { } goto retry; } - if (truncateFuncOp) + if (false) + abort(); + else if (logFlops) + toLogFlops.push_back(CI); + else if (truncateFuncOp) toTruncateFuncOp.push_back(CI); else if (truncateFuncMem) toTruncateFuncMem.push_back(CI); @@ -1095,18 +1143,16 @@ class RaptorBase { } } - for (auto call : toTruncateFuncMem) { + for (auto call : toLogFlops) + HandleLogFlops(call); + for (auto call : toTruncateFuncMem) HandleTruncateFunc(call, TruncMemMode); - } - for (auto call : toTruncateFuncOp) { + for (auto call : toTruncateFuncOp) HandleTruncateFunc(call, TruncOpMode); - } - for (auto call : toTruncateValue) { + for (auto call : toTruncateValue) HandleTruncateValue(call, true); - } - for (auto call : toExpandValue) { + for (auto call : toExpandValue) HandleTruncateValue(call, false); - } return Changed; } diff --git a/pass/RaptorLogic.cpp b/pass/RaptorLogic.cpp index 62e27d3b..2d9d230a 100644 --- a/pass/RaptorLogic.cpp +++ b/pass/RaptorLogic.cpp @@ -62,7 +62,7 @@ using namespace llvm; static Value *floatValTruncate(IRBuilderBase &B, Value *v, - FloatTruncation truncation) { + TruncationConfiguration truncation) { if (truncation.isToFPRT()) return v; @@ -73,7 +73,7 @@ static Value *floatValTruncate(IRBuilderBase &B, Value *v, } static Value *floatValExpand(IRBuilderBase &B, Value *v, - FloatTruncation truncation) { + TruncationConfiguration truncation) { if (truncation.isToFPRT()) return v; @@ -84,26 +84,18 @@ static Value *floatValExpand(IRBuilderBase &B, Value *v, } static Value *floatMemTruncate(IRBuilderBase &B, Value *v, - FloatTruncation truncation) { - if (isa(v->getType())) - report_fatal_error("vector operations not allowed in mem trunc mode"); - - Type *toTy = truncation.getToType(B.getContext()); - return B.CreateBitCast(v, toTy); + TruncationConfiguration truncation) { + return v; } static Value *floatMemExpand(IRBuilderBase &B, Value *v, - FloatTruncation truncation) { - if (isa(v->getType())) - report_fatal_error("vector operations not allowed in mem trunc mode"); - - Type *fromTy = truncation.getFromType(B.getContext()); - return B.CreateBitCast(v, fromTy); + TruncationConfiguration truncation) { + return v; } class TruncateUtils { protected: - FloatTruncation truncation; + TruncationConfiguration TC; llvm::Module *M; Type *fromType; Type *toType; @@ -111,14 +103,17 @@ class TruncateUtils { RaptorLogic &Logic; Value *UnknownLoc; Value *scratch = nullptr; + CustomArgsTy CustomArgs; + 
std::string RTName; private: std::string getOriginalFPRTName(std::string Name) { - return std::string(RaptorFPRTOriginalPrefix) + truncation.mangleFrom() + + return std::string(RaptorPrefix) + RTName + "_original_" + TC.mangleFrom() + "_" + Name; } std::string getFPRTName(std::string Name) { - return std::string(RaptorFPRTPrefix) + truncation.mangleFrom() + "_" + Name; + return std::string(RaptorPrefix) + RTName + "_" + TC.mangleFrom() + "_" + + Name; } // Creates a function which contains the original floating point operation. @@ -169,9 +164,7 @@ class TruncateUtils { const SmallVectorImpl &ArgsIn, llvm::Type *RetTy, Value *LocStr) { SmallVector Args(ArgsIn.begin(), ArgsIn.end()); - Args.push_back(B.getInt64(truncation.getTo().getExponentWidth())); - Args.push_back(B.getInt64(truncation.getTo().getSignificandWidth())); - Args.push_back(B.getInt64(truncation.getMode())); + Args.append(CustomArgs); Args.push_back(LocStr); Args.push_back(scratch); @@ -189,11 +182,11 @@ class TruncateUtils { return CI; } - TruncateUtils(FloatTruncation truncation, Module *M, RaptorLogic &Logic) - : truncation(truncation), M(M), ctx(M->getContext()), Logic(Logic) { - fromType = truncation.getFromType(ctx); - toType = truncation.getToType(ctx); - + TruncateUtils(TruncationConfiguration TC, Module *M, RaptorLogic &Logic) + : TC(TC), M(M), ctx(M->getContext()), Logic(Logic), + CustomArgs(TC.CustomArgs), RTName(TC.RTName) { + fromType = TC.getFromType(M->getContext()); + toType = TC.getToType(M->getContext()); UnknownLoc = getUniquedLocStr(nullptr); scratch = ConstantPointerNull::get(PointerType::get(M->getContext(), 0)); } @@ -401,7 +394,7 @@ class TruncateGenerator : public llvm::InstVisitor, public TruncateUtils { private: ValueToValueMapTy &OriginalToNewFn; - FloatTruncation Truncation; + TruncationConfiguration TC; TruncateMode Mode; RaptorLogic &Logic; LLVMContext &Ctx; @@ -410,9 +403,9 @@ class TruncateGenerator : public llvm::InstVisitor, TruncateGenerator(ValueToValueMapTy &originalToNewFn, Function *oldFunc, Function *newFunc, RaptorLogic &Logic, TruncationConfiguration TC) - : TruncateUtils(TC.Truncation, newFunc->getParent(), Logic), - OriginalToNewFn(originalToNewFn), Truncation(TC.Truncation), - Mode(Truncation.getMode()), Logic(Logic), Ctx(newFunc->getContext()) { + : TruncateUtils(TC, newFunc->getParent(), Logic), + OriginalToNewFn(originalToNewFn), TC(TC), Mode(TC.getMode()), + Logic(Logic), Ctx(newFunc->getContext()) { auto AllocScratch = [&]() { // TODO we should check at the end if we never used the scracth we should @@ -444,7 +437,7 @@ class TruncateGenerator : public llvm::InstVisitor, } } }; - if (Truncation.isToFPRT()) { + if (TC.isToFPRT()) { if (Mode == TruncOpMode) { if (TC.NeedTruncChange || TC.NeedNewScratch) AllocScratch(); @@ -503,10 +496,10 @@ class TruncateGenerator : public llvm::InstVisitor, case TruncMemMode: if (isa(v)) return createFPRTConstCall(B, v); - return floatMemTruncate(B, v, Truncation); + return floatMemTruncate(B, v, TC); case TruncOpMode: case TruncOpFullModuleMode: - return floatValTruncate(B, v, Truncation); + return floatValTruncate(B, v, TC); } llvm_unreachable("Unknown trunc mode"); } @@ -514,10 +507,10 @@ class TruncateGenerator : public llvm::InstVisitor, Value *expand(IRBuilder<> &B, Value *v) { switch (Mode) { case TruncMemMode: - return floatMemExpand(B, v, Truncation); + return floatMemExpand(B, v, TC); case TruncOpMode: case TruncOpFullModuleMode: - return floatValExpand(B, v, Truncation); + return floatValExpand(B, v, TC); } llvm_unreachable("Unknown 
trunc mode"); } @@ -527,7 +520,7 @@ class TruncateGenerator : public llvm::InstVisitor, case UnaryOperator::FNeg: { if (I.getOperand(0)->getType() != getFromType()) return; - if (!Truncation.isToFPRT()) + if (!TC.isToFPRT()) return; auto newI = getNewFromOriginal(&I); @@ -565,7 +558,7 @@ class TruncateGenerator : public llvm::InstVisitor, Args.push_back(truncLHS); Args.push_back(truncRHS); Instruction *nres; - if (Truncation.isToFPRT()) + if (TC.isToFPRT()) nres = createFPRTOpCall(B, CI, B.getInt1Ty(), Args); else nres = @@ -685,9 +678,9 @@ class TruncateGenerator : public llvm::InstVisitor, auto newLHS = truncate(B, getNewFromOriginal(oldLHS)); auto newRHS = truncate(B, getNewFromOriginal(oldRHS)); Instruction *nres = nullptr; - if (Truncation.isToFPRT()) { + if (TC.isToFPRT()) { SmallVector Args({newLHS, newRHS}); - nres = createFPRTOpCall(B, BO, Truncation.getToType(Ctx), Args); + nres = createFPRTOpCall(B, BO, getToType(), Args); } else { nres = cast(B.CreateBinOp(BO.getOpcode(), newLHS, newRHS)); } @@ -748,7 +741,7 @@ class TruncateGenerator : public llvm::InstVisitor, Instruction *intr = nullptr; Value *nres = nullptr; - if (Truncation.isToFPRT()) { + if (TC.isToFPRT()) { nres = intr = createFPRTOpCall(B, CI, retTy, new_ops); } else { // TODO check that the intrinsic is overloaded @@ -832,11 +825,13 @@ class TruncateGenerator : public llvm::InstVisitor, } Value *GetShadow(RequestContext &ctx, Value *v, bool WillPassScratch) { - if (auto F = dyn_cast(v)) - return Logic.CreateTruncateFunc( - ctx, F, - TruncationConfiguration{Truncation, Mode, !WillPassScratch, false, - WillPassScratch}); + if (auto F = dyn_cast(v)) { + auto NewTC = TC; + NewTC.NeedNewScratch = !WillPassScratch; + NewTC.NeedTruncChange = false; + NewTC.ScratchFromArgs = WillPassScratch; + return Logic.CreateTruncateFunc(ctx, F, NewTC); + } llvm::errs() << " unknown get truncated func: " << *v << "\n"; llvm_unreachable("unknown get truncated func"); return v; @@ -1006,8 +1001,9 @@ bool RaptorLogic::CreateTruncateValue(RequestContext context, Value *v, IRBuilderBase &B = *context.ip; Value *converted = nullptr; - TruncateUtils TU(Truncation, B.GetInsertBlock()->getParent()->getParent(), - *this); + TruncateUtils TU( + TruncationConfiguration::getInitial(Truncation, v->getContext()), + B.GetInsertBlock()->getParent()->getParent(), *this); if (isTruncate) converted = TU.createFPRTNewCall(B, v); else diff --git a/pass/RaptorLogic.h b/pass/RaptorLogic.h index 9c79fea4..a4efa5d0 100644 --- a/pass/RaptorLogic.h +++ b/pass/RaptorLogic.h @@ -17,8 +17,10 @@ #include #include +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" @@ -34,6 +36,7 @@ extern llvm::cl::opt RaptorPrint; extern llvm::cl::opt RaptorJuliaAddrLoad; } +constexpr char RaptorPrefix[] = "__raptor_"; constexpr char RaptorFPRTPrefix[] = "__raptor_fprt_"; constexpr char RaptorFPRTOriginalPrefix[] = "__raptor_fprt_original_"; @@ -198,6 +201,11 @@ struct FloatRepresentation { (w == F64Width && SignificandWidth == F64Significand); } + llvm::Type *getMustBeBuiltinType(llvm::LLVMContext &ctx) const { + assert(canBeBuiltin()); + return getTypeForWidth(ctx, getWidth(), /*builtinFloat=*/true); + } + llvm::Type *getBuiltinType(llvm::LLVMContext &ctx) const { if (!canBeBuiltin()) return nullptr; @@ -288,22 +296,31 @@ struct FloatTruncation { std::string mangleFrom() const { return From.getMangling(); } }; 
+using CustomArgsTy = llvm::SmallVector; + class TruncationConfiguration { public: - FloatTruncation Truncation; + FloatRepresentation FromRepr; TruncateMode Mode; bool NeedNewScratch; bool NeedTruncChange; bool ScratchFromArgs; + CustomArgsTy CustomArgs; + std::string CustomMangle; + std::string RTName; + + bool IsToFPRT; + std::optional ToRepr; + std::string mangle() { return std::string(truncateModeStr(Mode)) + "_func_" + - Truncation.mangleTruncation() + "_" + + FromRepr.getMangling() + "_" + CustomMangle + "_" + std::to_string(NeedTruncChange) + "_" + std::to_string(NeedNewScratch) + "_" + std::to_string(ScratchFromArgs); } static auto toTuple(const TruncationConfiguration &TC) { - return std::tuple(TC.Truncation, TC.Mode, TC.NeedNewScratch, + return std::tuple(TC.FromRepr, TC.CustomMangle, TC.Mode, TC.NeedNewScratch, TC.NeedTruncChange, TC.ScratchFromArgs); } bool operator==(const TruncationConfiguration &Other) const { @@ -313,18 +330,99 @@ class TruncationConfiguration { return toTuple(*this) < toTuple(Other); } + std::string mangleFrom() { return FromRepr.getMangling(); } + + bool isToFPRT() { return IsToFPRT; } + + TruncateMode getMode() { return Mode; } + + llvm::Type *getFromType(llvm::LLVMContext &Ctx) { + return FromRepr.getBuiltinType(Ctx); + } + + llvm::Type *getToType(llvm::LLVMContext &Ctx) { + if (isToFPRT() || !ToRepr.has_value()) + return getFromType(Ctx); + assert(ToRepr.has_value()); + return ToRepr->getBuiltinType(Ctx); + } + + static TruncationConfiguration getInitialLogFlops(FloatRepresentation FR, + llvm::Function &F) { + llvm::IRBuilder<> B(F.getContext()); + CustomArgsTy Args; + Args.push_back(&F); + return TruncationConfiguration{FR, TruncOpMode, true, false, + false, Args, "log", "fprtlog", + true, std::nullopt}; + } + static TruncationConfiguration getInitial(FloatTruncation Truncation, - TruncateMode Mode) { - if (Mode == TruncOpMode) { + llvm::LLVMContext &Ctx) { + llvm::IRBuilder<> B(Ctx); + CustomArgsTy Args; + Args.push_back(B.getInt64(Truncation.getTo().getExponentWidth())); + Args.push_back(B.getInt64(Truncation.getTo().getSignificandWidth())); + Args.push_back(B.getInt64(Truncation.getMode())); + std::string Mangle = "to_" + Truncation.getTo().getMangling(); + if (Truncation.getMode() == TruncOpMode) { if (Truncation.isToFPRT()) - return TruncationConfiguration{Truncation, Mode, true, true, false}; + return TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + true, + true, + false, + Args, + Mangle, + "fprt", + true, + std::nullopt}; else - return TruncationConfiguration{Truncation, Mode, false, false, false}; - } else if (Mode == TruncMemMode) { + return TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + false, + false, + false, + Args, + Mangle, + "", + false, + Truncation.getTo()}; + } else if (Truncation.getMode() == TruncMemMode) { assert(Truncation.isToFPRT()); - return TruncationConfiguration{Truncation, Mode, false, false, false}; - } else if (Mode == TruncOpFullModuleMode) { - return TruncationConfiguration{Truncation, Mode, true, false, false}; + return TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + false, + false, + false, + Args, + Mangle, + "fprt", + true, + std::nullopt}; + } else if (Truncation.getMode() == TruncOpFullModuleMode) { + if (Truncation.isToFPRT()) + return TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + true, + false, + false, + Args, + Mangle, + "fprt", + true, + std::nullopt}; + else + return 
TruncationConfiguration{Truncation.getFrom(), + Truncation.getMode(), + true, + false, + false, + Args, + Mangle, + "", + false, + Truncation.getTo()}; } else { llvm_unreachable(""); } diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index d0525b43..62de130e 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -5,6 +5,7 @@ add_library( obj/GarbageCollection.cpp ir/Mpfr.cpp ir/Fprt.cpp + ir/Log.cpp ) # add_library( @@ -30,15 +31,23 @@ add_library( # OUTPUT_NAME "Raptor-RT-FP-${LLVM_VERSION_MAJOR}" # ) -set(RAPTOR_ALL_INCLUDE_DIRS - ${CMAKE_CURRENT_SOURCE_DIR}/include/public - ${CMAKE_CURRENT_SOURCE_DIR}/include/private +set(RAPTOR_PRIVATE_INCLUDE_DIR + ${CMAKE_CURRENT_SOURCE_DIR}/include/private/ +) +set(RAPTOR_PUBLIC_INCLUDE_DIR + ${CMAKE_CURRENT_SOURCE_DIR}/include/public/ +) + +target_include_directories(Raptor-RT-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_PRIVATE_INCLUDE_DIR}) +target_include_directories(Raptor-RT-${LLVM_VERSION_MAJOR} PUBLIC + $ + $ ) -target_include_directories(Raptor-RT-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_ALL_INCLUDE_DIRS}) -# target_include_directories(Raptor-RT-GC-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_ALL_INCLUDE_DIRS}) -# target_include_directories(Raptor-RT-FP-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_ALL_INCLUDE_DIRS}) -# target_include_directories(Raptor-RT-Count-${LLVM_VERSION_MAJOR} PRIVATE ${RAPTOR_ALL_INCLUDE_DIRS}) +install( + DIRECTORY ${RAPTOR_PUBLIC_INCLUDE_DIR} + DESTINATION include +) install(TARGETS Raptor-RT-${LLVM_VERSION_MAJOR} LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT Raptor-RT-${LLVM_VERSION_MAJOR} diff --git a/runtime/include/private/raptor/Common.h b/runtime/include/private/raptor/Common.h index 20b3e3b9..3fdfa753 100644 --- a/runtime/include/private/raptor/Common.h +++ b/runtime/include/private/raptor/Common.h @@ -9,7 +9,9 @@ #define MAX_MPFR_OPERANDS 3 #define __RAPTOR_MPFR_ATTRIBUTES extern "C" -#define __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES extern "C" +#define __RAPTOR_MPFR_DECL_ATTRIBUTES extern "C" +#define __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES extern "C" __attribute__((weak)) + #define __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE GMP_RNDN #define __RAPTOR_MPFR_MALLOC_FAILURE_EXIT_STATUS 114 @@ -44,16 +46,16 @@ static inline bool __raptor_fprt_is_full_module_op_mode(int64_t mode) { return mode & 0b0100; } -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES void raptor_fprt_gc_dump_status(); -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES double raptor_fprt_gc_mark_seen(double a); -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES void raptor_fprt_gc_doit(); -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES void raptor_fprt_excl_trunc_start(); -__RAPTOR_MPFR_ATTRIBUTES +__RAPTOR_MPFR_DECL_ATTRIBUTES void raptor_fprt_excl_trunc_end(); template To raptor_bitcast(From from) { @@ -85,50 +87,48 @@ template To checked_raptor_bitcast(From from) { return checked_raptor_bitcast<__raptor_fp *>(d); \ } #include "raptor/FloatTypes.def" -#undef RAPTOR_FLOAT_TYPE #define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ CPP_TY __raptor_fprt_##FROM_TY##_get(CPP_TY _a, int64_t exponent, \ int64_t significand, int64_t mode, \ const char *loc, void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ CPP_TY __raptor_fprt_##FROM_TY##_new(CPP_TY _a, int64_t exponent, \ int64_t significand, int64_t mode, \ const char *loc, void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ CPP_TY 
__raptor_fprt_##FROM_TY##_const(CPP_TY _a, int64_t exponent, \ int64_t significand, int64_t mode, \ const char *loc, void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ __raptor_fp *__raptor_fprt_##FROM_TY##_new_intermediate( \ int64_t exponent, int64_t significand, int64_t mode, const char *loc, \ void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ void __raptor_fprt_##FROM_TY##_delete(CPP_TY a, int64_t exponent, \ int64_t significand, int64_t mode, \ const char *loc, void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ void *__raptor_fprt_##FROM_TY##_get_scratch(int64_t to_e, int64_t to_m, \ int64_t mode, const char *loc, \ void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ void __raptor_fprt_##FROM_TY##_free_scratch(int64_t to_e, int64_t to_m, \ int64_t mode, const char *loc, \ void *scratch); \ \ - __RAPTOR_MPFR_ATTRIBUTES \ + __RAPTOR_MPFR_DECL_ATTRIBUTES \ void __raptor_fprt_##FROM_TY##_trunc_change(int64_t is_push, int64_t to_e, \ int64_t to_m, int64_t mode, \ const char *loc, void *scratch); #include "raptor/FloatTypes.def" -#undef RAPTOR_FLOAT_TYPE #endif // _RAPTOR_COMMON_H_ diff --git a/runtime/include/private/raptor/FloatTypes.def b/runtime/include/public/raptor/FloatTypes.def similarity index 80% rename from runtime/include/private/raptor/FloatTypes.def rename to runtime/include/public/raptor/FloatTypes.def index 940cae05..7e95b2f0 100644 --- a/runtime/include/private/raptor/FloatTypes.def +++ b/runtime/include/public/raptor/FloatTypes.def @@ -2,3 +2,5 @@ RAPTOR_FLOAT_TYPE(double, ieee_64) RAPTOR_FLOAT_TYPE(float, ieee_32) // RAPTOR_FLOAT_TYPE(half, ieee_16) + +#undef RAPTOR_FLOAT_TYPE diff --git a/runtime/include/public/raptor/raptor.h b/runtime/include/public/raptor/raptor.h index 0831d7c1..321e8614 100644 --- a/runtime/include/public/raptor/raptor.h +++ b/runtime/include/public/raptor/raptor.h @@ -1,10 +1,12 @@ #ifndef _RAPTOR_FPRT_FPRT_H_ #define _RAPTOR_FPRT_FPRT_H_ +#include #include #ifdef __cplusplus -template fty *__raptor_truncate_op_func(fty *, int, int, int, int); +template +fty *__raptor_truncate_op_func(fty *, int, int, int, int); template fty *__raptor_truncate_op_func(fty *, int, int, int); #endif @@ -21,9 +23,27 @@ void __raptor_fprt_delete_all(); long long __raptor_get_trunc_flop_count(); long long f_raptor_get_trunc_flop_count(); +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + struct __raptor_logged_flops_##CPP_TY { \ + CPP_TY *vals; \ + size_t num; \ + }; \ + void __raptor_clear_flop_log_##CPP_TY(); \ + void __raptor_set_flop_log_##CPP_TY(const char *path); +#include "FloatTypes.def" #ifdef __cplusplus } #endif +#ifdef __cplusplus +template fty *__raptor_log_flops(fty *); +template +fty *__raptor_truncate_mem_func(fty *, int, int, int, int); +template +fty *__raptor_truncate_op_func(fty *, int, int, int, int); +template double __raptor_truncate_mem_value(Tys...); +template double __raptor_expand_mem_value(Tys...); +#endif + #endif // _RAPTOR_FPRT_FPRT_H_ diff --git a/runtime/ir/Flops.def b/runtime/ir/Flops.def index 20910774..d3908375 100644 --- a/runtime/ir/Flops.def +++ b/runtime/ir/Flops.def @@ -132,11 +132,15 @@ __RAPTOR_MPFR_LROUND(intr, llvm_lround_i32_f32, ieee_32, int32_t, float, d, MPFR_RNDA); // Ternary operation -__RAPTOR_MPFR_FMULADD(llvm_fmuladd, ieee_64, double, d, f64, +__RAPTOR_MPFR_FMULADD(intr, llvm_fmuladd, ieee_64, double, d, f64, __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE); -__RAPTOR_MPFR_FMULADD(llvm_fma, ieee_64, 
double, d, f64, +__RAPTOR_MPFR_FMULADD(intr, llvm_fma, ieee_64, double, d, f64, __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE); +// llvm.is.fpclass +__RAPTOR_MPFR_ISCLASS(ieee_64, double, f64) +__RAPTOR_MPFR_ISCLASS(ieee_32, float, f32) + // Comparisons __RAPTOR_MPFR_FCMP(oeq, 1, == 0); __RAPTOR_MPFR_FCMP(ueq, 0, == 0); diff --git a/runtime/ir/Log.cpp b/runtime/ir/Log.cpp new file mode 100644 index 00000000..1e83f216 --- /dev/null +++ b/runtime/ir/Log.cpp @@ -0,0 +1,201 @@ +#include "raptor/Common.h" +#include "raptor/raptor.h" + +#include +#include +#include +#include +#include +#include +#include + +typedef void (*LogFuncTy_ieee_64)(double); +typedef void (*LogFuncTy_ieee_32)(float); +// typedef void (*LogFuncTy_ieee_16)(half); + +void __raptor_fprt_trunc_change(int64_t is_push, int64_t to_e, int64_t to_m, + int64_t mode, const char *loc, void *scratch) {} + +namespace { +struct FloatLoggerTy { + +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + std::unique_ptr OS_##FROM_TY; +#include "raptor/FloatTypes.def" + + template const char *getTypeStr() { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + if constexpr (std::is_same::value) \ + return #CPP_TY; +#include "raptor/FloatTypes.def" + abort(); + } + + template void clear() { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + if constexpr (std::is_same::value) \ + OS_##FROM_TY.reset(nullptr); +#include "raptor/FloatTypes.def" + } + + template void setLogPath(const std::string Path) { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + if constexpr (std::is_same::value) { \ + std::cerr << "Writing flop log for " #CPP_TY " to '" << Path << "'...\n"; \ + OS_##FROM_TY = std::make_unique( \ + Path, std::ios_base::out | std::ios_base::binary); \ + } +#include "raptor/FloatTypes.def" + } + + template void log(T F) { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + if constexpr (std::is_same::value) \ + if (OS_##FROM_TY) \ + OS_##FROM_TY->write(reinterpret_cast(&F), sizeof(F)); +#include "raptor/FloatTypes.def" + } + + FloatLoggerTy() { + if (char *C = getenv("RAPTOR_FLOP_LOG_PREFIX")) { +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + setLogPath(std::string(C) + "." 
#CPP_TY); +#include "raptor/FloatTypes.def" + } + } + + ~FloatLoggerTy() {} + +} FloatLogger; +} // namespace + +#define RAPTOR_FLOAT_TYPE(CPP_TY, FROM_TY) \ + __RAPTOR_MPFR_ATTRIBUTES \ + CPP_TY __raptor_fprtlog_##FROM_TY##_abs_err(CPP_TY a, CPP_TY b) { \ + return std::abs(a - b); \ + } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_fprtlog_##FROM_TY##_trunc_change( \ + int64_t is_push, int64_t to_e, int64_t to_m, int64_t mode, \ + const char *loc, void *scratch) { \ + __raptor_fprt_trunc_change(is_push, to_e, to_m, mode, loc, scratch); \ + } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void *__raptor_fprtlog_##FROM_TY##_get_scratch( \ + int64_t to_e, int64_t to_m, int64_t mode, const char *loc, \ + void *scratch) { \ + return nullptr; \ + } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_fprtlog_##FROM_TY##_free_scratch( \ + int64_t to_e, int64_t to_m, int64_t mode, const char *loc, \ + void *scratch) {} \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_log_flops_##FROM_TY(CPP_TY a) { FloatLogger.log(a); } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_clear_flop_log_##CPP_TY() { FloatLogger.clear(); } \ + __RAPTOR_MPFR_ATTRIBUTES \ + void __raptor_set_flop_log_##CPP_TY(const char *path) { \ + FloatLogger.setLogPath(path); \ + } +#include "raptor/FloatTypes.def" + +#define __RAPTOR_MPFR_LROUND(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, RET, ARG1, \ + MPFR_SET_ARG1, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + RET __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a); \ + __RAPTOR_MPFR_ATTRIBUTES \ + RET __raptor_fprtlog_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, LogFuncTy_##FROM_TYPE f, const char *loc, void *scratch) { \ + f(a); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + a); \ + } + +#define __RAPTOR_MPFR_SINGOP(OP_TYPE, LLVM_OP_NAME, MPFR_FUNC_NAME, FROM_TYPE, \ + RET, MPFR_GET, ARG1, MPFR_SET_ARG1, \ + ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + RET __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a); \ + __RAPTOR_MPFR_ATTRIBUTES \ + RET __raptor_fprtlog_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, LogFuncTy_##FROM_TYPE f, const char *loc, void *scratch) { \ + f(a); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + a); \ + } + +#define __RAPTOR_MPFR_BIN_INT(OP_TYPE, LLVM_OP_NAME, MPFR_FUNC_NAME, \ + FROM_TYPE, RET, MPFR_GET, ARG1, MPFR_SET_ARG1, \ + ARG2, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + RET __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, ARG2 b); \ + __RAPTOR_MPFR_ATTRIBUTES \ + RET __raptor_fprtlog_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, ARG2 b, LogFuncTy_##FROM_TYPE f, const char *loc, \ + void *scratch) { \ + f(a); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + a, b); \ + } + +#define __RAPTOR_MPFR_BIN(OP_TYPE, LLVM_OP_NAME, MPFR_FUNC_NAME, FROM_TYPE, \ + RET, MPFR_GET, ARG1, MPFR_SET_ARG1, ARG2, \ + MPFR_SET_ARG2, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + RET __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, ARG2 b); \ + __RAPTOR_MPFR_ATTRIBUTES \ + RET __raptor_fprtlog_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + ARG1 a, ARG2 b, LogFuncTy_##FROM_TYPE f, const char *loc, \ + void *scratch) { \ + f(a); \ + f(b); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ + a, b); \ + } + +#define __RAPTOR_MPFR_FMULADD(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, TYPE, \ + 
MPFR_TYPE, LLVM_TYPE, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + TYPE \ + __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE a, TYPE b, TYPE c); \ + __RAPTOR_MPFR_ATTRIBUTES \ + TYPE __raptor_fprtlog_##FROM_TYPE##_intr_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE a, TYPE b, TYPE c, LogFuncTy_##FROM_TYPE f, int64_t mode, \ + const char *loc, void *scratch) { \ + f(a); \ + f(b); \ + f(c); \ + return __raptor_fprtlog_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + a, b, c); \ + } + +#define __RAPTOR_MPFR_FCMP_IMPL(NAME, ORDERED, CMP, FROM_TYPE, TYPE, MPFR_GET, \ + ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + bool __raptor_fprtlog_original_##FROM_TYPE##_fcmp_##NAME(TYPE a, TYPE b); \ + __RAPTOR_MPFR_ATTRIBUTES \ + bool __raptor_fprtlog_##FROM_TYPE##_fcmp_##NAME( \ + TYPE a, TYPE b, LogFuncTy_##FROM_TYPE f, const char *loc, \ + void *scratch) { \ + return __raptor_fprtlog_original_##FROM_TYPE##_fcmp_##NAME(a, b); \ + } + +#define __RAPTOR_MPFR_ISCLASS(FROM_TYPE, TYPE, LLVM_TYPE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES bool \ + __raptor_fprtlog_original_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + TYPE a, int32_t tests); \ + __RAPTOR_MPFR_ATTRIBUTES bool \ + __raptor_fprtlog_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + TYPE a, int32_t tests, LogFuncTy_##FROM_TYPE f, const char *loc, \ + void *scratch) { \ + return __raptor_fprtlog_original_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + a, tests); \ + } + +#include "Flops.def" diff --git a/runtime/ir/Mpfr.cpp b/runtime/ir/Mpfr.cpp index fc78b297..3333faa6 100644 --- a/runtime/ir/Mpfr.cpp +++ b/runtime/ir/Mpfr.cpp @@ -167,7 +167,6 @@ void __raptor_fprt_trunc_change(int64_t is_push, int64_t to_e, int64_t to_m, } #include "raptor/FloatTypes.def" -#undef RAPTOR_FLOAT_TYPE __RAPTOR_MPFR_ATTRIBUTES void __raptor_fprt_trunc_count(int64_t exponent, int64_t significand, @@ -441,73 +440,74 @@ void raptor_fprt_op_clear(); } \ } -#define __RAPTOR_MPFR_FMULADD(LLVM_OP_NAME, FROM_TYPE, TYPE, MPFR_TYPE, \ - LLVM_TYPE, ROUNDING_MODE) \ - __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ - TYPE __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ - TYPE a, TYPE b, TYPE c); \ - __RAPTOR_MPFR_ATTRIBUTES \ - TYPE __raptor_fprt_##FROM_TYPE##_intr_##LLVM_OP_NAME##_##LLVM_TYPE( \ - TYPE a, TYPE b, TYPE c, int64_t exponent, int64_t significand, \ - int64_t mode, const char *loc, mpfr_t *scratch) { \ - if (__raptor_fprt_is_op_mode(mode)) { \ - __raptor_fprt_trunc_count(exponent, significand, mode, loc, scratch); \ - mpfr_set_##MPFR_TYPE(scratch[0], a, ROUNDING_MODE); \ - mpfr_set_##MPFR_TYPE(scratch[1], b, ROUNDING_MODE); \ - mpfr_set_##MPFR_TYPE(scratch[2], c, ROUNDING_MODE); \ - mpfr_mul(scratch[0], scratch[0], scratch[1], ROUNDING_MODE); \ - mpfr_add(scratch[0], scratch[0], scratch[2], ROUNDING_MODE); \ - TYPE res = mpfr_get_##MPFR_TYPE(scratch[0], ROUNDING_MODE); \ - return res; \ - } else if (__raptor_fprt_is_mem_mode(mode)) { \ - __raptor_fp *ma = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ - a, exponent, significand, mode, loc, scratch); \ - __raptor_fp *mb = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ - b, exponent, significand, mode, loc, scratch); \ - __raptor_fp *mc = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ - c, exponent, significand, mode, loc, scratch); \ - RAPTOR_DUMP_INPUT(ma, OP_TYPE, LLVM_OP_NAME); \ - RAPTOR_DUMP_INPUT(mb, OP_TYPE, LLVM_OP_NAME); \ - RAPTOR_DUMP_INPUT(mc, OP_TYPE, LLVM_OP_NAME); \ - __raptor_fp *madd = 
__raptor_fprt_##FROM_TYPE##_new_intermediate( \ - exponent, significand, mode, loc, scratch); \ - madd->shadow = \ - __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ - ma->shadow, mb->shadow, mc->shadow); \ - if (excl_trunc) { \ - __raptor_fprt_##FROM_TYPE##_count(exponent, significand, mode, loc, \ - scratch); \ - madd->excl_result = \ - __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME( \ - ma->excl_result, mb->excl_result, mc->excl_result); \ - mpfr_set_##MPFR_TYPE(madd->result, madd->excl_result, ROUNDING_MODE); \ - } else { \ - __raptor_fprt_trunc_count(exponent, significand, mode, loc, scratch); \ - mpfr_t mmul; \ - mpfr_init2(mmul, significand + 1); /* see MPFR_FP_EMULATION */ \ - mpfr_mul(madd->result, ma->result, mb->result, ROUNDING_MODE); \ - mpfr_add(madd->result, madd->result, mc->result, ROUNDING_MODE); \ - mpfr_clear(mmul); \ - madd->excl_result = mpfr_get_##MPFR_TYPE(madd->result, ROUNDING_MODE); \ - } \ - RAPTOR_DUMP_RESULT(__raptor_fprt_##FROM_TYPE##_to_ptr(madd), OP_TYPE, \ - LLVM_OP_NAME); \ - double trunc = mpfr_get_##MPFR_TYPE( \ - madd->result, __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE); \ - double err = __raptor_fprt_##FROM_TYPE##_abs_err(trunc, madd->shadow); \ - if (!opdata[loc].count) \ - opdata[loc].op = #LLVM_OP_NAME; \ - if (trunc != 0 && err / trunc > SHADOW_ERR_REL) { \ - ++opdata[loc].count_thresh; \ - } else if (trunc == 0 && err > SHADOW_ERR_ABS) { \ - ++opdata[loc].count_thresh; \ - } \ - opdata[loc].l1_err += err; \ - ++opdata[loc].count; \ - return __raptor_fprt_ptr_to_##FROM_TYPE(madd); \ - } else { \ - abort(); \ - } \ +#define __RAPTOR_MPFR_FMULADD(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, TYPE, \ + MPFR_TYPE, LLVM_TYPE, ROUNDING_MODE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES \ + TYPE \ + __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE a, TYPE b, TYPE c); \ + __RAPTOR_MPFR_ATTRIBUTES \ + TYPE __raptor_fprt_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE a, TYPE b, TYPE c, int64_t exponent, int64_t significand, \ + int64_t mode, const char *loc, mpfr_t *scratch) { \ + if (__raptor_fprt_is_op_mode(mode)) { \ + __raptor_fprt_trunc_count(exponent, significand, mode, loc, scratch); \ + mpfr_set_##MPFR_TYPE(scratch[0], a, ROUNDING_MODE); \ + mpfr_set_##MPFR_TYPE(scratch[1], b, ROUNDING_MODE); \ + mpfr_set_##MPFR_TYPE(scratch[2], c, ROUNDING_MODE); \ + mpfr_mul(scratch[0], scratch[0], scratch[1], ROUNDING_MODE); \ + mpfr_add(scratch[0], scratch[0], scratch[2], ROUNDING_MODE); \ + TYPE res = mpfr_get_##MPFR_TYPE(scratch[0], ROUNDING_MODE); \ + return res; \ + } else if (__raptor_fprt_is_mem_mode(mode)) { \ + __raptor_fp *ma = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ + a, exponent, significand, mode, loc, scratch); \ + __raptor_fp *mb = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ + b, exponent, significand, mode, loc, scratch); \ + __raptor_fp *mc = __raptor_fprt_##FROM_TYPE##_to_ptr_checked( \ + c, exponent, significand, mode, loc, scratch); \ + RAPTOR_DUMP_INPUT(ma, OP_TYPE, LLVM_OP_NAME); \ + RAPTOR_DUMP_INPUT(mb, OP_TYPE, LLVM_OP_NAME); \ + RAPTOR_DUMP_INPUT(mc, OP_TYPE, LLVM_OP_NAME); \ + __raptor_fp *madd = __raptor_fprt_##FROM_TYPE##_new_intermediate( \ + exponent, significand, mode, loc, scratch); \ + madd->shadow = \ + __raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + ma->shadow, mb->shadow, mc->shadow); \ + if (excl_trunc) { \ + __raptor_fprt_##FROM_TYPE##_count(exponent, significand, mode, loc, \ + scratch); \ + madd->excl_result = \ + 
__raptor_fprt_original_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ + ma->excl_result, mb->excl_result, mc->excl_result); \ + mpfr_set_##MPFR_TYPE(madd->result, madd->excl_result, ROUNDING_MODE); \ + } else { \ + __raptor_fprt_trunc_count(exponent, significand, mode, loc, scratch); \ + mpfr_t mmul; \ + mpfr_init2(mmul, significand + 1); /* see MPFR_FP_EMULATION */ \ + mpfr_mul(madd->result, ma->result, mb->result, ROUNDING_MODE); \ + mpfr_add(madd->result, madd->result, mc->result, ROUNDING_MODE); \ + mpfr_clear(mmul); \ + madd->excl_result = mpfr_get_##MPFR_TYPE(madd->result, ROUNDING_MODE); \ + } \ + RAPTOR_DUMP_RESULT(__raptor_fprt_##FROM_TYPE##_to_ptr(madd), OP_TYPE, \ + LLVM_OP_NAME); \ + double trunc = mpfr_get_##MPFR_TYPE( \ + madd->result, __RAPTOR_MPFR_DEFAULT_ROUNDING_MODE); \ + double err = __raptor_fprt_##FROM_TYPE##_abs_err(trunc, madd->shadow); \ + if (!opdata[loc].count) \ + opdata[loc].op = #LLVM_OP_NAME; \ + if (trunc != 0 && err / trunc > SHADOW_ERR_REL) { \ + ++opdata[loc].count_thresh; \ + } else if (trunc == 0 && err > SHADOW_ERR_ABS) { \ + ++opdata[loc].count_thresh; \ + } \ + opdata[loc].l1_err += err; \ + ++opdata[loc].count; \ + return __raptor_fprt_ptr_to_##FROM_TYPE(madd); \ + } else { \ + abort(); \ + } \ } // TODO This does not currently make distinctions between ordered/unordered. @@ -645,10 +645,10 @@ void raptor_fprt_op_clear(); } \ } -#define __RAPTOR_MPFR_FMULADD(LLVM_OP_NAME, FROM_TYPE, TYPE, MPFR_TYPE, \ - LLVM_TYPE, ROUNDING_MODE) \ +#define __RAPTOR_MPFR_FMULADD(OP_TYPE, LLVM_OP_NAME, FROM_TYPE, TYPE, \ + MPFR_TYPE, LLVM_TYPE, ROUNDING_MODE) \ __RAPTOR_MPFR_ATTRIBUTES \ - TYPE __raptor_fprt_##FROM_TYPE##_intr_##LLVM_OP_NAME##_##LLVM_TYPE( \ + TYPE __raptor_fprt_##FROM_TYPE##_##OP_TYPE##_##LLVM_OP_NAME##_##LLVM_TYPE( \ TYPE a, TYPE b, TYPE c, int64_t exponent, int64_t significand, \ int64_t mode, const char *loc, mpfr_t *scratch) { \ if (__raptor_fprt_is_op_mode(mode)) { \ @@ -712,15 +712,18 @@ void raptor_fprt_op_clear(); } #endif // RAPTOR_FPRT_ENABLE_SHADOW_RESIDUALS -__RAPTOR_MPFR_ORIGINAL_ATTRIBUTES __attribute__((weak)) bool -__raptor_fprt_original_ieee_64_intr_llvm_is_fpclass_f64(double a, - int32_t tests); -__RAPTOR_MPFR_ATTRIBUTES bool __raptor_fprt_ieee_64_intr_llvm_is_fpclass_f64( - double a, int32_t tests, int64_t exponent, int64_t significand, - int64_t mode, const char *loc, mpfr_t *scratch) { - return __raptor_fprt_original_ieee_64_intr_llvm_is_fpclass_f64( - __raptor_fprt_ieee_64_get(a, exponent, significand, mode, loc, scratch), - tests); -} +#define __RAPTOR_MPFR_ISCLASS(FROM_TYPE, TYPE, LLVM_TYPE) \ + __RAPTOR_MPFR_ORIGINAL_ATTRIBUTES bool \ + __raptor_fprt_original_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + TYPE a, int32_t tests); \ + __RAPTOR_MPFR_ATTRIBUTES bool \ + __raptor_fprt_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + TYPE a, int32_t tests, int64_t exponent, int64_t significand, \ + int64_t mode, const char *loc, mpfr_t *scratch) { \ + return __raptor_fprt_original_##FROM_TYPE##_intr_llvm_is_fpclass_##LLVM_TYPE( \ + __raptor_fprt_ieee_64_get(a, exponent, significand, mode, loc, \ + scratch), \ + tests); \ + } #include "Flops.def" diff --git a/runtime/obj/GarbageCollection.cpp b/runtime/obj/GarbageCollection.cpp index ba81f16f..cd5a1354 100644 --- a/runtime/obj/GarbageCollection.cpp +++ b/runtime/obj/GarbageCollection.cpp @@ -105,7 +105,6 @@ struct { /* ignore for now */ \ } #include "raptor/FloatTypes.def" -#undef RAPTOR_FLOAT_TYPE __RAPTOR_MPFR_ATTRIBUTES void raptor_fprt_gc_dump_status() { diff 
--git a/scripts/raptor_plot_float_histogram.py b/scripts/raptor_plot_float_histogram.py new file mode 100755 index 00000000..da72d769 --- /dev/null +++ b/scripts/raptor_plot_float_histogram.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +import argparse +import numpy as np +import matplotlib.pyplot as plt + +def plot_exponent_distribution(filename, dtype='float32', output_file='exponent_hist.png'): + """ + Plot histograms of exponent field usage in a raw binary file of floating-point numbers. + Generates two subplots: + 1. Histogram of used exponent range (min to max of actual data) + 2. Histogram over the full possible exponent range + + Parameters + ---------- + filename : str + Path to the binary file. + dtype : str + Data type of the floats in the file. One of: 'float16', 'float32', 'float64'. + output_file : str + Output filename to save the plot (e.g., 'plot.png' or 'plot.pdf'). + """ + dtype_info = { + 'float16': {'bits': 16, 'exp_bits': 5, 'bias': 15}, + 'float32': {'bits': 32, 'exp_bits': 8, 'bias': 127}, + 'float64': {'bits': 64, 'exp_bits': 11, 'bias': 1023}, + } + + if dtype not in dtype_info: + raise ValueError(f"Unsupported dtype '{dtype}'. Must be one of {list(dtype_info.keys())}") + + info = dtype_info[dtype] + bits = info['bits'] + exp_bits = info['exp_bits'] + bias = info['bias'] + + # Load binary data + data = np.fromfile(filename, dtype=dtype) + if data.size == 0: + raise ValueError("No data found in file or file empty.") + + # View data as unsigned integer + int_view = data.view({16: np.uint16, 32: np.uint32, 64: np.uint64}[bits]) + + # Extract exponent bits + mantissa_bits = bits - exp_bits - 1 + exponent_mask = ((1 << exp_bits) - 1) << mantissa_bits + exponents = ((int_view & exponent_mask) >> mantissa_bits).astype(int) + + # Mask out special exponents (0 or all ones) + normal_mask = (exponents != 0) & (exponents != (1 << exp_bits) - 1) + exponents_normal = exponents[normal_mask] + + # Convert to unbiased exponent values + unbiased_exponents = exponents_normal - bias + + # Prepare plots + fig, axes = plt.subplots(2, 1, figsize=(10, 8), constrained_layout=True) + + # --- Subplot 1: Only used exponent range --- + bins_used = np.arange(unbiased_exponents.min() - 1, unbiased_exponents.max() + 2) + axes[0].hist(unbiased_exponents, bins=bins_used, edgecolor='black', alpha=0.7) + axes[0].set_title(f"Exponent Distribution (Used Range)\n{dtype}, File: {filename}") + axes[0].set_xlabel("Unbiased Exponent Value") + axes[0].set_ylabel("Frequency") + axes[0].grid(True, linestyle='--', alpha=0.5) + + # --- Subplot 2: Full possible exponent range --- + exp_min_possible = 1 - bias # Smallest normal exponent + exp_max_possible = (1 << exp_bits) - 2 - bias # Largest normal exponent + bins_full = np.arange(exp_min_possible - 0.5, exp_max_possible + 1.5) + + axes[1].hist(unbiased_exponents, bins=bins_full, edgecolor='black', alpha=0.7) + axes[1].set_xlim(exp_min_possible - 1, exp_max_possible + 1) + axes[1].set_title(f"Exponent Distribution (Full Range)\n{dtype}") + axes[1].set_xlabel("Unbiased Exponent Value (All Possible)") + axes[1].set_ylabel("Frequency") + axes[1].grid(True, linestyle='--', alpha=0.5) + + # Save to file + plt.savefig(output_file) + plt.close() + print(f"✅ Histogram saved to '{output_file}'") + + +def main(): + parser = argparse.ArgumentParser( + description="Plot histogram of exponent field usage in a raw binary float file." 
+ ) + parser.add_argument("filename", help="Path to the binary input file") + parser.add_argument( + "--dtype", + choices=["float16", "float32", "float64"], + default="float32", + help="Data type of floats in the file (default: float32)", + ) + parser.add_argument( + "--output", + default="exponent_hist.png", + help="Output filename for the plot (default: exponent_hist.png)", + ) + + args = parser.parse_args() + plot_exponent_distribution(args.filename, args.dtype, args.output) + + +if __name__ == "__main__": + main() diff --git a/test/Integration/Truncate/Cpp/log.cpp b/test/Integration/Truncate/Cpp/log.cpp new file mode 100644 index 00000000..d1cc00a1 --- /dev/null +++ b/test/Integration/Truncate/Cpp/log.cpp @@ -0,0 +1,31 @@ +// RUN: %clang -O3 %s -o %t.a.out %loadClangRaptor %linkRaptorRT %includeRaptorRT -lm && RAPTOR_FLOP_LOG_PREFIX=%t.flop_log %t.a.out && xxd %t.flop_log.double | FileCheck %s + +// CHECK: 00000000: 0000 0000 0000 f03f 0000 0000 0000 0040 +// CHECK: 00000010: 0000 0000 0000 0840 0000 0000 0000 0040 +// CHECK: 00000020: 0000 0000 0000 0840 0000 0000 0000 1040 +// CHECK: 00000030: 0000 0000 0000 1c40 0000 0000 0000 0040 + +#include "raptor/raptor.h" +#include + +double simple_add(double a, double b) { + return 2 * (a + b); + // TODO float and half + // return a + b + ((float)a + (float)b); +} + +template fty *__raptor_log_flops(fty *); + +int main() { + double trunc; + + trunc = __raptor_log_flops(simple_add)(1, 2); + printf("A1 %f\n", trunc); + trunc = __raptor_log_flops(simple_add)(3, 4); + printf("A2 %f\n", trunc); + __raptor_clear_flop_log_double(); + trunc = __raptor_log_flops(simple_add)(5, 6); + printf("A3 %f\n", trunc); + + return 0; +} diff --git a/test/Integration/Truncate/Cpp/openmp-gpu.cpp b/test/Integration/Truncate/Cpp/openmp-gpu.cpp index 85bdde39..671b4d93 100644 --- a/test/Integration/Truncate/Cpp/openmp-gpu.cpp +++ b/test/Integration/Truncate/Cpp/openmp-gpu.cpp @@ -1,7 +1,7 @@ // clang-format off // RUN: %clang -O3 %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out // RUN: %clang -O3 -fopenmp %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out -// RUN: if [ "%hasOpenMPGPU" == "1" ]; then %clang -O3 -fopenmp --offload-arch=native %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out; fi +// RUN: if [ "%hasOpenMPGPU" == "1" ]; then %clang -O3 -fopenmp --offload-arch=native -nogpulib %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out; fi // clang-format on #include "../../test_utils.h" diff --git a/test/lit.site.cfg.py.in b/test/lit.site.cfg.py.in index 6c456a51..48ec93a2 100644 --- a/test/lit.site.cfg.py.in +++ b/test/lit.site.cfg.py.in @@ -108,6 +108,10 @@ config.substitutions.append(('%loadLLDRaptor', newPM)) link = "-L@RAPTOR_BINARY_DIR@/runtime/ -lstdc++ -lmpfr -lRaptor-RT-" + config.llvm_ver config.substitutions.append(('%linkRaptorRT', link)) +link = "-L@RAPTOR_BINARY_DIR@/runtime/ -lstdc++ -lmpfr -lRaptor-RT-" + config.llvm_ver + +config.substitutions.append(('%includeRaptorRT', '-I@RAPTOR_SOURCE_DIR@/runtime/include/public')) + config.substitutions.append(('%hasMPFR', has_mpfr)) # Let the main config do the real work. 
@@ -122,7 +126,7 @@ import subprocess def has_openmp_gpu(): try: result = subprocess.run( - ["llvm-offload-device-info"], + [os.path.join(config.llvm_tools_dir, "llvm-offload-device-info")], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True diff --git a/wrappers/raptor-clang++.in b/wrappers/raptor-clang++.in index 7df2ace7..7e90afda 100644 --- a/wrappers/raptor-clang++.in +++ b/wrappers/raptor-clang++.in @@ -3,4 +3,4 @@ CMAKE_INSTALL_PREFIX="@CMAKE_INSTALL_PREFIX@" LLVM_VERSION_MAJOR="@LLVM_VERSION_MAJOR@" CLANGPLUSPLUS_PATH="@RAPTOR_CLANGPLUSPLUS_PATH@" -exec "$CLANGPLUSPLUS_PATH" -fpass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLVMRaptor-$LLVM_VERSION_MAJOR.so" -L"$CMAKE_INSTALL_PREFIX/lib" -lstdc++ -lmpfr -lRaptor-RT-"$LLVM_VERSION_MAJOR" -fuse-ld=lld -Wl,--load-pass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLDRaptor-$LLVM_VERSION_MAJOR.so" "$@" +exec "$CLANGPLUSPLUS_PATH" -I"$CMAKE_INSTALL_PREFIX/include/" -fpass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLVMRaptor-$LLVM_VERSION_MAJOR.so" -L"$CMAKE_INSTALL_PREFIX/lib" -lstdc++ -lmpfr -lRaptor-RT-"$LLVM_VERSION_MAJOR" -fuse-ld=lld -Wl,--load-pass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLDRaptor-$LLVM_VERSION_MAJOR.so" "$@" diff --git a/wrappers/raptor-clang.in b/wrappers/raptor-clang.in index c0cfa465..d084b77b 100644 --- a/wrappers/raptor-clang.in +++ b/wrappers/raptor-clang.in @@ -3,4 +3,4 @@ CMAKE_INSTALL_PREFIX="@CMAKE_INSTALL_PREFIX@" LLVM_VERSION_MAJOR="@LLVM_VERSION_MAJOR@" CLANG_PATH="@RAPTOR_CLANG_PATH@" -exec "$CLANG_PATH" -fpass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLVMRaptor-$LLVM_VERSION_MAJOR.so" -L"$CMAKE_INSTALL_PREFIX/lib" -lstdc++ -lmpfr -lRaptor-RT-"$LLVM_VERSION_MAJOR" -fuse-ld=lld -Wl,--load-pass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLDRaptor-$LLVM_VERSION_MAJOR.so" "$@" +exec "$CLANG_PATH" -I"$CMAKE_INSTALL_PREFIX/include/" -fpass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLVMRaptor-$LLVM_VERSION_MAJOR.so" -L"$CMAKE_INSTALL_PREFIX/lib" -lstdc++ -lmpfr -lRaptor-RT-"$LLVM_VERSION_MAJOR" -fuse-ld=lld -Wl,--load-pass-plugin="$CMAKE_INSTALL_PREFIX/lib/LLDRaptor-$LLVM_VERSION_MAJOR.so" "$@"
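
Usage sketch (illustrative, not part of the patch): the snippet below exercises the flop-logging entry points this patch adds -- __raptor_log_flops, __raptor_set_flop_log_double, __raptor_clear_flop_log_double -- and can be built with the raptor-clang++ wrapper, which now adds the installed include directory. The kernel name `dot` and the paths "flops.bin" / "exponents.png" are placeholders, not names from the patch.

    // Build with the wrapper installed by this patch, e.g.:
    //   raptor-clang++ -O2 example.cpp -o example
    #include "raptor/raptor.h"
    #include <cstdio>

    static double dot(const double *a, const double *b, int n) {
      double s = 0.0;
      for (int i = 0; i < n; ++i)
        s += a[i] * b[i]; // the instrumented clone logs each fmul/fadd operand
      return s;
    }

    int main() {
      double a[4] = {1, 2, 3, 4}, b[4] = {4, 3, 2, 1};
      __raptor_set_flop_log_double("flops.bin");   // open the double-precision log
      double s = __raptor_log_flops(dot)(a, b, 4); // call the instrumented clone
      printf("dot = %f\n", s);
      __raptor_clear_flop_log_double();            // reset the logger, flushing the file
      return 0;
    }

The resulting raw stream of doubles can then be inspected with the installed script, e.g. `raptor_plot_float_histogram.py flops.bin --dtype float64 --output exponents.png`. Alternatively, running the binary with RAPTOR_FLOP_LOG_PREFIX=<prefix> set (as the new log.cpp test does) opens one log per float type, with the type name appended to the prefix.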