From 715dfeacf9c16de86ee85c34701f4c80d46d47cb Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 6 Feb 2026 02:16:13 +0100 Subject: [PATCH 1/6] refactor(emit): move LLVM output helpers into emit module --- src/compilerlib/compiler.cpp | 116 ++++++++--------------------------- 1 file changed, 27 insertions(+), 89 deletions(-) diff --git a/src/compilerlib/compiler.cpp b/src/compilerlib/compiler.cpp index 9bd60fa..832fc75 100644 --- a/src/compilerlib/compiler.cpp +++ b/src/compilerlib/compiler.cpp @@ -7,6 +7,7 @@ #include "compilerlib/instrumentation/config.hpp" #include "compilerlib/instrumentation/trace.hpp" #include "compilerlib/instrumentation/vtable.hpp" +#include "emit/llvm_output.hpp" #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -103,23 +103,6 @@ namespace compilerlib } }; - llvm::CodeGenOptLevel toCodeGenOptLevel(unsigned level) - { - switch (level) - { - case 0: - return llvm::CodeGenOptLevel::None; - case 1: - return llvm::CodeGenOptLevel::Less; - case 2: - return llvm::CodeGenOptLevel::Default; - case 3: - return llvm::CodeGenOptLevel::Aggressive; - default: - return llvm::CodeGenOptLevel::Default; - } - } - const char* findArgValue(const llvm::opt::ArgStringList& args, llvm::StringRef opt) { for (size_t i = 0; i + 1 < args.size(); ++i) @@ -421,70 +404,6 @@ namespace compilerlib return plan; } - CT_NODISCARD bool emitObjectFile(llvm::Module& module, const clang::CompilerInstance& ci, - llvm::StringRef outputPath, std::string& error) - { - std::string targetTriple = module.getTargetTriple(); - - if (targetTriple.empty()) - targetTriple = llvm::sys::getDefaultTargetTriple(); - module.setTargetTriple(targetTriple); - - std::string targetError; - const llvm::Target* target = - llvm::TargetRegistry::lookupTarget(targetTriple, targetError); - if (!target) - { - error = targetError; - return false; - } - - const auto& targetOpts = ci.getTargetOpts(); - std::string features; - for (const auto& feature : targetOpts.FeaturesAsWritten) - { - if (!features.empty()) - features += ","; - features += feature; - } - - llvm::TargetOptions options; - auto codegenLevel = toCodeGenOptLevel(ci.getCodeGenOpts().OptimizationLevel); - // For position-independent code (needed for instrumented code and PIE executables), - // explicitly set the relocation model to PIC - llvm::Reloc::Model relocModel = llvm::Reloc::PIC_; - std::unique_ptr targetMachine( - target->createTargetMachine(targetTriple, targetOpts.CPU, features, options, - relocModel, std::nullopt, codegenLevel)); - if (!targetMachine) - { - error = "failed to create target machine"; - return false; - } - - module.setDataLayout(targetMachine->createDataLayout()); - - std::error_code ec; - llvm::raw_fd_ostream dest(outputPath, ec, llvm::sys::fs::OF_None); - if (ec) - { - error = ec.message(); - return false; - } - - llvm::legacy::PassManager pass; - if (targetMachine->addPassesToEmitFile(pass, dest, nullptr, - llvm::CodeGenFileType::ObjectFile)) - { - error = "target does not support object emission"; - return false; - } - - pass.run(module); - dest.flush(); - return true; - } - class Cc1Runner { public: @@ -504,9 +423,15 @@ namespace compilerlib return false; } - if (ci->getFrontendOpts().ProgramAction != clang::frontend::EmitObj) + auto actionKind = ci->getFrontendOpts().ProgramAction; + switch (actionKind) { - error = "instrumentation only supports object/binary output"; + case clang::frontend::EmitObj: + case clang::frontend::EmitLLVM: + case clang::frontend::EmitBC: + break; + default: + error = "instrumentation only supports object or LLVM IR/bitcode output"; return false; } @@ -544,15 +469,28 @@ namespace compilerlib } emitRuntimeConfigGlobals(*module, ctx_.runtimeConfig); - const char* outputObj = findArgValue(ccArgs, "-o"); - if (!outputObj) + const char* outputPath = findArgValue(ccArgs, "-o"); + if (!outputPath) { - error = "unable to determine output object file"; + error = "unable to determine output file"; return false; } - - if (!emitObjectFile(*module, *ci, outputObj, error)) + switch (actionKind) { + case clang::frontend::EmitObj: + if (!emit::emitObjectFile(*module, *ci, outputPath, error)) + return false; + break; + case clang::frontend::EmitLLVM: + if (!emit::emitLLVMIRFile(*module, outputPath, error)) + return false; + break; + case clang::frontend::EmitBC: + if (!emit::emitBitcodeFile(*module, outputPath, error)) + return false; + break; + default: + error = "instrumentation only supports object or LLVM IR/bitcode output"; return false; } From 613e526ab5d22594426736b02cfd7fff60a0c043 Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 6 Feb 2026 02:18:18 +0100 Subject: [PATCH 2/6] feat(compiler): support instrumented LLVM IR/bitcode output --- src/compilerlib/emit/llvm_output.cpp | 155 +++++++++++++++++++++++++++ src/compilerlib/emit/llvm_output.hpp | 27 +++++ 2 files changed, 182 insertions(+) create mode 100644 src/compilerlib/emit/llvm_output.cpp create mode 100644 src/compilerlib/emit/llvm_output.hpp diff --git a/src/compilerlib/emit/llvm_output.cpp b/src/compilerlib/emit/llvm_output.cpp new file mode 100644 index 0000000..503482d --- /dev/null +++ b/src/compilerlib/emit/llvm_output.cpp @@ -0,0 +1,155 @@ +#include "llvm_output.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace compilerlib::emit +{ + namespace + { + CT_NODISCARD llvm::CodeGenOptLevel toCodeGenOptLevel(unsigned level) + { + switch (level) + { + case 0: + return llvm::CodeGenOptLevel::None; + case 1: + return llvm::CodeGenOptLevel::Less; + case 2: + return llvm::CodeGenOptLevel::Default; + case 3: + return llvm::CodeGenOptLevel::Aggressive; + default: + return llvm::CodeGenOptLevel::Default; + } + } + + template + CT_NODISCARD bool writeOutputFile(llvm::StringRef outputPath, std::string& error, + Writer&& writer) + { + std::error_code ec; + llvm::raw_fd_ostream dest(outputPath, ec, llvm::sys::fs::OF_None); + if (ec) + { + error = ec.message(); + return false; + } + + if (!writer(dest)) + { + if (error.empty()) + error = "failed to write file"; + return false; + } + dest.flush(); + if (dest.has_error()) + { + error = "failed to write file"; + return false; + } + return true; + } + + std::string buildTargetFeatures(const clang::CompilerInstance& ci) + { + const auto& targetOpts = ci.getTargetOpts(); + std::string features; + for (const auto& feature : targetOpts.FeaturesAsWritten) + { + if (!features.empty()) + features += ","; + features += feature; + } + return features; + } + + std::unique_ptr + createTargetMachine(llvm::Module& module, const clang::CompilerInstance& ci, + std::string& error) + { + std::string targetTriple = module.getTargetTriple(); + + if (targetTriple.empty()) + targetTriple = llvm::sys::getDefaultTargetTriple(); + module.setTargetTriple(targetTriple); + + std::string targetError; + const llvm::Target* target = + llvm::TargetRegistry::lookupTarget(targetTriple, targetError); + if (!target) + { + error = targetError; + return nullptr; + } + + llvm::TargetOptions options; + auto codegenLevel = toCodeGenOptLevel(ci.getCodeGenOpts().OptimizationLevel); + // For position-independent code (needed for instrumented code and PIE executables), + // explicitly set the relocation model to PIC. + llvm::Reloc::Model relocModel = llvm::Reloc::PIC_; + std::unique_ptr targetMachine( + target->createTargetMachine(targetTriple, ci.getTargetOpts().CPU, + buildTargetFeatures(ci), options, relocModel, + std::nullopt, codegenLevel)); + if (!targetMachine) + { + error = "failed to create target machine"; + return nullptr; + } + + module.setDataLayout(targetMachine->createDataLayout()); + return targetMachine; + } + } // namespace + + bool emitObjectFile(llvm::Module& module, const clang::CompilerInstance& ci, + llvm::StringRef outputPath, std::string& error) + { + std::unique_ptr targetMachine = + createTargetMachine(module, ci, error); + if (!targetMachine) + return false; + + return writeOutputFile(outputPath, error, [&](llvm::raw_fd_ostream& dest) -> bool { + llvm::legacy::PassManager pass; + if (targetMachine->addPassesToEmitFile(pass, dest, nullptr, + llvm::CodeGenFileType::ObjectFile)) + { + error = "target does not support object emission"; + return false; + } + + pass.run(module); + return true; + }); + } + + bool emitLLVMIRFile(llvm::Module& module, llvm::StringRef outputPath, std::string& error) + { + return writeOutputFile(outputPath, error, [&](llvm::raw_fd_ostream& dest) -> bool { + module.print(dest, nullptr); + return !dest.has_error(); + }); + } + + bool emitBitcodeFile(llvm::Module& module, llvm::StringRef outputPath, std::string& error) + { + return writeOutputFile(outputPath, error, [&](llvm::raw_fd_ostream& dest) -> bool { + llvm::WriteBitcodeToFile(module, dest); + return !dest.has_error(); + }); + } +} // namespace compilerlib::emit diff --git a/src/compilerlib/emit/llvm_output.hpp b/src/compilerlib/emit/llvm_output.hpp new file mode 100644 index 0000000..b931688 --- /dev/null +++ b/src/compilerlib/emit/llvm_output.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include "compilerlib/attributes.hpp" + +#include + +#include + +namespace llvm +{ + class Module; +} + +namespace clang +{ + class CompilerInstance; +} + +namespace compilerlib::emit +{ + CT_NODISCARD bool emitObjectFile(llvm::Module& module, const clang::CompilerInstance& ci, + llvm::StringRef outputPath, std::string& error); + CT_NODISCARD bool emitLLVMIRFile(llvm::Module& module, llvm::StringRef outputPath, + std::string& error); + CT_NODISCARD bool emitBitcodeFile(llvm::Module& module, llvm::StringRef outputPath, + std::string& error); +} // namespace compilerlib::emit From a5bbd51aead5624a4a416b9f4801f9bef1b72cba Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 6 Feb 2026 02:18:49 +0100 Subject: [PATCH 3/6] refactor(emit): move LLVM output helpers into emit module --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index d6f7861..52d497a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ endif() set(LIB_SOURCES src/compilerlib/compiler.cpp + src/compilerlib/emit/llvm_output.cpp src/compilerlib/toolchain.cpp src/compilerlib/instrumentation/alloc.cpp src/compilerlib/instrumentation/bounds.cpp From bdaf35415de2c4f08d6b2b947b38cfbd3f2c3095 Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 6 Feb 2026 02:22:10 +0100 Subject: [PATCH 4/6] feat(compiler): support instrumented LLVM IR/bitcode output --- src/compilerlib/emit/llvm_output.cpp | 62 +++++++++++++++------------- src/compilerlib/emit/llvm_output.hpp | 2 +- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/src/compilerlib/emit/llvm_output.cpp b/src/compilerlib/emit/llvm_output.cpp index 503482d..b0aed61 100644 --- a/src/compilerlib/emit/llvm_output.cpp +++ b/src/compilerlib/emit/llvm_output.cpp @@ -76,9 +76,9 @@ namespace compilerlib::emit return features; } - std::unique_ptr - createTargetMachine(llvm::Module& module, const clang::CompilerInstance& ci, - std::string& error) + std::unique_ptr createTargetMachine(llvm::Module& module, + const clang::CompilerInstance& ci, + std::string& error) { std::string targetTriple = module.getTargetTriple(); @@ -100,10 +100,9 @@ namespace compilerlib::emit // For position-independent code (needed for instrumented code and PIE executables), // explicitly set the relocation model to PIC. llvm::Reloc::Model relocModel = llvm::Reloc::PIC_; - std::unique_ptr targetMachine( - target->createTargetMachine(targetTriple, ci.getTargetOpts().CPU, - buildTargetFeatures(ci), options, relocModel, - std::nullopt, codegenLevel)); + std::unique_ptr targetMachine(target->createTargetMachine( + targetTriple, ci.getTargetOpts().CPU, buildTargetFeatures(ci), options, relocModel, + std::nullopt, codegenLevel)); if (!targetMachine) { error = "failed to create target machine"; @@ -118,38 +117,43 @@ namespace compilerlib::emit bool emitObjectFile(llvm::Module& module, const clang::CompilerInstance& ci, llvm::StringRef outputPath, std::string& error) { - std::unique_ptr targetMachine = - createTargetMachine(module, ci, error); + std::unique_ptr targetMachine = createTargetMachine(module, ci, error); if (!targetMachine) return false; - return writeOutputFile(outputPath, error, [&](llvm::raw_fd_ostream& dest) -> bool { - llvm::legacy::PassManager pass; - if (targetMachine->addPassesToEmitFile(pass, dest, nullptr, - llvm::CodeGenFileType::ObjectFile)) - { - error = "target does not support object emission"; - return false; - } - - pass.run(module); - return true; - }); + return writeOutputFile(outputPath, error, + [&](llvm::raw_fd_ostream& dest) -> bool + { + llvm::legacy::PassManager pass; + if (targetMachine->addPassesToEmitFile( + pass, dest, nullptr, llvm::CodeGenFileType::ObjectFile)) + { + error = "target does not support object emission"; + return false; + } + + pass.run(module); + return true; + }); } bool emitLLVMIRFile(llvm::Module& module, llvm::StringRef outputPath, std::string& error) { - return writeOutputFile(outputPath, error, [&](llvm::raw_fd_ostream& dest) -> bool { - module.print(dest, nullptr); - return !dest.has_error(); - }); + return writeOutputFile(outputPath, error, + [&](llvm::raw_fd_ostream& dest) -> bool + { + module.print(dest, nullptr); + return !dest.has_error(); + }); } bool emitBitcodeFile(llvm::Module& module, llvm::StringRef outputPath, std::string& error) { - return writeOutputFile(outputPath, error, [&](llvm::raw_fd_ostream& dest) -> bool { - llvm::WriteBitcodeToFile(module, dest); - return !dest.has_error(); - }); + return writeOutputFile(outputPath, error, + [&](llvm::raw_fd_ostream& dest) -> bool + { + llvm::WriteBitcodeToFile(module, dest); + return !dest.has_error(); + }); } } // namespace compilerlib::emit diff --git a/src/compilerlib/emit/llvm_output.hpp b/src/compilerlib/emit/llvm_output.hpp index b931688..77f31c1 100644 --- a/src/compilerlib/emit/llvm_output.hpp +++ b/src/compilerlib/emit/llvm_output.hpp @@ -23,5 +23,5 @@ namespace compilerlib::emit CT_NODISCARD bool emitLLVMIRFile(llvm::Module& module, llvm::StringRef outputPath, std::string& error); CT_NODISCARD bool emitBitcodeFile(llvm::Module& module, llvm::StringRef outputPath, - std::string& error); + std::string& error); } // namespace compilerlib::emit From 271954def20e820b70b827ea6cb5951ab391d91e Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 6 Feb 2026 02:33:17 +0100 Subject: [PATCH 5/6] test(examples): cover instrumented LLVM IR and bitcode outputs --- test/examples/test_smoke.py | 41 ++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/test/examples/test_smoke.py b/test/examples/test_smoke.py index db23473..a0bfde3 100644 --- a/test/examples/test_smoke.py +++ b/test/examples/test_smoke.py @@ -214,6 +214,39 @@ def base_out_assertions(out_name: str): ], ) + tc_instrument_emit_llvm = TestCase( + name="compile_instrument_emit_llvm", + plan=CompilePlan( + name="compile_instrument_emit_llvm", + sources=[Path("hello.c")], + out=None, + extra_args=["--instrument", "-S", "-emit-llvm", "-o=hello_instr.ll"], + ), + assertions=[ + assert_exit_code(0), + assert_argv_contains(["--instrument", "-S", "-emit-llvm"]), + assert_output_exists_at("hello_instr.ll"), + assert_output_kind_at("hello_instr.ll", ArtifactKind.LLVM_IR_TEXT), + assert_output_nonempty_at("hello_instr.ll"), + ], + ) + + tc_instrument_emit_bc = TestCase( + name="compile_instrument_emit_bc", + plan=CompilePlan( + name="compile_instrument_emit_bc", + sources=[Path("hello.c")], + out=None, + extra_args=["--instrument", "-emit-llvm", "-o=hello_instr.bc"], + ), + assertions=[ + assert_exit_code(0), + assert_argv_contains(["--instrument", "-emit-llvm"]), + assert_output_exists_at("hello_instr.bc"), + assert_output_nonempty_at("hello_instr.bc"), + ], + ) + tc_readme_emit_llvm = TestCase( name="readme_emit_llvm", plan=CompilePlan( @@ -355,7 +388,13 @@ def base_out_assertions(out_name: str): platform = detect_platform() common_cases = [tc_o_eq, tc_d_space, tc_d_compact, tc_cpp, tc_x_cxx] - instrument_cases = [tc_instrument_c, tc_instrument_cpp, tc_instrument_x_cxx] + instrument_cases = [ + tc_instrument_c, + tc_instrument_cpp, + tc_instrument_x_cxx, + tc_instrument_emit_llvm, + tc_instrument_emit_bc, + ] readme_cases = [ tc_readme_emit_llvm, tc_readme_asm, From bc44c1f4c7c026cd10dd9588b5fadde20d3a24e0 Mon Sep 17 00:00:00 2001 From: Hugo Date: Fri, 6 Feb 2026 02:40:24 +0100 Subject: [PATCH 6/6] test(examples): fix instrumented bitcode output flags --- test/examples/test_smoke.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/examples/test_smoke.py b/test/examples/test_smoke.py index a0bfde3..51ead57 100644 --- a/test/examples/test_smoke.py +++ b/test/examples/test_smoke.py @@ -237,11 +237,11 @@ def base_out_assertions(out_name: str): name="compile_instrument_emit_bc", sources=[Path("hello.c")], out=None, - extra_args=["--instrument", "-emit-llvm", "-o=hello_instr.bc"], + extra_args=["--instrument", "-c", "-emit-llvm", "-o=hello_instr.bc"], ), assertions=[ assert_exit_code(0), - assert_argv_contains(["--instrument", "-emit-llvm"]), + assert_argv_contains(["--instrument", "-c", "-emit-llvm"]), assert_output_exists_at("hello_instr.bc"), assert_output_nonempty_at("hello_instr.bc"), ],