diff --git a/CMakeLists.txt b/CMakeLists.txt index d71b04ad..9742dd57 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -137,14 +137,16 @@ set_property(CACHE YAML_BACKEND PROPERTY STRINGS YAML_CPP LLVM) # For paths given when reporting errors add_compile_options(-fmacro-prefix-map=${PROJECT_SOURCE_DIR}=.) +set(EXTRA_LINK_OPTIONS "-Wl,--exclude-libs,ALL") + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set(COROUTINES_COMPILE_OPTION) set(EXTRA_COMPILE_OPTIONS) - set(EXTRA_LINK_OPTIONS -fuse-ld=lld -frtti) + list(PREPEND EXTRA_LINK_OPTIONS -fuse-ld=lld -frtti) else() set(COROUTINES_COMPILE_OPTION -fcoroutines) set(EXTRA_COMPILE_OPTIONS) - set(EXTRA_LINK_OPTIONS -frtti) + list(PREPEND EXTRA_LINK_OPTIONS -frtti) endif() if(ROCROLLER_ENABLE_TIMERS) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 60118ce5..0d90054b 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -265,6 +265,8 @@ set_target_properties( rocroller PROPERTIES VERSION ${ROCROLLER_VERSION} SOVERSION ${ROCROLLER_SOVERSION} + VISIBILITY_INLINES_HIDDEN ON + CXX_VISIBILITY_PRESET hidden ) target_link_libraries(rocroller diff --git a/lib/include/rocRoller/Assemblers/Assembler.hpp b/lib/include/rocRoller/Assemblers/Assembler.hpp index db2449f1..8927935b 100644 --- a/lib/include/rocRoller/Assemblers/Assembler.hpp +++ b/lib/include/rocRoller/Assemblers/Assembler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -35,9 +37,9 @@ namespace rocRoller { - std::ostream& operator<<(std::ostream&, AssemblerType); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, AssemblerType); - class Assembler + class ROCROLLER_DECLSPEC Assembler { public: using Argument = AssemblerType; diff --git a/lib/include/rocRoller/Assemblers/Assembler_fwd.hpp b/lib/include/rocRoller/Assemblers/Assembler_fwd.hpp index abd02169..50369b07 100644 --- a/lib/include/rocRoller/Assemblers/Assembler_fwd.hpp +++ b/lib/include/rocRoller/Assemblers/Assembler_fwd.hpp @@ -26,12 +26,14 @@ #pragma once 
+#include + #include #include namespace rocRoller { - class Assembler; + class ROCROLLER_DECLSPEC Assembler; using AssemblerPtr = std::shared_ptr; enum class AssemblerType : int @@ -41,5 +43,5 @@ namespace rocRoller Count }; - std::string toString(AssemblerType t); + ROCROLLER_DECLSPEC std::string toString(AssemblerType t); } diff --git a/lib/include/rocRoller/Assemblers/InProcessAssembler.hpp b/lib/include/rocRoller/Assemblers/InProcessAssembler.hpp index 94d20044..35f67f4d 100644 --- a/lib/include/rocRoller/Assemblers/InProcessAssembler.hpp +++ b/lib/include/rocRoller/Assemblers/InProcessAssembler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -33,7 +35,7 @@ namespace rocRoller { - class InProcessAssembler : public Assembler + class ROCROLLER_DECLSPEC InProcessAssembler : public Assembler { public: InProcessAssembler(); diff --git a/lib/include/rocRoller/Assemblers/SubprocessAssembler.hpp b/lib/include/rocRoller/Assemblers/SubprocessAssembler.hpp index 9cc3c1a0..13f578a3 100644 --- a/lib/include/rocRoller/Assemblers/SubprocessAssembler.hpp +++ b/lib/include/rocRoller/Assemblers/SubprocessAssembler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -34,7 +36,7 @@ namespace rocRoller { - class SubprocessAssembler : public Assembler + class ROCROLLER_DECLSPEC SubprocessAssembler : public Assembler { public: SubprocessAssembler(); diff --git a/lib/include/rocRoller/AssemblyKernel.hpp b/lib/include/rocRoller/AssemblyKernel.hpp index 6d68bde1..b1fefdc1 100644 --- a/lib/include/rocRoller/AssemblyKernel.hpp +++ b/lib/include/rocRoller/AssemblyKernel.hpp @@ -29,6 +29,8 @@ #pragma once +#include + #include #include #include @@ -43,7 +45,7 @@ namespace rocRoller { - class AssemblyKernel + class ROCROLLER_DECLSPEC AssemblyKernel { public: AssemblyKernel(ContextPtr context, std::string const& kernelName); @@ -231,7 +233,7 @@ namespace rocRoller CommandPtr m_command; }; - struct AssemblyKernels + struct ROCROLLER_DECLSPEC AssemblyKernels { 
constexpr std::array hsa_version() { diff --git a/lib/include/rocRoller/AssemblyKernelArgument.hpp b/lib/include/rocRoller/AssemblyKernelArgument.hpp index fa20444a..ad0da452 100644 --- a/lib/include/rocRoller/AssemblyKernelArgument.hpp +++ b/lib/include/rocRoller/AssemblyKernelArgument.hpp @@ -29,6 +29,8 @@ #pragma once +#include + #include #include @@ -36,7 +38,7 @@ namespace rocRoller { - struct AssemblyKernelArgument + struct ROCROLLER_DECLSPEC AssemblyKernelArgument { std::string name; VariableType variableType; @@ -52,5 +54,6 @@ namespace rocRoller std::string toString() const; }; - std::ostream& operator<<(std::ostream& stream, AssemblyKernelArgument const& arg); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + AssemblyKernelArgument const& arg); } diff --git a/lib/include/rocRoller/AssemblyKernelArgument_fwd.hpp b/lib/include/rocRoller/AssemblyKernelArgument_fwd.hpp index a17d1d6a..72af3c99 100644 --- a/lib/include/rocRoller/AssemblyKernelArgument_fwd.hpp +++ b/lib/include/rocRoller/AssemblyKernelArgument_fwd.hpp @@ -29,11 +29,13 @@ #pragma once +#include + #include namespace rocRoller { - struct AssemblyKernelArgument; + struct ROCROLLER_DECLSPEC AssemblyKernelArgument; using AssemblyKernelArgumentPtr = std::shared_ptr; } diff --git a/lib/include/rocRoller/AssemblyKernel_fwd.hpp b/lib/include/rocRoller/AssemblyKernel_fwd.hpp index f11e2614..1d24d6e3 100644 --- a/lib/include/rocRoller/AssemblyKernel_fwd.hpp +++ b/lib/include/rocRoller/AssemblyKernel_fwd.hpp @@ -29,10 +29,12 @@ #pragma once +#include + #include namespace rocRoller { - class AssemblyKernel; + class ROCROLLER_DECLSPEC AssemblyKernel; using AssemblyKernelPtr = std::shared_ptr; } diff --git a/lib/include/rocRoller/AssertOpKinds.hpp b/lib/include/rocRoller/AssertOpKinds.hpp index 5bf0df24..af1eadc4 100644 --- a/lib/include/rocRoller/AssertOpKinds.hpp +++ b/lib/include/rocRoller/AssertOpKinds.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -33,7 +35,7 @@ 
namespace rocRoller { - std::string toString(const AssertOpKind& assertOpKind); - std::ostream& operator<<(std::ostream&, AssertOpKind const); + ROCROLLER_DECLSPEC std::string toString(AssertOpKind const& assertOpKind); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, AssertOpKind const); } diff --git a/lib/include/rocRoller/CodeGen/Annotate.hpp b/lib/include/rocRoller/CodeGen/Annotate.hpp index c7eb79a4..d1120044 100644 --- a/lib/include/rocRoller/CodeGen/Annotate.hpp +++ b/lib/include/rocRoller/CodeGen/Annotate.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -40,7 +42,7 @@ namespace rocRoller * This will add the comment "foo" to each instruction that's part of * generating `expr`. */ - class AddComment + class ROCROLLER_DECLSPEC AddComment { public: AddComment(std::string comment) @@ -63,7 +65,7 @@ namespace rocRoller * Really intended for use only from LowerFromKernelGraph, so that every * instruction is annotated with the control op that it came from. */ - class AddControlOp + class ROCROLLER_DECLSPEC AddControlOp { public: AddControlOp(int op) diff --git a/lib/include/rocRoller/CodeGen/ArgumentLoader.hpp b/lib/include/rocRoller/CodeGen/ArgumentLoader.hpp index 5f6092b6..131a495d 100644 --- a/lib/include/rocRoller/CodeGen/ArgumentLoader.hpp +++ b/lib/include/rocRoller/CodeGen/ArgumentLoader.hpp @@ -29,6 +29,8 @@ #pragma once +#include + #include #include @@ -40,7 +42,7 @@ namespace rocRollerTest { - class ArgumentLoaderTest_loadArgExtra_Test; + class ROCROLLER_DECLSPEC ArgumentLoaderTest_loadArgExtra_Test; } namespace rocRoller @@ -57,7 +59,7 @@ namespace rocRoller * instructions as well as possibly more synchronization. 
* */ - class ArgumentLoader + class ROCROLLER_DECLSPEC ArgumentLoader { public: ArgumentLoader(AssemblyKernelPtr kernel); diff --git a/lib/include/rocRoller/CodeGen/ArgumentLoader_fwd.hpp b/lib/include/rocRoller/CodeGen/ArgumentLoader_fwd.hpp index 519e6d8a..9b9e8309 100644 --- a/lib/include/rocRoller/CodeGen/ArgumentLoader_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/ArgumentLoader_fwd.hpp @@ -29,8 +29,10 @@ #pragma once +#include + namespace rocRoller { - class ArgumentLoader; + class ROCROLLER_DECLSPEC ArgumentLoader; } diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Add.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Add.hpp index 4acd593a..27f89799 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Add.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Add.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,15 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::Add const&); // Templated Generator class based on the register type and datatype. template - class AddGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC AddGenerator : public BinaryArithmeticGenerator { public: AddGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/AddShiftL.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/AddShiftL.hpp index 51e8a54e..2e304fa2 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/AddShiftL.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/AddShiftL.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,15 +35,16 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. 
template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Register::ValuePtr shiftAmount, Expression::AddShiftL const&); // Generator for all register types and datatypes. - class AddShiftLGenerator : public TernaryArithmeticGenerator + class ROCROLLER_DECLSPEC AddShiftLGenerator + : public TernaryArithmeticGenerator { public: AddShiftLGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/ArithmeticGenerator.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/ArithmeticGenerator.hpp index 47fcae57..de08a158 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/ArithmeticGenerator.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/ArithmeticGenerator.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -34,7 +36,7 @@ namespace rocRoller { /// Base Arithmetic Generator class. All Arithmetic generators should be derived /// from this class. - class ArithmeticGenerator + class ROCROLLER_DECLSPEC ArithmeticGenerator { public: ArithmeticGenerator(ContextPtr context) @@ -101,7 +103,7 @@ namespace rocRoller // Unary Arithmetic Generator. Most unary generators should be derived from // this class. template - class UnaryArithmeticGenerator : public ArithmeticGenerator + class ROCROLLER_DECLSPEC UnaryArithmeticGenerator : public ArithmeticGenerator { public: UnaryArithmeticGenerator(ContextPtr context) @@ -129,7 +131,7 @@ namespace rocRoller // Binary Arithmetic Generator. Most binary generators should be derived from // this class. template - class BinaryArithmeticGenerator : public ArithmeticGenerator + class ROCROLLER_DECLSPEC BinaryArithmeticGenerator : public ArithmeticGenerator { public: BinaryArithmeticGenerator(ContextPtr context) @@ -160,7 +162,7 @@ namespace rocRoller // Ternary Arithmetic Generator. Most ternary generators should be derived from // this class. 
template - class TernaryArithmeticGenerator : public ArithmeticGenerator + class ROCROLLER_DECLSPEC TernaryArithmeticGenerator : public ArithmeticGenerator { public: TernaryArithmeticGenerator(ContextPtr context) @@ -192,7 +194,7 @@ namespace rocRoller // TernaryMixed Arithmetic Generator. Only Ternary generators that can support mixed // airthmetic should be derived from this class. template - class TernaryMixedArithmeticGenerator : public ArithmeticGenerator + class ROCROLLER_DECLSPEC TernaryMixedArithmeticGenerator : public ArithmeticGenerator { public: TernaryMixedArithmeticGenerator(ContextPtr context) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/ArithmeticShiftR.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/ArithmeticShiftR.hpp index d7a42b50..e0d0fc3b 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/ArithmeticShiftR.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/ArithmeticShiftR.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,14 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::ArithmeticShiftR const& expr); // Generator for all register types and datatypes. 
- class ArithmeticShiftRGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC ArithmeticShiftRGenerator + : public BinaryArithmeticGenerator { public: ArithmeticShiftRGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/BitFieldExtract.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/BitFieldExtract.hpp index 81f806a6..bab3f064 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/BitFieldExtract.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/BitFieldExtract.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,11 +35,14 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> GetGenerator( - Register::ValuePtr dst, Register::ValuePtr arg, Expression::BitFieldExtract const& expr); + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, + Register::ValuePtr arg, + Expression::BitFieldExtract const& expr); template - class BitFieldExtractGenerator : public UnaryArithmeticGenerator + class ROCROLLER_DECLSPEC BitFieldExtractGenerator + : public UnaryArithmeticGenerator { public: BitFieldExtractGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseAnd.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseAnd.hpp index f605100a..08b4664d 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseAnd.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseAnd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,14 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::BitwiseAnd const&); // Generator for all register types and datatypes. 
- class BitwiseAndGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC BitwiseAndGenerator + : public BinaryArithmeticGenerator { public: BitwiseAndGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseNegate.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseNegate.hpp index db3669bc..0f7342fa 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseNegate.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseNegate.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,13 +35,14 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr arg, Expression::BitwiseNegate const&); // Templated Generator class based on the return type. - class BitwiseNegateGenerator : public UnaryArithmeticGenerator + class ROCROLLER_DECLSPEC BitwiseNegateGenerator + : public UnaryArithmeticGenerator { public: BitwiseNegateGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseOr.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseOr.hpp index 5bdbf042..bf0a502f 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseOr.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseOr.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,14 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::BitwiseOr const&); // Generator for all register types and datatypes. 
- class BitwiseOrGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC BitwiseOrGenerator + : public BinaryArithmeticGenerator { public: BitwiseOrGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseXor.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseXor.hpp index ab1d9788..0fd5efe1 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseXor.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/BitwiseXor.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,14 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::BitwiseXor const&); // Generator for all register types and datatypes. - class BitwiseXorGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC BitwiseXorGenerator + : public BinaryArithmeticGenerator { public: BitwiseXorGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Conditional.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Conditional.hpp index 5ae9392b..8c0b2f2e 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Conditional.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Conditional.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,15 +35,16 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr r1hs, Register::ValuePtr r2hsw, Expression::Conditional const&); // Generator for all register types and datatypes. 
- class ConditionalGenerator : public TernaryArithmeticGenerator + class ROCROLLER_DECLSPEC ConditionalGenerator + : public TernaryArithmeticGenerator { public: ConditionalGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Convert.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Convert.hpp index 7498916c..56dd9a41 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Convert.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Convert.hpp @@ -26,13 +26,15 @@ #pragma once +#include + #include namespace rocRoller { template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr arg, Expression::Convert const&); /** @@ -46,7 +48,7 @@ namespace rocRoller Generator generateConvertOp(DataType dataType, Register::ValuePtr dest, Register::ValuePtr arg); - class ConvertGenerator : public UnaryArithmeticGenerator + class ROCROLLER_DECLSPEC ConvertGenerator : public UnaryArithmeticGenerator { public: ConvertGenerator(ContextPtr c) @@ -122,7 +124,8 @@ namespace rocRoller * the second arg is a seed for stochastic rounding. */ template <> - std::shared_ptr>> + ROCROLLER_DECLSPEC + std::shared_ptr>> GetGenerator>( Register::ValuePtr dst, Register::ValuePtr lhs, @@ -130,7 +133,8 @@ namespace rocRoller Expression::SRConvert const&); template <> - std::shared_ptr>> + ROCROLLER_DECLSPEC + std::shared_ptr>> GetGenerator>( Register::ValuePtr dst, Register::ValuePtr lhs, @@ -139,7 +143,8 @@ namespace rocRoller // Templated Generator class based on the return type. 
template - class SRConvertGenerator : public BinaryArithmeticGenerator> + class ROCROLLER_DECLSPEC SRConvertGenerator + : public BinaryArithmeticGenerator> { public: SRConvertGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Divide.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Divide.hpp index 528641a0..50470623 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Divide.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Divide.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,15 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::Divide const&); // Templated Generator class based on the register type and datatype. template - class DivideGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC DivideGenerator : public BinaryArithmeticGenerator { public: DivideGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Equal.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Equal.hpp index 79eb284e..ce34c2df 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Equal.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Equal.hpp @@ -26,21 +26,23 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::Equal const&); // Templated Generator class based on the register type and datatype. 
template - class EqualGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC EqualGenerator : public BinaryArithmeticGenerator { public: EqualGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Exponential2.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Exponential2.hpp index ece5dc1f..1d86280f 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Exponential2.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Exponential2.hpp @@ -26,20 +26,23 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr arg, Expression::Exponential2 const&); // Templated Generator class based on the return type. template - class Exponential2Generator : public UnaryArithmeticGenerator + class ROCROLLER_DECLSPEC Exponential2Generator + : public UnaryArithmeticGenerator { public: Exponential2Generator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/GreaterThan.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/GreaterThan.hpp index 3dd16da4..28b844a6 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/GreaterThan.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/GreaterThan.hpp @@ -26,21 +26,24 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::GreaterThan const&); // Templated Generator class based on the register type and datatype. 
template - class GreaterThanGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC GreaterThanGenerator + : public BinaryArithmeticGenerator { public: GreaterThanGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/GreaterThanEqual.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/GreaterThanEqual.hpp index 46fcf56e..4bf97ef0 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/GreaterThanEqual.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/GreaterThanEqual.hpp @@ -26,21 +26,24 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::GreaterThanEqual const&); // Templated Generator class based on the register type and datatype. template - class GreaterThanEqualGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC GreaterThanEqualGenerator + : public BinaryArithmeticGenerator { public: GreaterThanEqualGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/LessThan.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/LessThan.hpp index de605b59..cf23373e 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/LessThan.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/LessThan.hpp @@ -26,21 +26,24 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::LessThan const&); // Templated Generator class based on the register type and datatype. 
template - class LessThanGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC LessThanGenerator + : public BinaryArithmeticGenerator { public: LessThanGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/LessThanEqual.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/LessThanEqual.hpp index d8ba1a0b..572e420e 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/LessThanEqual.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/LessThanEqual.hpp @@ -26,21 +26,24 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::LessThanEqual const&); // Templated Generator class based on the register type and datatype. template - class LessThanEqualGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC LessThanEqualGenerator + : public BinaryArithmeticGenerator { public: LessThanEqualGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/LogicalAnd.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/LogicalAnd.hpp index a4af2546..982940d9 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/LogicalAnd.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/LogicalAnd.hpp @@ -26,21 +26,24 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::LogicalAnd const&); // Templated Generator class based on the register type and datatype. 
template - class LogicalAndGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC LogicalAndGenerator + : public BinaryArithmeticGenerator { public: LogicalAndGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/LogicalNot.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/LogicalNot.hpp index 9684df06..18a75559 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/LogicalNot.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/LogicalNot.hpp @@ -26,20 +26,23 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr arg, Expression::LogicalNot const&); // Templated Generator class based on the register type and datatype. template - class LogicalNotGenerator : public UnaryArithmeticGenerator + class ROCROLLER_DECLSPEC LogicalNotGenerator + : public UnaryArithmeticGenerator { public: LogicalNotGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/LogicalOr.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/LogicalOr.hpp index 3f5a4a0b..64aaa5b4 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/LogicalOr.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/LogicalOr.hpp @@ -26,21 +26,24 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::LogicalOr const&); // Templated Generator class based on the register type and datatype. 
template - class LogicalOrGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC LogicalOrGenerator + : public BinaryArithmeticGenerator { public: LogicalOrGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/LogicalShiftR.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/LogicalShiftR.hpp index de27d73a..55fc74c4 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/LogicalShiftR.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/LogicalShiftR.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,14 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::LogicalShiftR const&); // Generator for all register types and datatypes. - class LogicalShiftRGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC LogicalShiftRGenerator + : public BinaryArithmeticGenerator { public: LogicalShiftRGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/MatrixMultiply.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/MatrixMultiply.hpp index 3295c087..9ab14ca6 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/MatrixMultiply.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/MatrixMultiply.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -36,7 +38,7 @@ namespace rocRoller { namespace InstructionGenerators { - struct MatrixMultiply + struct ROCROLLER_DECLSPEC MatrixMultiply { /** * Context, accumulation type, input type. 
@@ -65,7 +67,7 @@ namespace rocRoller = 0; }; - struct MatrixMultiplyGenerator : public MatrixMultiply + struct ROCROLLER_DECLSPEC MatrixMultiplyGenerator : public MatrixMultiply { using Base = MatrixMultiply; diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/MatrixMultiply_fwd.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/MatrixMultiply_fwd.hpp index f2ff1579..aeda06db 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/MatrixMultiply_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/MatrixMultiply_fwd.hpp @@ -26,13 +26,15 @@ #pragma once +#include + #include namespace rocRoller { namespace InstructionGenerators { - struct MatrixMultiply; + struct ROCROLLER_DECLSPEC MatrixMultiply; using MatrixMultiplyPtr = std::shared_ptr; } } diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Modulo.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Modulo.hpp index 36dde94e..a298b6e3 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Modulo.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Modulo.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,15 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::Modulo const&); // Templated Generator class based on the register type and datatype. 
template - class ModuloGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC ModuloGenerator : public BinaryArithmeticGenerator { public: ModuloGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Multiply.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Multiply.hpp index eb398db2..139c59c3 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Multiply.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Multiply.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,15 +35,16 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::Multiply const&); // Templated Generator class based on the register type and datatype. template - class MultiplyGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC MultiplyGenerator + : public BinaryArithmeticGenerator { public: MultiplyGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/MultiplyAdd.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/MultiplyAdd.hpp index 55ba8a6a..1688b362 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/MultiplyAdd.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/MultiplyAdd.hpp @@ -26,19 +26,22 @@ #pragma once +#include + #include namespace rocRoller { template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr a, Register::ValuePtr x, Register::ValuePtr y, Expression::MultiplyAdd const&); - struct MultiplyAddGenerator : public TernaryArithmeticGenerator + struct ROCROLLER_DECLSPEC MultiplyAddGenerator + : public TernaryArithmeticGenerator { MultiplyAddGenerator(ContextPtr c) : TernaryArithmeticGenerator(c) diff --git 
a/lib/include/rocRoller/CodeGen/Arithmetic/MultiplyHigh.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/MultiplyHigh.hpp index 289859a5..68679882 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/MultiplyHigh.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/MultiplyHigh.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,14 +35,15 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::MultiplyHigh const&); // Generator for all register types and datatypes. - class MultiplyHighGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC MultiplyHighGenerator + : public BinaryArithmeticGenerator { public: MultiplyHighGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Negate.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Negate.hpp index c0e087d9..0460aca3 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Negate.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Negate.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,11 +35,13 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> GetGenerator( - Register::ValuePtr dst, Register::ValuePtr arg, Expression::Negate const&); + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, + Register::ValuePtr arg, + Expression::Negate const&); // Templated Generator class based on the return type. 
- class NegateGenerator : public UnaryArithmeticGenerator + class ROCROLLER_DECLSPEC NegateGenerator : public UnaryArithmeticGenerator { public: NegateGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/NotEqual.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/NotEqual.hpp index 4c479b7a..a1275220 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/NotEqual.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/NotEqual.hpp @@ -26,21 +26,24 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::NotEqual const&); // Templated Generator class based on the register type and datatype. template - class NotEqualGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC NotEqualGenerator + : public BinaryArithmeticGenerator { public: NotEqualGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/RandomNumber.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/RandomNumber.hpp index 8a5d7aae..284f7fb0 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/RandomNumber.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/RandomNumber.hpp @@ -26,19 +26,22 @@ #pragma once +#include + #include namespace rocRoller { // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr arg, Expression::RandomNumber const&); // Templated Generator class based on the register type and datatype. 
- class RandomNumberGenerator : public UnaryArithmeticGenerator + class ROCROLLER_DECLSPEC RandomNumberGenerator + : public UnaryArithmeticGenerator { public: RandomNumberGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/ScaledMatrixMultiply.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/ScaledMatrixMultiply.hpp index db437f9e..ce7b8161 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/ScaledMatrixMultiply.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/ScaledMatrixMultiply.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include "ScaledMatrixMultiply_fwd.hpp" #include @@ -37,7 +39,7 @@ namespace rocRoller { namespace InstructionGenerators { - struct ScaledMatrixMultiply + struct ROCROLLER_DECLSPEC ScaledMatrixMultiply { /** * Context, accumulation type, input type. @@ -67,7 +69,7 @@ namespace rocRoller = 0; }; - struct ScaledMatrixMultiplyGenerator : public ScaledMatrixMultiply + struct ROCROLLER_DECLSPEC ScaledMatrixMultiplyGenerator : public ScaledMatrixMultiply { static bool constexpr isValidInputType(auto const vtype) { diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/ScaledMatrixMultiply_fwd.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/ScaledMatrixMultiply_fwd.hpp index dcc28b78..05fa7295 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/ScaledMatrixMultiply_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/ScaledMatrixMultiply_fwd.hpp @@ -26,13 +26,15 @@ #pragma once +#include + #include namespace rocRoller { namespace InstructionGenerators { - struct ScaledMatrixMultiply; + struct ROCROLLER_DECLSPEC ScaledMatrixMultiply; using ScaledMatrixMultiplyPtr = std::shared_ptr; } } diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/ShiftL.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/ShiftL.hpp index 2b1cc92c..91736eef 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/ShiftL.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/ShiftL.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ 
-33,14 +35,14 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::ShiftL const&); // Generator for all register types and datatypes. - class ShiftLGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC ShiftLGenerator : public BinaryArithmeticGenerator { public: ShiftLGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/ShiftLAdd.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/ShiftLAdd.hpp index 43d3157a..27b93d24 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/ShiftLAdd.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/ShiftLAdd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,15 +35,16 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr shiftAmount, Register::ValuePtr rhs, Expression::ShiftLAdd const&); // Generator for all register types and datatypes. 
- class ShiftLAddGenerator : public TernaryArithmeticGenerator + class ROCROLLER_DECLSPEC ShiftLAddGenerator + : public TernaryArithmeticGenerator { public: ShiftLAddGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Subtract.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Subtract.hpp index f58ebb77..335a1435 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Subtract.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Subtract.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -33,15 +35,16 @@ namespace rocRoller // GetGenerator function will return the Generator to use based on the provided arguments. template <> - std::shared_ptr> - GetGenerator(Register::ValuePtr dst, + ROCROLLER_DECLSPEC std::shared_ptr> + GetGenerator(Register::ValuePtr dst, Register::ValuePtr lhs, Register::ValuePtr rhs, Expression::Subtract const&); // Templated Generator class based on the register type and datatype. template - class SubtractGenerator : public BinaryArithmeticGenerator + class ROCROLLER_DECLSPEC SubtractGenerator + : public BinaryArithmeticGenerator { public: SubtractGenerator(ContextPtr c) diff --git a/lib/include/rocRoller/CodeGen/Arithmetic/Utility.hpp b/lib/include/rocRoller/CodeGen/Arithmetic/Utility.hpp index b534e028..bff79407 100644 --- a/lib/include/rocRoller/CodeGen/Arithmetic/Utility.hpp +++ b/lib/include/rocRoller/CodeGen/Arithmetic/Utility.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -35,13 +37,13 @@ namespace rocRoller /** * @brief Represent a single Register::Value as two Register::Values each the size of a single DWord */ - void get2LiteralDwords(Register::ValuePtr& lsd, - Register::ValuePtr& msd, - Register::ValuePtr input); + ROCROLLER_DECLSPEC void get2LiteralDwords(Register::ValuePtr& lsd, + Register::ValuePtr& msd, + Register::ValuePtr input); /** * @brief Get the modifier string for MFMA's input matrix types */ - std::string getModifier(DataType dataType); + 
ROCROLLER_DECLSPEC std::string getModifier(DataType dataType); } } diff --git a/lib/include/rocRoller/CodeGen/BranchGenerator.hpp b/lib/include/rocRoller/CodeGen/BranchGenerator.hpp index 2b6061b3..db925357 100644 --- a/lib/include/rocRoller/CodeGen/BranchGenerator.hpp +++ b/lib/include/rocRoller/CodeGen/BranchGenerator.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -36,7 +38,7 @@ namespace rocRoller /** * @brief Generator for generating conditional and unconditional branches. */ - class BranchGenerator + class ROCROLLER_DECLSPEC BranchGenerator { public: BranchGenerator(ContextPtr); diff --git a/lib/include/rocRoller/CodeGen/BranchGenerator_fwd.hpp b/lib/include/rocRoller/CodeGen/BranchGenerator_fwd.hpp index bef46792..ae888026 100644 --- a/lib/include/rocRoller/CodeGen/BranchGenerator_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/BranchGenerator_fwd.hpp @@ -26,9 +26,11 @@ #pragma once +#include + namespace rocRoller { - class BranchGenerator; + class ROCROLLER_DECLSPEC BranchGenerator; } diff --git a/lib/include/rocRoller/CodeGen/Buffer.hpp b/lib/include/rocRoller/CodeGen/Buffer.hpp index 3f5646c5..0390c3c6 100644 --- a/lib/include/rocRoller/CodeGen/Buffer.hpp +++ b/lib/include/rocRoller/CodeGen/Buffer.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -38,7 +40,7 @@ namespace rocRoller { - struct GFX9BufferDescriptorOptions + struct ROCROLLER_DECLSPEC GFX9BufferDescriptorOptions { enum DataFormatValue { @@ -87,14 +89,14 @@ namespace rocRoller void validate() const; }; - std::string toString(GFX9BufferDescriptorOptions::DataFormatValue val); - std::ostream& operator<<(std::ostream& stream, - GFX9BufferDescriptorOptions::DataFormatValue val); + ROCROLLER_DECLSPEC std::string toString(GFX9BufferDescriptorOptions::DataFormatValue val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + GFX9BufferDescriptorOptions::DataFormatValue val); static_assert(sizeof(GFX9BufferDescriptorOptions) == 4); 
static_assert(GFX9BufferDescriptorOptions::DFReserved == 15); - class BufferDescriptor + class ROCROLLER_DECLSPEC BufferDescriptor { public: BufferDescriptor(Register::ValuePtr srd, ContextPtr context); diff --git a/lib/include/rocRoller/CodeGen/BufferInstructionOptions.hpp b/lib/include/rocRoller/CodeGen/BufferInstructionOptions.hpp index 76f60f31..487fc4f3 100644 --- a/lib/include/rocRoller/CodeGen/BufferInstructionOptions.hpp +++ b/lib/include/rocRoller/CodeGen/BufferInstructionOptions.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include /** @@ -38,7 +40,7 @@ namespace rocRoller { - struct BufferInstructionOptions + struct ROCROLLER_DECLSPEC BufferInstructionOptions { bool offen = false; bool glc = false; diff --git a/lib/include/rocRoller/CodeGen/CopyGenerator.hpp b/lib/include/rocRoller/CodeGen/CopyGenerator.hpp index df7eafa1..af2497f4 100644 --- a/lib/include/rocRoller/CodeGen/CopyGenerator.hpp +++ b/lib/include/rocRoller/CodeGen/CopyGenerator.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -37,7 +39,7 @@ namespace rocRoller * Co-yields `mov` instructions automatically from a `src` to `dest` * if the copy is valid. 
*/ - class CopyGenerator + class ROCROLLER_DECLSPEC CopyGenerator { public: CopyGenerator(ContextPtr); diff --git a/lib/include/rocRoller/CodeGen/CopyGenerator_fwd.hpp b/lib/include/rocRoller/CodeGen/CopyGenerator_fwd.hpp index 6c4e53fb..3a76c476 100644 --- a/lib/include/rocRoller/CodeGen/CopyGenerator_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/CopyGenerator_fwd.hpp @@ -26,9 +26,11 @@ #pragma once +#include + namespace rocRoller { - class CopyGenerator; + class ROCROLLER_DECLSPEC CopyGenerator; } diff --git a/lib/include/rocRoller/CodeGen/CrashKernelGenerator.hpp b/lib/include/rocRoller/CodeGen/CrashKernelGenerator.hpp index 13343b65..fc5baceb 100644 --- a/lib/include/rocRoller/CodeGen/CrashKernelGenerator.hpp +++ b/lib/include/rocRoller/CodeGen/CrashKernelGenerator.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -34,7 +36,7 @@ namespace rocRoller { - class CrashKernelGenerator + class ROCROLLER_DECLSPEC CrashKernelGenerator { public: CrashKernelGenerator(ContextPtr); diff --git a/lib/include/rocRoller/CodeGen/CrashKernelGenerator_fwd.hpp b/lib/include/rocRoller/CodeGen/CrashKernelGenerator_fwd.hpp index 13f5628a..fc7b3214 100644 --- a/lib/include/rocRoller/CodeGen/CrashKernelGenerator_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/CrashKernelGenerator_fwd.hpp @@ -26,9 +26,11 @@ #pragma once +#include + namespace rocRoller { - class CrashKernelGenerator; + class ROCROLLER_DECLSPEC CrashKernelGenerator; } diff --git a/lib/include/rocRoller/CodeGen/Instruction.hpp b/lib/include/rocRoller/CodeGen/Instruction.hpp index 4fa18bcc..92dce168 100644 --- a/lib/include/rocRoller/CodeGen/Instruction.hpp +++ b/lib/include/rocRoller/CodeGen/Instruction.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -37,7 +39,7 @@ namespace rocRoller { - struct Instruction + struct ROCROLLER_DECLSPEC Instruction { enum { diff --git a/lib/include/rocRoller/CodeGen/Instruction_fwd.hpp b/lib/include/rocRoller/CodeGen/Instruction_fwd.hpp index 
8e31cb86..e88ab3f6 100644 --- a/lib/include/rocRoller/CodeGen/Instruction_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/Instruction_fwd.hpp @@ -26,9 +26,11 @@ #pragma once +#include + namespace rocRoller { - class Instruction; + struct ROCROLLER_DECLSPEC Instruction; } diff --git a/lib/include/rocRoller/CodeGen/LoadStoreTileGenerator.hpp b/lib/include/rocRoller/CodeGen/LoadStoreTileGenerator.hpp index e1fed27d..92168947 100644 --- a/lib/include/rocRoller/CodeGen/LoadStoreTileGenerator.hpp +++ b/lib/include/rocRoller/CodeGen/LoadStoreTileGenerator.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -43,7 +45,7 @@ namespace rocRoller * to and from memory. * */ - class LoadStoreTileGenerator + class ROCROLLER_DECLSPEC LoadStoreTileGenerator { public: LoadStoreTileGenerator(KernelGraphPtr, ContextPtr, unsigned int); @@ -124,7 +126,7 @@ namespace rocRoller /** * Information needed in order to load or store a tile. */ - struct LoadStoreTileInfo + struct ROCROLLER_DECLSPEC LoadStoreTileInfo { MemoryInstructions::MemoryKind kind = MemoryInstructions::MemoryKind::Count; uint64_t m = 0; diff --git a/lib/include/rocRoller/CodeGen/MemoryInstructions.hpp b/lib/include/rocRoller/CodeGen/MemoryInstructions.hpp index d8eedc91..26c05070 100644 --- a/lib/include/rocRoller/CodeGen/MemoryInstructions.hpp +++ b/lib/include/rocRoller/CodeGen/MemoryInstructions.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -36,7 +38,7 @@ namespace rocRoller { - class MemoryInstructions + class ROCROLLER_DECLSPEC MemoryInstructions { public: MemoryInstructions(ContextPtr context); @@ -384,11 +386,13 @@ namespace rocRoller Register::ValuePtr toPack) const; }; - std::string toString(MemoryInstructions::MemoryDirection const& d); - std::ostream& operator<<(std::ostream& stream, MemoryInstructions::MemoryDirection n); + ROCROLLER_DECLSPEC std::string toString(MemoryInstructions::MemoryDirection const& d); + ROCROLLER_DECLSPEC std::ostream& 
operator<<(std::ostream& stream, + MemoryInstructions::MemoryDirection n); - std::string toString(MemoryInstructions::MemoryKind const& k); - std::ostream& operator<<(std::ostream& stream, MemoryInstructions::MemoryKind k); + ROCROLLER_DECLSPEC std::string toString(MemoryInstructions::MemoryKind const& k); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + MemoryInstructions::MemoryKind k); } #include diff --git a/lib/include/rocRoller/CodeGen/MemoryInstructions_fwd.hpp b/lib/include/rocRoller/CodeGen/MemoryInstructions_fwd.hpp index 796c23d6..78d6bdf1 100644 --- a/lib/include/rocRoller/CodeGen/MemoryInstructions_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/MemoryInstructions_fwd.hpp @@ -26,9 +26,11 @@ #pragma once +#include + namespace rocRoller { - class MemoryInstructions; + class ROCROLLER_DECLSPEC MemoryInstructions; } diff --git a/lib/include/rocRoller/CodeGen/Utils.hpp b/lib/include/rocRoller/CodeGen/Utils.hpp index 8d5cb194..bc9cc866 100644 --- a/lib/include/rocRoller/CodeGen/Utils.hpp +++ b/lib/include/rocRoller/CodeGen/Utils.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -37,7 +39,7 @@ namespace rocRoller * * @param elementBits number of bits of variable type to load. */ - uint bitsPerTransposeLoad(uint elementBits); + ROCROLLER_DECLSPEC uint bitsPerTransposeLoad(uint elementBits); /** * @brief Returns extra number of bytes required to fulfill 128b alignment requirement of 6-bit transpose loads. @@ -46,7 +48,7 @@ namespace rocRoller * * @param elementBits number of bits of variable type to load. 
*/ - uint extraLDSBytesPerElementBlock(uint elementBits); + ROCROLLER_DECLSPEC uint extraLDSBytesPerElementBlock(uint elementBits); - std::string transposeLoadMnemonic(uint elementBits); + ROCROLLER_DECLSPEC std::string transposeLoadMnemonic(uint elementBits); } // rocRoller diff --git a/lib/include/rocRoller/CodeGen/WaitCount.hpp b/lib/include/rocRoller/CodeGen/WaitCount.hpp index 3f5d2cff..7a1a4932 100644 --- a/lib/include/rocRoller/CodeGen/WaitCount.hpp +++ b/lib/include/rocRoller/CodeGen/WaitCount.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -44,7 +46,7 @@ namespace rocRoller * * Internally represents -1 as not having to wait for a particular counter. */ - class WaitCount + class ROCROLLER_DECLSPEC WaitCount { public: WaitCount() = default; @@ -144,5 +146,5 @@ namespace rocRoller bool m_hasEXPCnt = false; }; - std::ostream& operator<<(std::ostream& stream, WaitCount const& wait); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, WaitCount const& wait); } diff --git a/lib/include/rocRoller/CodeGen/WaitCount_fwd.hpp b/lib/include/rocRoller/CodeGen/WaitCount_fwd.hpp index 7335c1e1..7ca4dd6a 100644 --- a/lib/include/rocRoller/CodeGen/WaitCount_fwd.hpp +++ b/lib/include/rocRoller/CodeGen/WaitCount_fwd.hpp @@ -26,7 +26,9 @@ #pragma once +#include + namespace rocRoller { - class WaitCount; + class ROCROLLER_DECLSPEC WaitCount; } diff --git a/lib/include/rocRoller/CommandSolution.hpp b/lib/include/rocRoller/CommandSolution.hpp index 69da3759..0b46813d 100644 --- a/lib/include/rocRoller/CommandSolution.hpp +++ b/lib/include/rocRoller/CommandSolution.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -51,7 +53,7 @@ namespace rocRoller /** * CommandParameters - tunable command parameters. */ - class CommandParameters + class ROCROLLER_DECLSPEC CommandParameters { public: CommandParameters(); @@ -157,7 +159,7 @@ namespace rocRoller * * TODO: Remove this! 
*/ - class CommandLaunchParameters + class ROCROLLER_DECLSPEC CommandLaunchParameters { public: CommandLaunchParameters() = default; @@ -172,7 +174,7 @@ namespace rocRoller std::optional> m_workitemCount; }; - class CommandKernel + class ROCROLLER_DECLSPEC CommandKernel { public: CommandKernel() = default; @@ -351,7 +353,7 @@ namespace rocRoller hipStream_t stream); }; - class CommandSolution + class ROCROLLER_DECLSPEC CommandSolution { public: explicit CommandSolution(CommandPtr command); diff --git a/lib/include/rocRoller/CommandSolution_fwd.hpp b/lib/include/rocRoller/CommandSolution_fwd.hpp index af28e72f..2f45a097 100644 --- a/lib/include/rocRoller/CommandSolution_fwd.hpp +++ b/lib/include/rocRoller/CommandSolution_fwd.hpp @@ -26,14 +26,16 @@ #pragma once +#include + #include namespace rocRoller { - class CommandParameters; - class CommandLaunchParameters; - class CommandKernel; - class CommandSolution; + class ROCROLLER_DECLSPEC CommandParameters; + class ROCROLLER_DECLSPEC CommandLaunchParameters; + class ROCROLLER_DECLSPEC CommandKernel; + class ROCROLLER_DECLSPEC CommandSolution; using CommandParametersPtr = std::shared_ptr; using CommandKernelPtr = std::shared_ptr; diff --git a/lib/include/rocRoller/CommonSubexpressionElim.hpp b/lib/include/rocRoller/CommonSubexpressionElim.hpp index 0e1aea68..36c6aa2b 100644 --- a/lib/include/rocRoller/CommonSubexpressionElim.hpp +++ b/lib/include/rocRoller/CommonSubexpressionElim.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -35,7 +37,7 @@ namespace rocRoller { namespace Expression { - struct ExpressionNode + struct ROCROLLER_DECLSPEC ExpressionNode { /** * The destination register for the expression. 
@@ -85,7 +87,8 @@ namespace rocRoller * @param context * @return ExpressionTree */ - ExpressionTree consolidateSubExpressions(ExpressionPtr expr, ContextPtr context); + ROCROLLER_DECLSPEC ExpressionTree consolidateSubExpressions(ExpressionPtr expr, + ContextPtr context); /** * @brief Get the number of consolidations performed by Common Subexpression Elimination @@ -93,7 +96,7 @@ namespace rocRoller * @param tree Tree to fetch count from * @return Count of subexpressions consolidatated from original expression */ - int getConsolidationCount(ExpressionTree const& tree); + ROCROLLER_DECLSPEC int getConsolidationCount(ExpressionTree const& tree); /** * @brief Rebuilds an Expression from an ExpressionTree @@ -101,6 +104,6 @@ namespace rocRoller * @param tree The tree to rebuild * @return ExpressionPtr */ - ExpressionPtr rebuildExpression(ExpressionTree const& tree); + ROCROLLER_DECLSPEC ExpressionPtr rebuildExpression(ExpressionTree const& tree); } } diff --git a/lib/include/rocRoller/Context.hpp b/lib/include/rocRoller/Context.hpp index ec029fec..a303955b 100644 --- a/lib/include/rocRoller/Context.hpp +++ b/lib/include/rocRoller/Context.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -57,11 +59,11 @@ #include #include -class ContextFixture; +class ROCROLLER_DECLSPEC ContextFixture; namespace rocRoller { - class Context : public std::enable_shared_from_this + class ROCROLLER_DECLSPEC Context : public std::enable_shared_from_this { public: Context(); @@ -184,7 +186,7 @@ namespace rocRoller KernelOptions m_kernelOptions; }; - std::ostream& operator<<(std::ostream&, ContextPtr const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, ContextPtr const&); } #include diff --git a/lib/include/rocRoller/Context_fwd.hpp b/lib/include/rocRoller/Context_fwd.hpp index 2788734e..81fdf4f5 100644 --- a/lib/include/rocRoller/Context_fwd.hpp +++ b/lib/include/rocRoller/Context_fwd.hpp @@ -26,11 +26,13 @@ #pragma once +#include + #include 
namespace rocRoller { - class Context; + class ROCROLLER_DECLSPEC Context; using ContextPtr = std::shared_ptr; } diff --git a/lib/include/rocRoller/DataTypes/DataTypes.hpp b/lib/include/rocRoller/DataTypes/DataTypes.hpp index 19bbec37..b06b346b 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -70,8 +72,8 @@ namespace rocRoller Count }; - std::string toString(DataDirection dir); - std::ostream& operator<<(std::ostream& stream, DataDirection dir); + ROCROLLER_DECLSPEC std::string toString(DataDirection dir); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, DataDirection dir); /** * \ingroup DataTypes @@ -121,10 +123,10 @@ namespace rocRoller Count }; - std::string toString(DataType d); - std::string TypeAbbrev(DataType d); - std::ostream& operator<<(std::ostream& stream, DataType const& t); - std::istream& operator>>(std::istream& stream, DataType& t); + ROCROLLER_DECLSPEC std::string toString(DataType d); + ROCROLLER_DECLSPEC std::string TypeAbbrev(DataType d); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, DataType const& t); + ROCROLLER_DECLSPEC std::istream& operator>>(std::istream& stream, DataType& t); /** * Pointer Type @@ -159,8 +161,8 @@ namespace rocRoller Count }; - std::string toString(MemoryType m); - std::ostream& operator<<(std::ostream& stream, MemoryType const& m); + ROCROLLER_DECLSPEC std::string toString(MemoryType m); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, MemoryType const& m); /** * Layout of wavetile for MFMA instructions. 
@@ -175,8 +177,8 @@ namespace rocRoller Count }; - std::string toString(LayoutType l); - std::ostream& operator<<(std::ostream& stream, LayoutType l); + ROCROLLER_DECLSPEC std::string toString(LayoutType l); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, LayoutType l); enum class NaryArgument : int { @@ -190,8 +192,8 @@ namespace rocRoller None = Count }; - std::string toString(NaryArgument n); - std::ostream& operator<<(std::ostream& stream, NaryArgument n); + ROCROLLER_DECLSPEC std::string toString(NaryArgument n); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, NaryArgument n); inline constexpr DataType getIntegerType(bool isSigned, int sizeBytes) { @@ -275,7 +277,7 @@ namespace rocRoller /** * VariableType */ - struct VariableType + struct ROCROLLER_DECLSPEC VariableType { constexpr VariableType() : dataType() @@ -368,18 +370,18 @@ namespace rocRoller static VariableType Promote(VariableType lhs, VariableType rhs); }; - std::string toString(PointerType const& p); - std::ostream& operator<<(std::ostream& stream, PointerType const& p); + ROCROLLER_DECLSPEC std::string toString(PointerType const& p); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, PointerType const& p); - std::string toString(VariableType const& v); - std::string TypeAbbrev(VariableType const& v); - std::ostream& operator<<(std::ostream& stream, VariableType const& v); + ROCROLLER_DECLSPEC std::string toString(VariableType const& v); + ROCROLLER_DECLSPEC std::string TypeAbbrev(VariableType const& v); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, VariableType const& v); /** * \ingroup DataTypes * \brief Runtime accessible data type metadata */ - struct DataTypeInfo + struct ROCROLLER_DECLSPEC DataTypeInfo { static DataTypeInfo const& Get(int index); static DataTypeInfo const& Get(DataType t); @@ -428,7 +430,7 @@ namespace rocRoller * \brief Compile-time accessible data type metadata. 
*/ template - struct TypeInfo + struct ROCROLLER_DECLSPEC TypeInfo { }; @@ -442,7 +444,7 @@ namespace rocRoller bool T_IsComplex, bool T_IsIntegral, bool T_IsSigned> - struct BaseTypeInfo + struct ROCROLLER_DECLSPEC BaseTypeInfo { using Type = T; @@ -643,19 +645,19 @@ namespace rocRoller T_IsIntegral, T_IsSigned>::IsIntegral; -#define DeclareDefaultValueTypeInfo(dtype, enumVal) \ - template <> \ - struct TypeInfo : public BaseTypeInfo, \ - std::is_signed_v> \ - { \ +#define DeclareDefaultValueTypeInfo(dtype, enumVal) \ + template <> \ + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo, \ + std::is_signed_v> \ + { \ } DeclareDefaultValueTypeInfo(float, Float); @@ -671,214 +673,216 @@ namespace rocRoller #undef DeclareDefaultValueTypeInfo template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo> : public BaseTypeInfo, - DataType::ComplexFloat, - DataType::ComplexFloat, - PointerType::Value, - 1, - 2, - 64, - true, - false, - true> + struct ROCROLLER_DECLSPEC TypeInfo> + : public BaseTypeInfo, + DataType::ComplexFloat, + DataType::ComplexFloat, + PointerType::Value, + 1, + 2, + 64, + true, + false, + true> { }; template <> - struct TypeInfo> : public BaseTypeInfo, - DataType::ComplexDouble, - DataType::ComplexDouble, - PointerType::Value, - 1, - 4, - 128, - true, - false, - true> + struct ROCROLLER_DECLSPEC TypeInfo> + : public BaseTypeInfo, + DataType::ComplexDouble, + DataType::ComplexDouble, + PointerType::Value, + 1, + 4, + 128, + true, + false, + true> { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct 
ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct Halfx2 : public DistinctType + struct ROCROLLER_DECLSPEC Halfx2 : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct FP8x4 : public DistinctType + struct ROCROLLER_DECLSPEC FP8x4 : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct BF8x4 : public DistinctType + struct ROCROLLER_DECLSPEC BF8x4 : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct FP6x16 + struct ROCROLLER_DECLSPEC FP6x16 { uint32_t a; uint32_t b; @@ -886,34 +890,34 @@ namespace rocRoller }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct BF6x16 + struct ROCROLLER_DECLSPEC BF6x16 { uint32_t a; uint32_t b; @@ -921,188 +925,190 @@ namespace rocRoller }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct FP4x8 : public DistinctType + struct ROCROLLER_DECLSPEC FP4x8 : public DistinctType { }; template <> - struct 
TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct BFloat16x2 : public DistinctType + struct ROCROLLER_DECLSPEC BFloat16x2 : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct Raw32 : public DistinctType + struct ROCROLLER_DECLSPEC Raw32 : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct Bool32 : public DistinctType + struct ROCROLLER_DECLSPEC Bool32 : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct Bool64 : public DistinctType + struct ROCROLLER_DECLSPEC Bool64 : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; - struct PointerLocal : public DistinctType + struct ROCROLLER_DECLSPEC PointerLocal : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo + : public BaseTypeInfo { }; - struct PointerGlobal : public DistinctType + struct ROCROLLER_DECLSPEC PointerGlobal : public DistinctType { }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo + : public BaseTypeInfo { }; - struct Buffer + struct ROCROLLER_DECLSPEC Buffer { uint32_t desc0; uint32_t desc1; @@ -1111,28 +1117,28 @@ namespace rocRoller }; template <> - struct TypeInfo : public BaseTypeInfo + struct ROCROLLER_DECLSPEC TypeInfo : public BaseTypeInfo { }; template - struct EnumTypeInfo + struct ROCROLLER_DECLSPEC EnumTypeInfo { }; -#define 
DeclareEnumTypeInfo(typeEnum, dtype) \ - template <> \ - struct EnumTypeInfo : public TypeInfo \ - { \ +#define DeclareEnumTypeInfo(typeEnum, dtype) \ + template <> \ + struct ROCROLLER_DECLSPEC EnumTypeInfo : public TypeInfo \ + { \ } DeclareEnumTypeInfo(Float, float); @@ -1201,7 +1207,7 @@ namespace rocRoller namespace std { template <> - struct hash + struct ROCROLLER_DECLSPEC hash { inline size_t operator()(rocRoller::VariableType const& varType) const { diff --git a/lib/include/rocRoller/DataTypes/DataTypes_BF16_Utils.hpp b/lib/include/rocRoller/DataTypes/DataTypes_BF16_Utils.hpp index 577ba478..5c38aea4 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_BF16_Utils.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_BF16_Utils.hpp @@ -35,6 +35,8 @@ j* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE namespace rocRoller { #pragma once + +#include inline constexpr uint16_t BFLOAT16_Q_NAN_VALUE = 0xFFC1; inline constexpr uint16_t BFLOAT16_ZERO_VALUE = 0x00; @@ -85,7 +87,7 @@ namespace rocRoller } - struct BFloat16; - float bf16_to_float(const BFloat16 v); - BFloat16 float_to_bf16(const float v); + struct ROCROLLER_DECLSPEC BFloat16; + ROCROLLER_DECLSPEC float bf16_to_float(const BFloat16 v); + ROCROLLER_DECLSPEC BFloat16 float_to_bf16(const float v); } diff --git a/lib/include/rocRoller/DataTypes/DataTypes_BF6.hpp b/lib/include/rocRoller/DataTypes/DataTypes_BF6.hpp index 8f8f6d38..cceb4aff 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_BF6.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_BF6.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include "DataTypes_F6_Utils.hpp" #include #include @@ -43,7 +45,7 @@ namespace rocRoller * \ingroup DataTypes * @{ */ - struct BF6 + struct ROCROLLER_DECLSPEC BF6 { constexpr BF6() : data(F6_ZERO_VALUE) @@ -226,12 +228,12 @@ namespace std } template <> - struct is_floating_point : true_type + struct ROCROLLER_DECLSPEC is_floating_point : true_type { }; template <> - struct hash + struct 
ROCROLLER_DECLSPEC hash { size_t operator()(const rocRoller::BF6& a) const { diff --git a/lib/include/rocRoller/DataTypes/DataTypes_BF8.hpp b/lib/include/rocRoller/DataTypes/DataTypes_BF8.hpp index 37bdbe7e..643a218e 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_BF8.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_BF8.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -48,7 +50,7 @@ namespace rocRoller * @brief Floating point 8-bit type in E5M2 format * */ - struct BF8 + struct ROCROLLER_DECLSPEC BF8 { constexpr BF8() : data(F8_ZERO_VALUE) @@ -223,12 +225,12 @@ namespace std } template <> - struct is_floating_point : true_type + struct ROCROLLER_DECLSPEC is_floating_point : true_type { }; template <> - struct hash + struct ROCROLLER_DECLSPEC hash { size_t operator()(const rocRoller::BF8& a) const { diff --git a/lib/include/rocRoller/DataTypes/DataTypes_BFloat16.hpp b/lib/include/rocRoller/DataTypes/DataTypes_BFloat16.hpp index 819a773c..908ea6b2 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_BFloat16.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_BFloat16.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #define ROCROLLER_USE_BFloat16 @@ -40,7 +42,7 @@ namespace rocRoller * @brief Floating point 8-bit type in E5M2 format * */ - struct BFloat16 + struct ROCROLLER_DECLSPEC BFloat16 { constexpr BFloat16() : data(BFLOAT16_ZERO_VALUE) @@ -101,12 +103,12 @@ namespace rocRoller namespace std { template <> - struct is_floating_point : true_type + struct ROCROLLER_DECLSPEC is_floating_point : true_type { }; template <> - struct hash + struct ROCROLLER_DECLSPEC hash { size_t operator()(const rocRoller::BFloat16& a) const { diff --git a/lib/include/rocRoller/DataTypes/DataTypes_F6_Utils.hpp b/lib/include/rocRoller/DataTypes/DataTypes_F6_Utils.hpp index 19f614b9..ea5f8cad 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_F6_Utils.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_F6_Utils.hpp @@ -26,6 +26,8 @@ j* 
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #pragma once +#include + #include #include #include @@ -346,11 +348,11 @@ namespace rocRoller inline constexpr int8_t F6_ZERO_VALUE = 0x0; - struct FP6; - float fp6_to_float(const FP6 v); - FP6 float_to_fp6(const float v); + struct ROCROLLER_DECLSPEC FP6; + ROCROLLER_DECLSPEC float fp6_to_float(const FP6 v); + ROCROLLER_DECLSPEC FP6 float_to_fp6(const float v); - struct BF6; - float bf6_to_float(const BF6 v); - BF6 float_to_bf6(const float v); + struct ROCROLLER_DECLSPEC BF6; + ROCROLLER_DECLSPEC float bf6_to_float(const BF6 v); + ROCROLLER_DECLSPEC BF6 float_to_bf6(const float v); } diff --git a/lib/include/rocRoller/DataTypes/DataTypes_F8_Utils.hpp b/lib/include/rocRoller/DataTypes/DataTypes_F8_Utils.hpp index 706eaf56..378db23f 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_F8_Utils.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_F8_Utils.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -415,13 +417,13 @@ namespace rocRoller inline constexpr int8_t F8_ZERO_VALUE = 0x0; - struct FP8; - float fp8_to_float(const FP8 v); - FP8 float_to_fp8(const float v); + struct ROCROLLER_DECLSPEC FP8; + ROCROLLER_DECLSPEC float fp8_to_float(const FP8 v); + ROCROLLER_DECLSPEC FP8 float_to_fp8(const float v); - struct BF8; - float bf8_to_float(const BF8 v); - BF8 float_to_bf8(const float v); + struct ROCROLLER_DECLSPEC BF8; + ROCROLLER_DECLSPEC float bf8_to_float(const BF8 v); + ROCROLLER_DECLSPEC BF8 float_to_bf8(const float v); inline float scaleToFloat(uint8_t scale) { @@ -430,7 +432,7 @@ namespace rocRoller inline uint8_t floatToScale(float value) { - struct + struct ROCROLLER_DECLSPEC { uint mantissa : 23; uint exponent : 8; diff --git a/lib/include/rocRoller/DataTypes/DataTypes_FP4.hpp b/lib/include/rocRoller/DataTypes/DataTypes_FP4.hpp index 3b6115dd..fdb8ab9e 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_FP4.hpp +++ 
b/lib/include/rocRoller/DataTypes/DataTypes_FP4.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -38,7 +40,7 @@ namespace rocRoller { - typedef struct + typedef struct ROCROLLER_DECLSPEC { uint32_t val : 4; } uint4_t; @@ -330,7 +332,7 @@ namespace rocRoller * @{ */ - struct FP4 + struct ROCROLLER_DECLSPEC FP4 { constexpr FP4() : data(FP4_ZERO_VALUE) @@ -531,12 +533,12 @@ namespace std } template <> - struct is_floating_point : true_type + struct ROCROLLER_DECLSPEC is_floating_point : true_type { }; template <> - struct hash + struct ROCROLLER_DECLSPEC hash { size_t operator()(const rocRoller::FP4& a) const { diff --git a/lib/include/rocRoller/DataTypes/DataTypes_FP6.hpp b/lib/include/rocRoller/DataTypes/DataTypes_FP6.hpp index b4d31435..7f903307 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_FP6.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_FP6.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include "DataTypes_F6_Utils.hpp" #include #include @@ -45,7 +47,7 @@ namespace rocRoller * \ingroup DataTypes * @{ */ - struct FP6 + struct ROCROLLER_DECLSPEC FP6 { constexpr FP6() : data(F6_ZERO_VALUE) @@ -228,12 +230,12 @@ namespace std } template <> - struct is_floating_point : true_type + struct ROCROLLER_DECLSPEC is_floating_point : true_type { }; template <> - struct hash + struct ROCROLLER_DECLSPEC hash { size_t operator()(const rocRoller::FP6& a) const { diff --git a/lib/include/rocRoller/DataTypes/DataTypes_FP8.hpp b/lib/include/rocRoller/DataTypes/DataTypes_FP8.hpp index ead8b410..832a4d7e 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_FP8.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_FP8.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -48,7 +50,7 @@ namespace rocRoller * @brief Floating point 8-bit type in E4M3 format * */ - struct FP8 + struct ROCROLLER_DECLSPEC FP8 { constexpr FP8() : data(F8_ZERO_VALUE) @@ -223,12 +225,12 @@ namespace std } template <> - struct is_floating_point : 
true_type + struct ROCROLLER_DECLSPEC is_floating_point : true_type { }; template <> - struct hash + struct ROCROLLER_DECLSPEC hash { size_t operator()(const rocRoller::FP8& a) const { diff --git a/lib/include/rocRoller/DataTypes/DataTypes_Half.hpp b/lib/include/rocRoller/DataTypes/DataTypes_Half.hpp index ab17e989..d5e4831a 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_Half.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_Half.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #ifdef ROCROLLER_USE_HIP #include #endif @@ -47,7 +49,7 @@ namespace rocRoller /** * \ingroup DataTypes */ - struct Half : public DistinctType + struct ROCROLLER_DECLSPEC Half : public DistinctType { }; #endif @@ -61,7 +63,7 @@ namespace std } template <> - struct hash + struct ROCROLLER_DECLSPEC hash { inline size_t operator()(rocRoller::Half const& h) const noexcept { @@ -71,12 +73,12 @@ namespace std }; template <> - struct is_floating_point : std::true_type + struct ROCROLLER_DECLSPEC is_floating_point : std::true_type { }; template <> - struct is_signed : std::true_type + struct ROCROLLER_DECLSPEC is_signed : std::true_type { }; } // namespace std diff --git a/lib/include/rocRoller/DataTypes/DataTypes_Int8.hpp b/lib/include/rocRoller/DataTypes/DataTypes_Int8.hpp index 262e7a1d..a57920b9 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_Int8.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_Int8.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #ifdef ROCROLLER_USE_HIP #include #endif @@ -37,7 +39,7 @@ namespace rocRoller /** * \ingroup DataTypes */ - struct Int8 : public DistinctType + struct ROCROLLER_DECLSPEC Int8 : public DistinctType { }; } // namespace rocRoller diff --git a/lib/include/rocRoller/DataTypes/DataTypes_Int8x4.hpp b/lib/include/rocRoller/DataTypes/DataTypes_Int8x4.hpp index 339ca2af..984de57e 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_Int8x4.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_Int8x4.hpp @@ -26,11 +26,13 @@ #pragma once +#include 
+ #include namespace rocRoller { - struct Int8x4 + struct ROCROLLER_DECLSPEC Int8x4 { Int8x4() : a(0) diff --git a/lib/include/rocRoller/DataTypes/DataTypes_UInt8x4.hpp b/lib/include/rocRoller/DataTypes/DataTypes_UInt8x4.hpp index 657d04d1..f744560c 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_UInt8x4.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_UInt8x4.hpp @@ -26,11 +26,13 @@ #pragma once +#include + #include namespace rocRoller { - struct UInt8x4 + struct ROCROLLER_DECLSPEC UInt8x4 { UInt8x4() : a(0) diff --git a/lib/include/rocRoller/DataTypes/DataTypes_Utils.hpp b/lib/include/rocRoller/DataTypes/DataTypes_Utils.hpp index f5a6e25c..eb605939 100644 --- a/lib/include/rocRoller/DataTypes/DataTypes_Utils.hpp +++ b/lib/include/rocRoller/DataTypes/DataTypes_Utils.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -33,76 +35,76 @@ namespace rocRoller { template - struct PackedTypeOf + struct ROCROLLER_DECLSPEC PackedTypeOf { typedef T type; }; template <> - struct PackedTypeOf + struct ROCROLLER_DECLSPEC PackedTypeOf { typedef FP6x16 type; }; template <> - struct PackedTypeOf + struct ROCROLLER_DECLSPEC PackedTypeOf { typedef BF6x16 type; }; template <> - struct PackedTypeOf + struct ROCROLLER_DECLSPEC PackedTypeOf { typedef FP4x8 type; }; template - struct SegmentedTypeOf + struct ROCROLLER_DECLSPEC SegmentedTypeOf { typedef T type; }; template <> - struct SegmentedTypeOf + struct ROCROLLER_DECLSPEC SegmentedTypeOf { typedef FP6 type; }; template <> - struct SegmentedTypeOf + struct ROCROLLER_DECLSPEC SegmentedTypeOf { typedef BF6 type; }; template <> - struct SegmentedTypeOf + struct ROCROLLER_DECLSPEC SegmentedTypeOf { typedef FP4 type; }; - void packFP4x8(uint32_t* out, uint8_t const* data, size_t n); - std::vector packFP4x8(std::vector const&); - std::vector unpackFP4x8(std::vector const&); + ROCROLLER_DECLSPEC void packFP4x8(uint32_t* out, uint8_t const* data, size_t n); + ROCROLLER_DECLSPEC std::vector packFP4x8(std::vector 
const&); + ROCROLLER_DECLSPEC std::vector unpackFP4x8(std::vector const&); - std::vector f32_to_fp4x8(std::vector f32); - std::vector fp4x8_to_f32(std::vector f4x8); + ROCROLLER_DECLSPEC std::vector f32_to_fp4x8(std::vector f32); + ROCROLLER_DECLSPEC std::vector fp4x8_to_f32(std::vector f4x8); - void packF6x16(uint32_t*, uint8_t const*, size_t); - std::vector packF6x16(std::vector const&); - std::vector unpackF6x16(std::vector const&); + ROCROLLER_DECLSPEC void packF6x16(uint32_t*, uint8_t const*, size_t); + ROCROLLER_DECLSPEC std::vector packF6x16(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackF6x16(std::vector const&); inline std::vector unpackToFloat(std::vector const& x) { return x; }; - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); - std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); + ROCROLLER_DECLSPEC std::vector unpackToFloat(std::vector const&); } diff --git a/lib/include/rocRoller/DataTypes/DistinctType.hpp b/lib/include/rocRoller/DataTypes/DistinctType.hpp 
index 7dc0d489..d50eaa6d 100644 --- a/lib/include/rocRoller/DataTypes/DistinctType.hpp +++ b/lib/include/rocRoller/DataTypes/DistinctType.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -45,7 +47,7 @@ namespace rocRoller * to its underlying data type. */ template - struct DistinctType + struct ROCROLLER_DECLSPEC DistinctType { using Value = T; diff --git a/lib/include/rocRoller/ExecutableKernel.hpp b/lib/include/rocRoller/ExecutableKernel.hpp index 6e6b7597..253643b0 100644 --- a/lib/include/rocRoller/ExecutableKernel.hpp +++ b/lib/include/rocRoller/ExecutableKernel.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -35,7 +37,7 @@ namespace rocRoller { // KernelInvocation contains the information needed to launch a kernel with // the ExecutableKernel class. - struct KernelInvocation + struct ROCROLLER_DECLSPEC KernelInvocation { std::array workitemCount = {1, 1, 1}; std::array workgroupSize = {1, 1, 1}; @@ -44,7 +46,7 @@ namespace rocRoller // The executer class can load a kernel from a string of machine code and // then launch the kernel on a GPU. 
- class ExecutableKernel + class ROCROLLER_DECLSPEC ExecutableKernel { public: ExecutableKernel(); @@ -129,7 +131,7 @@ namespace rocRoller hipFunction_t getHipFunction() const; private: - struct HIPData; + struct ROCROLLER_DECLSPEC HIPData; std::string m_kernelName; bool m_kernelLoaded; diff --git a/lib/include/rocRoller/ExecutableKernel_fwd.hpp b/lib/include/rocRoller/ExecutableKernel_fwd.hpp index 2b101e82..b869e2b3 100644 --- a/lib/include/rocRoller/ExecutableKernel_fwd.hpp +++ b/lib/include/rocRoller/ExecutableKernel_fwd.hpp @@ -26,8 +26,10 @@ #pragma once +#include + namespace rocRoller { - struct KernelInvocation; - class ExecutableKernel; + struct ROCROLLER_DECLSPEC KernelInvocation; + class ROCROLLER_DECLSPEC ExecutableKernel; } diff --git a/lib/include/rocRoller/Expression.hpp b/lib/include/rocRoller/Expression.hpp index 8e8fa36e..b1255c8a 100644 --- a/lib/include/rocRoller/Expression.hpp +++ b/lib/include/rocRoller/Expression.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -41,8 +43,8 @@ namespace rocRoller { namespace Expression { - std::string toString(EvaluationTime t); - std::ostream& operator<<(std::ostream&, EvaluationTime const&); + ROCROLLER_DECLSPEC std::string toString(EvaluationTime t); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, EvaluationTime const&); using EvaluationTimes = EnumBitset; @@ -52,8 +54,8 @@ namespace rocRoller Associative, Count }; - std::string toString(AlgebraicProperty t); - std::ostream& operator<<(std::ostream&, AlgebraicProperty const&); + ROCROLLER_DECLSPEC std::string toString(AlgebraicProperty t); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, AlgebraicProperty const&); using AlgebraicProperties = EnumBitset; @@ -66,13 +68,13 @@ namespace rocRoller Value, Count }; - std::string toString(Category c); - std::ostream& operator<<(std::ostream&, Category const&); + ROCROLLER_DECLSPEC std::string toString(Category c); + ROCROLLER_DECLSPEC std::ostream& 
operator<<(std::ostream&, Category const&); // Expression: type alias for std::variant of all expression subtypes. // Defined in Expression_fwd.hpp. - struct Binary + struct ROCROLLER_DECLSPEC Binary { ExpressionPtr lhs, rhs; std::string comment = ""; @@ -95,7 +97,7 @@ namespace rocRoller // expressions. See the KernelOption minLaunchTimeExpressionComplexity for a more // in-depth description. - struct Add : Binary + struct ROCROLLER_DECLSPEC Add : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -104,7 +106,7 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct Subtract : Binary + struct ROCROLLER_DECLSPEC Subtract : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -112,7 +114,7 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct Multiply : Binary + struct ROCROLLER_DECLSPEC Multiply : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -121,7 +123,7 @@ namespace rocRoller constexpr static inline int Complexity = 4; }; - struct MultiplyHigh : Binary + struct ROCROLLER_DECLSPEC MultiplyHigh : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -129,7 +131,7 @@ namespace rocRoller constexpr static inline int Complexity = 4; }; - struct Divide : Binary + struct ROCROLLER_DECLSPEC Divide : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -137,7 +139,7 @@ namespace rocRoller constexpr static inline int Complexity = 50; }; - struct Modulo : Binary + struct ROCROLLER_DECLSPEC Modulo : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = 
EvaluationTimes::All(); @@ -145,7 +147,7 @@ namespace rocRoller constexpr static inline int Complexity = 50; }; - struct ShiftL : Binary + struct ROCROLLER_DECLSPEC ShiftL : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -153,7 +155,7 @@ namespace rocRoller constexpr static inline int Complexity = 1; }; - struct LogicalShiftR : Binary + struct ROCROLLER_DECLSPEC LogicalShiftR : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -161,7 +163,7 @@ namespace rocRoller constexpr static inline int Complexity = 1; }; - struct ArithmeticShiftR : Binary + struct ROCROLLER_DECLSPEC ArithmeticShiftR : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -169,7 +171,7 @@ namespace rocRoller constexpr static inline int Complexity = 1; }; - struct BitwiseAnd : Binary + struct ROCROLLER_DECLSPEC BitwiseAnd : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -178,7 +180,7 @@ namespace rocRoller constexpr static inline int Complexity = 1; }; - struct BitwiseOr : Binary + struct ROCROLLER_DECLSPEC BitwiseOr : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -187,7 +189,7 @@ namespace rocRoller constexpr static inline int Complexity = 1; }; - struct BitwiseXor : Binary + struct ROCROLLER_DECLSPEC BitwiseXor : Binary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -196,7 +198,7 @@ namespace rocRoller constexpr static inline int Complexity = 1; }; - struct GreaterThan : Binary + struct ROCROLLER_DECLSPEC GreaterThan : Binary { constexpr static inline auto Type = Category::Comparison; 
constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -204,7 +206,7 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct GreaterThanEqual : Binary + struct ROCROLLER_DECLSPEC GreaterThanEqual : Binary { constexpr static inline auto Type = Category::Comparison; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -212,7 +214,7 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct LessThan : Binary + struct ROCROLLER_DECLSPEC LessThan : Binary { constexpr static inline auto Type = Category::Comparison; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -220,7 +222,7 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct LessThanEqual : Binary + struct ROCROLLER_DECLSPEC LessThanEqual : Binary { constexpr static inline auto Type = Category::Comparison; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -228,7 +230,7 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct Equal : Binary + struct ROCROLLER_DECLSPEC Equal : Binary { constexpr static inline auto Type = Category::Comparison; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -236,7 +238,7 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct NotEqual : Binary + struct ROCROLLER_DECLSPEC NotEqual : Binary { constexpr static inline auto Type = Category::Comparison; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -244,7 +246,7 @@ namespace rocRoller constexpr static inline int Complexity = 1; }; - struct LogicalAnd : Binary + struct ROCROLLER_DECLSPEC LogicalAnd : Binary { constexpr static inline auto Type = Category::Logical; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -253,7 +255,7 @@ namespace rocRoller constexpr static inline int Complexity = 1; }; - struct LogicalOr : Binary + struct ROCROLLER_DECLSPEC LogicalOr : Binary { constexpr static inline auto Type = 
Category::Logical; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -268,7 +270,7 @@ namespace rocRoller * for stochastic rounding. */ template - struct SRConvert : Binary + struct ROCROLLER_DECLSPEC SRConvert : Binary { constexpr static inline auto DestinationType = DATATYPE; constexpr static inline auto Type = Category::Conversion; @@ -276,7 +278,7 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct Ternary + struct ROCROLLER_DECLSPEC Ternary { ExpressionPtr lhs, r1hs, r2hs; std::string comment = ""; @@ -289,7 +291,7 @@ namespace rocRoller } }; - struct TernaryMixed : Ternary + struct ROCROLLER_DECLSPEC TernaryMixed : Ternary { }; @@ -310,7 +312,7 @@ namespace rocRoller * ShiftL expression, lowering to the fused instruction if possible. * result = (lhs + r1hs) << r2hs */ - struct AddShiftL : Ternary + struct ROCROLLER_DECLSPEC AddShiftL : Ternary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline EvaluationTimes EvalTimes{EvaluationTime::KernelExecute}; @@ -322,7 +324,7 @@ namespace rocRoller * Add expression, lowering to the fused instruction if possible. * result = (lhs << r1hs) + r2hs */ - struct ShiftLAdd : Ternary + struct ROCROLLER_DECLSPEC ShiftLAdd : Ternary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline EvaluationTimes EvalTimes{EvaluationTime::KernelExecute}; @@ -334,7 +336,7 @@ namespace rocRoller * * MatA is M x K, with B batches. MatB is K x N, with B batches. MatC is M x N, with B batches. */ - struct MatrixMultiply : Ternary + struct ROCROLLER_DECLSPEC MatrixMultiply : Ternary { MatrixMultiply() = default; @@ -357,7 +359,7 @@ namespace rocRoller constexpr static inline int Complexity = 20; }; - struct ScaledMatrixMultiply + struct ROCROLLER_DECLSPEC ScaledMatrixMultiply { ExpressionPtr matA, matB, matC, scaleA, scaleB; DataType accumulationPrecision = DataType::Float; @@ -386,7 +388,7 @@ namespace rocRoller * Represents DEST = LHS ? 
R1HS : R2HS. * Utilizes cselect */ - struct Conditional : Ternary + struct ROCROLLER_DECLSPEC Conditional : Ternary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -398,7 +400,7 @@ namespace rocRoller * Utilizes TernaryMixed instead of Ternary * allows for mixed precision arithmetic */ - struct MultiplyAdd : TernaryMixed + struct ROCROLLER_DECLSPEC MultiplyAdd : TernaryMixed { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); @@ -407,7 +409,7 @@ namespace rocRoller constexpr static inline int Complexity = 4; }; - struct Unary + struct ROCROLLER_DECLSPEC Unary { ExpressionPtr arg; std::string comment = ""; @@ -426,7 +428,7 @@ namespace rocRoller requires std::derived_from; }; - struct MagicMultiple : Unary + struct ROCROLLER_DECLSPEC MagicMultiple : Unary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline EvaluationTimes EvalTimes{EvaluationTime::Translate, @@ -434,7 +436,7 @@ namespace rocRoller constexpr static inline int Complexity = 50; }; - struct MagicShifts : Unary + struct ROCROLLER_DECLSPEC MagicShifts : Unary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline EvaluationTimes EvalTimes{EvaluationTime::Translate, @@ -442,7 +444,7 @@ namespace rocRoller constexpr static inline int Complexity = 50; }; - struct MagicSign : Unary + struct ROCROLLER_DECLSPEC MagicSign : Unary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline EvaluationTimes EvalTimes{EvaluationTime::Translate, @@ -450,21 +452,21 @@ namespace rocRoller constexpr static inline int Complexity = 50; }; - struct Negate : Unary + struct ROCROLLER_DECLSPEC Negate : Unary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); constexpr static inline int Complexity = 1; }; - struct 
BitwiseNegate : Unary + struct ROCROLLER_DECLSPEC BitwiseNegate : Unary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); constexpr static inline int Complexity = 1; }; - struct Convert : Unary + struct ROCROLLER_DECLSPEC Convert : Unary { inline Convert& copyParams(const Convert& other) { @@ -480,21 +482,21 @@ namespace rocRoller DataType destinationType = DataType::None; }; - struct LogicalNot : Unary + struct ROCROLLER_DECLSPEC LogicalNot : Unary { constexpr static inline auto Type = Category::Logical; constexpr static inline auto EvalTimes = EvaluationTimes::All(); constexpr static inline int Complexity = 1; }; - struct Exponential2 : Unary + struct ROCROLLER_DECLSPEC Exponential2 : Unary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); constexpr static inline int Complexity = 1; }; - struct Exponential : Unary + struct ROCROLLER_DECLSPEC Exponential : Unary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline EvaluationTimes EvalTimes{EvaluationTime::Translate, @@ -502,14 +504,14 @@ namespace rocRoller constexpr static inline int Complexity = 2; }; - struct RandomNumber : Unary + struct ROCROLLER_DECLSPEC RandomNumber : Unary { constexpr static inline auto Type = Category::Arithmetic; constexpr static inline auto EvalTimes = EvaluationTimes::All(); constexpr static inline int Complexity = 1; }; - struct BitFieldExtract : Unary + struct ROCROLLER_DECLSPEC BitFieldExtract : Unary { inline BitFieldExtract& copyParams(const BitFieldExtract& other) { @@ -539,7 +541,7 @@ namespace rocRoller * If `varType` is `DataType::None`, the data type is * "deferred". 
*/ - struct DataFlowTag + struct ROCROLLER_DECLSPEC DataFlowTag { int tag; @@ -729,105 +731,113 @@ namespace rocRoller // Other visitors // - std::string toString(ExpressionPtr const& expr); - std::string toString(Expression const& expr); - std::ostream& operator<<(std::ostream&, ExpressionPtr const&); - std::ostream& operator<<(std::ostream&, Expression const&); - std::ostream& operator<<(std::ostream&, std::vector const&); + ROCROLLER_DECLSPEC std::string toString(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC std::string toString(Expression const& expr); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, ExpressionPtr const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, Expression const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, + std::vector const&); - std::string name(ExpressionPtr const& expr); - std::string name(Expression const& expr); + ROCROLLER_DECLSPEC std::string name(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC std::string name(Expression const& expr); - std::string argumentName(ExpressionPtr const& expr); - std::string argumentName(Expression const& expr); + ROCROLLER_DECLSPEC std::string argumentName(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC std::string argumentName(Expression const& expr); // EvaluationTime max(EvaluationTime lhs, EvaluationTime rhs); - EvaluationTimes evaluationTimes(ExpressionPtr const& expr); - EvaluationTimes evaluationTimes(Expression const& expr); + ROCROLLER_DECLSPEC EvaluationTimes evaluationTimes(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC EvaluationTimes evaluationTimes(Expression const& expr); - VariableType resultVariableType(Expression const& expr); - VariableType resultVariableType(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC VariableType resultVariableType(Expression const& expr); + ROCROLLER_DECLSPEC VariableType resultVariableType(ExpressionPtr const& expr); - Register::Type resultRegisterType(Expression const& expr); - Register::Type 
resultRegisterType(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC Register::Type resultRegisterType(Expression const& expr); + ROCROLLER_DECLSPEC Register::Type resultRegisterType(ExpressionPtr const& expr); - struct ResultType + struct ROCROLLER_DECLSPEC ResultType { Register::Type regType; VariableType varType; bool operator==(ResultType const&) const = default; }; - ResultType resultType(ExpressionPtr const& expr); - ResultType resultType(Expression const& expr); + ROCROLLER_DECLSPEC ResultType resultType(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC ResultType resultType(Expression const& expr); - std::string toString(ResultType const& obj); - std::ostream& operator<<(std::ostream&, ResultType const&); + ROCROLLER_DECLSPEC std::string toString(ResultType const& obj); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, ResultType const&); /** * True when two expressions are identical. * * NOTE: Never considers commutativity or associativity. */ - bool identical(ExpressionPtr const&, ExpressionPtr const&); - bool identical(Expression const&, Expression const&); + ROCROLLER_DECLSPEC bool identical(ExpressionPtr const&, ExpressionPtr const&); + ROCROLLER_DECLSPEC bool identical(Expression const&, Expression const&); /** * True when two expressions are equivalent. * Optionally considers algebraic properties like commutativity. */ - bool equivalent(ExpressionPtr const&, - ExpressionPtr const&, - AlgebraicProperties = AlgebraicProperties::All()); + ROCROLLER_DECLSPEC bool equivalent(ExpressionPtr const&, + ExpressionPtr const&, + AlgebraicProperties = AlgebraicProperties::All()); /** * Comment accessors. 
*/ - void setComment(ExpressionPtr& expr, std::string comment); - void setComment(Expression& expr, std::string comment); + ROCROLLER_DECLSPEC void setComment(ExpressionPtr& expr, std::string comment); + ROCROLLER_DECLSPEC void setComment(Expression& expr, std::string comment); - std::string getComment(Expression const& expr, bool includeRegisterComments); - std::string getComment(ExpressionPtr const& expr, bool includeRegisterComments); + ROCROLLER_DECLSPEC std::string getComment(Expression const& expr, + bool includeRegisterComments); + ROCROLLER_DECLSPEC std::string getComment(ExpressionPtr const& expr, + bool includeRegisterComments); - std::string getComment(ExpressionPtr const& expr); - std::string getComment(Expression const& expr); - std::string getComment(ExpressionPtr const& expr, bool includeRegisterComments); - std::string getComment(Expression const& expr, bool includeRegisterComments); + ROCROLLER_DECLSPEC std::string getComment(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC std::string getComment(Expression const& expr); + ROCROLLER_DECLSPEC std::string getComment(ExpressionPtr const& expr, + bool includeRegisterComments); + ROCROLLER_DECLSPEC std::string getComment(Expression const& expr, + bool includeRegisterComments); /** * Copies any comments from src into dst. If dst is not of a type that allows * comments, does nothing. 
*/ - void copyComment(ExpressionPtr const& dst, ExpressionPtr const& src); - void copyComment(Expression& dst, ExpressionPtr const& src); - void copyComment(ExpressionPtr const& dst, Expression const& src); - void copyComment(Expression& dst, Expression const& src); + ROCROLLER_DECLSPEC void copyComment(ExpressionPtr const& dst, ExpressionPtr const& src); + ROCROLLER_DECLSPEC void copyComment(Expression& dst, ExpressionPtr const& src); + ROCROLLER_DECLSPEC void copyComment(ExpressionPtr const& dst, Expression const& src); + ROCROLLER_DECLSPEC void copyComment(Expression& dst, Expression const& src); - void appendComment(ExpressionPtr& expr, std::string comment); - void appendComment(Expression& expr, std::string comment); + ROCROLLER_DECLSPEC void appendComment(ExpressionPtr& expr, std::string comment); + ROCROLLER_DECLSPEC void appendComment(Expression& expr, std::string comment); /** * Evaluate an expression whose evaluationTime is Translate. Will throw an exception * otherwise. */ - CommandArgumentValue evaluate(ExpressionPtr const& expr); - CommandArgumentValue evaluate(Expression const& expr); + ROCROLLER_DECLSPEC CommandArgumentValue evaluate(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC CommandArgumentValue evaluate(Expression const& expr); /** * Evaluate an expression if its evaluationTime is Translate, returns nullopt * otherwise. */ - std::optional tryEvaluate(ExpressionPtr const& expr); - std::optional tryEvaluate(Expression const& expr); + ROCROLLER_DECLSPEC std::optional + tryEvaluate(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC std::optional tryEvaluate(Expression const& expr); - bool canEvaluateTo(CommandArgumentValue val, ExpressionPtr const& expr); + ROCROLLER_DECLSPEC bool canEvaluateTo(CommandArgumentValue val, ExpressionPtr const& expr); /** * Evaluate an expression whose evaluationTime is Translate or KernelLaunch. Will throw * an exception if it contains any Register values. 
*/ - CommandArgumentValue evaluate(ExpressionPtr const& expr, RuntimeArguments const& args); - CommandArgumentValue evaluate(Expression const& expr, RuntimeArguments const& args); + ROCROLLER_DECLSPEC CommandArgumentValue evaluate(ExpressionPtr const& expr, + RuntimeArguments const& args); + ROCROLLER_DECLSPEC CommandArgumentValue evaluate(Expression const& expr, + RuntimeArguments const& args); /** * Splits an expression and returns its operands in a tuple. @@ -840,39 +850,42 @@ namespace rocRoller * Throws if expr is not of type Expr. */ template - requires(CUnary || CBinary || CTernary) auto split(ExpressionPtr expr); + requires(CUnary || CBinary || CTernary) ROCROLLER_DECLSPEC + auto split(ExpressionPtr expr); /** * Returns an approximate total complexity for an expression, to be used as a heuristic. * See the KernelOption minLaunchTimeExpressionComplexity for a more in-depth * description. */ - int complexity(ExpressionPtr expr); - int complexity(Expression const& expr); + ROCROLLER_DECLSPEC int complexity(ExpressionPtr expr); + ROCROLLER_DECLSPEC int complexity(Expression const& expr); - Generator + ROCROLLER_DECLSPEC Generator generate(Register::ValuePtr& dest, ExpressionPtr expr, ContextPtr context); - std::string toYAML(ExpressionPtr const& expr); - ExpressionPtr fromYAML(std::string const& str); + ROCROLLER_DECLSPEC std::string toYAML(ExpressionPtr const& expr); + ROCROLLER_DECLSPEC ExpressionPtr fromYAML(std::string const& str); /** * Returns true if expr is of type T or if expr contains a subexpression of type T. */ template - bool contains(ExpressionPtr expr); + ROCROLLER_DECLSPEC bool contains(ExpressionPtr expr); /** * Returns true if expr is of type T or if expr contains a subexpression of type T. 
*/ template - bool contains(Expression const& expr); + ROCROLLER_DECLSPEC bool contains(Expression const& expr); /** * Returns true if expr contains a sub-expression */ - bool containsSubExpression(ExpressionPtr const& expr, ExpressionPtr const& subExpr); - bool containsSubExpression(Expression const& expr, Expression const& subExpr); + ROCROLLER_DECLSPEC bool containsSubExpression(ExpressionPtr const& expr, + ExpressionPtr const& subExpr); + ROCROLLER_DECLSPEC bool containsSubExpression(Expression const& expr, + Expression const& subExpr); } // namespace Expression } // namespace rocRoller diff --git a/lib/include/rocRoller/ExpressionTransformations.hpp b/lib/include/rocRoller/ExpressionTransformations.hpp index 01131d1b..9250e929 100644 --- a/lib/include/rocRoller/ExpressionTransformations.hpp +++ b/lib/include/rocRoller/ExpressionTransformations.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -33,14 +35,15 @@ namespace rocRoller { namespace Expression { - ExpressionPtr identity(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr identity(ExpressionPtr expr); /** * Transform sub-expressions of `expr` into new kernel arguments * * Return value should be Translate time or KernelExecute time evaluable. */ - ExpressionPtr launchTimeSubExpressions(ExpressionPtr expr, ContextPtr context); + ROCROLLER_DECLSPEC ExpressionPtr launchTimeSubExpressions(ExpressionPtr expr, + ContextPtr context); /** * Restore any command arguments that have been cleaned (transformed from command @@ -48,7 +51,7 @@ namespace rocRoller * * Return value should be Translate time or KernelLaunch time evaluable. */ - ExpressionPtr restoreCommandArguments(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr restoreCommandArguments(ExpressionPtr expr); /** * @brief Attempt to replace division operations found within an expression with faster operations. 
@@ -57,14 +60,14 @@ namespace rocRoller * @param context * @return ExpressionPtr Transformed expression */ - ExpressionPtr fastDivision(ExpressionPtr expr, ContextPtr context); + ROCROLLER_DECLSPEC ExpressionPtr fastDivision(ExpressionPtr expr, ContextPtr context); /** * Ensures that the kernel arguments will include the magic constants required to divide/modulo * by `expr`. * Requires `expr` to have a type of either Int32 or Int64, and to be evaluable at kernel launch time. */ - void enableDivideBy(ExpressionPtr expr, ContextPtr context); + ROCROLLER_DECLSPEC void enableDivideBy(ExpressionPtr expr, ContextPtr context); /** * @brief Attempt to replace multiplication operations found within an expression with faster operations. @@ -72,13 +75,13 @@ namespace rocRoller * @param expr Input expression * @return ExpressionPtr Transformed expression */ - ExpressionPtr fastMultiplication(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr fastMultiplication(ExpressionPtr expr); /** * Attempt to combine multiple shifts: * - Opposite shifts by same amount: mask off bits that would be zeroed out. */ - ExpressionPtr combineShifts(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr combineShifts(ExpressionPtr expr); /** * @brief Simplify expressions @@ -86,7 +89,7 @@ namespace rocRoller * @param expr Input expression * @return ExpressionPtr Transformed expression */ - ExpressionPtr simplify(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr simplify(ExpressionPtr expr); /** * @brief Fuse binary expressions into ternaries. 
@@ -94,7 +97,7 @@ namespace rocRoller * @param expr Input expression * @return ExpressionPtr Transformed expression */ - ExpressionPtr fuseTernary(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr fuseTernary(ExpressionPtr expr); /** * @brief Fuse binary expressions if one combination is able to be condensed by association @@ -102,7 +105,7 @@ namespace rocRoller * @param expr Input expression * @return ExpressionPtr Transformed expression */ - ExpressionPtr fuseAssociative(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr fuseAssociative(ExpressionPtr expr); /** * Resolve all DataFlowTags in the given expression. @@ -111,7 +114,8 @@ namespace rocRoller * @param expr Input expression * @return ExpressionPtr Transformed expression */ - ExpressionPtr dataFlowTagPropagation(ExpressionPtr expr, ContextPtr context); + ROCROLLER_DECLSPEC ExpressionPtr dataFlowTagPropagation(ExpressionPtr expr, + ContextPtr context); /** * @brief Attempt to compute e^x operations by using exp2(x * log2(e)). @@ -119,7 +123,7 @@ namespace rocRoller * @param expr Input expression * @return ExpressionPtr Transformed expression */ - ExpressionPtr lowerExponential(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr lowerExponential(ExpressionPtr expr); /** * Helper (lambda/transducer) for applying all fast arithmetic transformations. @@ -131,7 +135,7 @@ namespace rocRoller * * Can also be passed as an ExpressionTransducer. 
*/ - struct FastArithmetic + struct ROCROLLER_DECLSPEC FastArithmetic { FastArithmetic() = delete; explicit FastArithmetic(ContextPtr); @@ -151,7 +155,7 @@ namespace rocRoller * @param context * @return ExpressionPtr Transformed expression */ - ExpressionPtr lowerPRNG(ExpressionPtr exp, ContextPtr context); + ROCROLLER_DECLSPEC ExpressionPtr lowerPRNG(ExpressionPtr exp, ContextPtr context); /** * @brief Resolve all ValuePtr expressions that are bitfields into @@ -160,6 +164,6 @@ namespace rocRoller * @param expr Input expression * @return ExpressionPtr Transformed expression */ - ExpressionPtr lowerBitfieldValues(ExpressionPtr expr); + ROCROLLER_DECLSPEC ExpressionPtr lowerBitfieldValues(ExpressionPtr expr); } } diff --git a/lib/include/rocRoller/Expression_fwd.hpp b/lib/include/rocRoller/Expression_fwd.hpp index 67ef714c..d9fc37db 100644 --- a/lib/include/rocRoller/Expression_fwd.hpp +++ b/lib/include/rocRoller/Expression_fwd.hpp @@ -30,6 +30,8 @@ #pragma once +#include + #include #include #include @@ -44,54 +46,54 @@ namespace rocRoller { namespace Expression { - struct Add; - struct MatrixMultiply; - struct ScaledMatrixMultiply; - struct Multiply; - struct MultiplyAdd; - struct MultiplyHigh; - struct Subtract; - struct Divide; - struct Modulo; - struct ShiftL; - struct LogicalShiftR; - struct ArithmeticShiftR; - struct BitwiseNegate; - struct BitwiseAnd; - struct BitwiseOr; - struct BitwiseXor; - struct GreaterThan; - struct GreaterThanEqual; - struct LessThan; - struct LessThanEqual; - struct Equal; - struct NotEqual; - struct LogicalAnd; - struct LogicalOr; - struct LogicalNot; - - struct Exponential2; - struct Exponential; - - struct MagicMultiple; - struct MagicShifts; - struct MagicSign; - struct Negate; - - struct RandomNumber; - - struct BitFieldExtract; - - struct AddShiftL; - struct ShiftLAdd; - struct Conditional; - - struct Convert; + struct ROCROLLER_DECLSPEC Add; + struct ROCROLLER_DECLSPEC MatrixMultiply; + struct ROCROLLER_DECLSPEC 
ScaledMatrixMultiply; + struct ROCROLLER_DECLSPEC Multiply; + struct ROCROLLER_DECLSPEC MultiplyAdd; + struct ROCROLLER_DECLSPEC MultiplyHigh; + struct ROCROLLER_DECLSPEC Subtract; + struct ROCROLLER_DECLSPEC Divide; + struct ROCROLLER_DECLSPEC Modulo; + struct ROCROLLER_DECLSPEC ShiftL; + struct ROCROLLER_DECLSPEC LogicalShiftR; + struct ROCROLLER_DECLSPEC ArithmeticShiftR; + struct ROCROLLER_DECLSPEC BitwiseNegate; + struct ROCROLLER_DECLSPEC BitwiseAnd; + struct ROCROLLER_DECLSPEC BitwiseOr; + struct ROCROLLER_DECLSPEC BitwiseXor; + struct ROCROLLER_DECLSPEC GreaterThan; + struct ROCROLLER_DECLSPEC GreaterThanEqual; + struct ROCROLLER_DECLSPEC LessThan; + struct ROCROLLER_DECLSPEC LessThanEqual; + struct ROCROLLER_DECLSPEC Equal; + struct ROCROLLER_DECLSPEC NotEqual; + struct ROCROLLER_DECLSPEC LogicalAnd; + struct ROCROLLER_DECLSPEC LogicalOr; + struct ROCROLLER_DECLSPEC LogicalNot; + + struct ROCROLLER_DECLSPEC Exponential2; + struct ROCROLLER_DECLSPEC Exponential; + + struct ROCROLLER_DECLSPEC MagicMultiple; + struct ROCROLLER_DECLSPEC MagicShifts; + struct ROCROLLER_DECLSPEC MagicSign; + struct ROCROLLER_DECLSPEC Negate; + + struct ROCROLLER_DECLSPEC RandomNumber; + + struct ROCROLLER_DECLSPEC BitFieldExtract; + + struct ROCROLLER_DECLSPEC AddShiftL; + struct ROCROLLER_DECLSPEC ShiftLAdd; + struct ROCROLLER_DECLSPEC Conditional; + + struct ROCROLLER_DECLSPEC Convert; template - struct SRConvert; + struct ROCROLLER_DECLSPEC SRConvert; - struct DataFlowTag; + struct ROCROLLER_DECLSPEC DataFlowTag; using WaveTilePtr = std::shared_ptr; using Expression = std::variant< diff --git a/lib/include/rocRoller/Expression_impl.hpp b/lib/include/rocRoller/Expression_impl.hpp index 5f42d377..1c8facc0 100644 --- a/lib/include/rocRoller/Expression_impl.hpp +++ b/lib/include/rocRoller/Expression_impl.hpp @@ -622,5 +622,73 @@ namespace rocRoller } } + template + struct ContainsVisitor + { + bool operator()(T const& expr) + { + return true; + } + + template + 
requires(!std::same_as) bool operator()(U const& expr) + { + return call(expr.arg); + } + + template + requires(!std::same_as) bool operator()(U const& expr) + { + return call(expr.lhs) || call(expr.rhs); + } + + template + requires(!std::same_as) bool operator()(U const& expr) + { + return call(expr.lhs) || call(expr.r1hs) || call(expr.r2hs); + } + + template U> + requires(!std::same_as) bool operator()(U const& expr) + { + return call(expr.matA) || call(expr.matB) || call(expr.matC) || call(expr.scaleA) + || call(expr.scaleB); + } + + template + requires(!std::same_as) bool operator()(U const& expr) + { + return false; + } + + bool call(Expression const& expr) + { + return std::visit(*this, expr); + } + + bool call(ExpressionPtr const& expr) + { + if(!expr) + return false; + + return call(*expr); + } + }; + + template + inline bool contains(Expression const& expr) + { + ContainsVisitor v; + return v.call(expr); + } + + template + inline bool contains(ExpressionPtr expr) + { + AssertFatal(expr != nullptr); + + return contains(*expr); + } + } } diff --git a/lib/include/rocRoller/GPUArchitecture/GPUArchitecture.hpp b/lib/include/rocRoller/GPUArchitecture/GPUArchitecture.hpp index f06e1a86..6a9b6a76 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUArchitecture.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUArchitecture.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -45,7 +47,7 @@ namespace rocRoller { - class GPUArchitecture + class ROCROLLER_DECLSPEC GPUArchitecture { public: GPUArchitecture(); @@ -121,7 +123,7 @@ namespace rocRoller }; //Used as a container for serialization. 
- struct GPUArchitecturesStruct + struct ROCROLLER_DECLSPEC GPUArchitecturesStruct { std::map architectures; }; diff --git a/lib/include/rocRoller/GPUArchitecture/GPUArchitectureLibrary.hpp b/lib/include/rocRoller/GPUArchitecture/GPUArchitectureLibrary.hpp index a62f1ea2..1f93d95f 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUArchitectureLibrary.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUArchitectureLibrary.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -46,7 +48,7 @@ namespace rocRoller { - class GPUArchitectureLibrary : public LazySingleton + class ROCROLLER_DECLSPEC GPUArchitectureLibrary : public LazySingleton { public: bool HasCapability(GPUArchitectureTarget const&, GPUCapability const&); diff --git a/lib/include/rocRoller/GPUArchitecture/GPUArchitectureLibrary_impl.hpp b/lib/include/rocRoller/GPUArchitecture/GPUArchitectureLibrary_impl.hpp index 594955f9..4e9a0656 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUArchitectureLibrary_impl.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUArchitectureLibrary_impl.hpp @@ -25,6 +25,7 @@ *******************************************************************************/ #pragma once + #include #include #include diff --git a/lib/include/rocRoller/GPUArchitecture/GPUArchitectureTarget.hpp b/lib/include/rocRoller/GPUArchitecture/GPUArchitectureTarget.hpp index 27cbedee..4914b980 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUArchitectureTarget.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUArchitectureTarget.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -70,12 +72,12 @@ namespace rocRoller Count, }; - std::string toString(GPUArchitectureGFX const& gfx); - std::string name(GPUArchitectureGFX const& gfx); + ROCROLLER_DECLSPEC std::string toString(GPUArchitectureGFX const& gfx); + ROCROLLER_DECLSPEC std::string name(GPUArchitectureGFX const& gfx); - std::ostream& operator<<(std::ostream&, GPUArchitectureGFX const& gfx); + 
ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, GPUArchitectureGFX const& gfx); - struct GPUArchitectureFeatures + struct ROCROLLER_DECLSPEC GPUArchitectureFeatures { public: bool sramecc = false; @@ -91,7 +93,7 @@ namespace rocRoller auto operator<=>(const GPUArchitectureFeatures&) const = default; }; - struct GPUArchitectureTarget + struct ROCROLLER_DECLSPEC GPUArchitectureTarget { public: GPUArchitectureGFX gfx = GPUArchitectureGFX::UNKNOWN; diff --git a/lib/include/rocRoller/GPUArchitecture/GPUArchitectureTarget_fwd.hpp b/lib/include/rocRoller/GPUArchitecture/GPUArchitectureTarget_fwd.hpp index 963025e5..855a6cd6 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUArchitectureTarget_fwd.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUArchitectureTarget_fwd.hpp @@ -26,7 +26,9 @@ #pragma once +#include + namespace rocRoller { - struct GPUArchitectureTarget; + struct ROCROLLER_DECLSPEC GPUArchitectureTarget; } diff --git a/lib/include/rocRoller/GPUArchitecture/GPUArchitecture_fwd.hpp b/lib/include/rocRoller/GPUArchitecture/GPUArchitecture_fwd.hpp index e11290a1..29cb024a 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUArchitecture_fwd.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUArchitecture_fwd.hpp @@ -26,7 +26,9 @@ #pragma once +#include + namespace rocRoller { - class GPUArchitecture; + class ROCROLLER_DECLSPEC GPUArchitecture; } diff --git a/lib/include/rocRoller/GPUArchitecture/GPUCapability.hpp b/lib/include/rocRoller/GPUArchitecture/GPUCapability.hpp index e774054e..7dba8fc6 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUCapability.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUCapability.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -42,7 +44,7 @@ namespace rocRoller { - class GPUCapability + class ROCROLLER_DECLSPEC GPUCapability { public: enum Value : uint8_t @@ -203,7 +205,7 @@ namespace rocRoller static std::string toString(Value); - struct Hash + struct ROCROLLER_DECLSPEC Hash { 
std::size_t operator()(const GPUCapability& input) const { @@ -219,7 +221,7 @@ namespace rocRoller static const std::unordered_map m_stringMap; }; - std::ostream& operator<<(std::ostream&, GPUCapability::Value); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, GPUCapability::Value); } #include diff --git a/lib/include/rocRoller/GPUArchitecture/GPUInstructionInfo.hpp b/lib/include/rocRoller/GPUArchitecture/GPUInstructionInfo.hpp index a6394195..10b08f11 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUInstructionInfo.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUInstructionInfo.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -43,7 +45,7 @@ namespace rocRoller { - class GPUWaitQueueType + class ROCROLLER_DECLSPEC GPUWaitQueueType { public: enum Value : uint8_t @@ -84,7 +86,7 @@ namespace rocRoller static std::string toString(Value); - struct Hash + struct ROCROLLER_DECLSPEC Hash { std::size_t operator()(const GPUWaitQueueType& input) const { @@ -103,9 +105,9 @@ namespace rocRoller static const std::unordered_map m_stringMap; }; - std::ostream& operator<<(std::ostream&, GPUWaitQueueType::Value const& v); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, GPUWaitQueueType::Value const& v); - class GPUWaitQueue + class ROCROLLER_DECLSPEC GPUWaitQueue { public: enum Value : uint8_t @@ -173,7 +175,7 @@ namespace rocRoller static std::string toString(Value); - struct Hash + struct ROCROLLER_DECLSPEC Hash { std::size_t operator()(const GPUWaitQueue& input) const { @@ -186,9 +188,9 @@ namespace rocRoller static std::unordered_map m_stringMap; }; - std::ostream& operator<<(std::ostream&, GPUWaitQueue::Value const& v); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, GPUWaitQueue::Value const& v); - class GPUInstructionInfo + class ROCROLLER_DECLSPEC GPUInstructionInfo { public: GPUInstructionInfo() = default; diff --git a/lib/include/rocRoller/GPUArchitecture/GPUInstructionInfo_fwd.hpp 
b/lib/include/rocRoller/GPUArchitecture/GPUInstructionInfo_fwd.hpp index d8c02fd5..2316233a 100644 --- a/lib/include/rocRoller/GPUArchitecture/GPUInstructionInfo_fwd.hpp +++ b/lib/include/rocRoller/GPUArchitecture/GPUInstructionInfo_fwd.hpp @@ -26,8 +26,10 @@ #pragma once +#include + namespace rocRoller { - class GPUInstructionInfo; - class GPUWaitQueueType; + class ROCROLLER_DECLSPEC GPUInstructionInfo; + class ROCROLLER_DECLSPEC GPUWaitQueueType; } diff --git a/lib/include/rocRoller/Graph/GraphUtilities.hpp b/lib/include/rocRoller/Graph/GraphUtilities.hpp index 177301d5..5071ddae 100644 --- a/lib/include/rocRoller/Graph/GraphUtilities.hpp +++ b/lib/include/rocRoller/Graph/GraphUtilities.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -43,7 +45,7 @@ namespace rocRoller * source, only following edges that satisfy edgePredicate. */ template EdgePredicate> - void removeRedundantEdges(AGraph& graph, EdgePredicate edgePredicate); + ROCROLLER_DECLSPEC void removeRedundantEdges(AGraph& graph, EdgePredicate edgePredicate); /** * `graph` must be an instantiation of Hypergraph which is calm (i.e. @@ -54,7 +56,8 @@ namespace rocRoller * source, only following edges that satisfy edgePredicate. 
*/ template EdgePredicate> - Generator findRedundantEdges(AGraph const& graph, EdgePredicate edgePredicate); + ROCROLLER_DECLSPEC Generator findRedundantEdges(AGraph const& graph, + EdgePredicate edgePredicate); } } diff --git a/lib/include/rocRoller/Graph/Hypergraph.hpp b/lib/include/rocRoller/Graph/Hypergraph.hpp index 18fdc0fb..9627873c 100644 --- a/lib/include/rocRoller/Graph/Hypergraph.hpp +++ b/lib/include/rocRoller/Graph/Hypergraph.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -80,11 +82,11 @@ namespace rocRoller Count }; - std::string toString(GraphModification m); + ROCROLLER_DECLSPEC std::string toString(GraphModification m); namespace mi = boost::multi_index; - struct HypergraphIncident + struct ROCROLLER_DECLSPEC HypergraphIncident { int src; int dst; @@ -92,7 +94,7 @@ namespace rocRoller }; template - class Hypergraph + class ROCROLLER_DECLSPEC Hypergraph { public: using Element = std::variant; @@ -106,7 +108,7 @@ namespace rocRoller /** * */ - struct Location + struct ROCROLLER_DECLSPEC Location { int index; std::vector incoming; @@ -421,13 +423,13 @@ namespace rocRoller // TODO: May need to replace with multi_index for in-place rewriting. std::map m_elements; - struct BySrc + struct ROCROLLER_DECLSPEC BySrc { }; - struct ByDst + struct ROCROLLER_DECLSPEC ByDst { }; - struct BySrcDst + struct ROCROLLER_DECLSPEC BySrcDst { }; @@ -449,10 +451,11 @@ namespace rocRoller }; template - std::ostream& operator<<(std::ostream& stream, Hypergraph const& graph); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + Hypergraph const& graph); template - std::string variantToString(Cls const& el) + ROCROLLER_DECLSPEC std::string variantToString(Cls const& el) { return std::visit([](auto const& v) { return toString(v); }, el); } @@ -471,11 +474,12 @@ namespace rocRoller * @param destNodePredicate Only yield nodes that satisfy this predicate. 
*/ template - Generator reachableNodes(Graph::Hypergraph const& graph, - int start, - auto nodePredicate, - auto edgePredicate, - auto destNodePredicate); + ROCROLLER_DECLSPEC Generator + reachableNodes(Graph::Hypergraph const& graph, + int start, + auto nodePredicate, + auto edgePredicate, + auto destNodePredicate); template concept CCalmGraph = !T::IsHyper; diff --git a/lib/include/rocRoller/InstructionValues/LDSAllocator.hpp b/lib/include/rocRoller/InstructionValues/LDSAllocator.hpp index b56ff034..25ee26ee 100644 --- a/lib/include/rocRoller/InstructionValues/LDSAllocator.hpp +++ b/lib/include/rocRoller/InstructionValues/LDSAllocator.hpp @@ -26,13 +26,15 @@ #pragma once +#include + #include #include namespace rocRoller { - class LDSAllocation; + class ROCROLLER_DECLSPEC LDSAllocation; /** * @brief An LDSAllocator is used to create LDSAllocations. These are @@ -44,7 +46,7 @@ namespace rocRoller * blocks that have been deallocated. * */ - class LDSAllocator : public std::enable_shared_from_this + class ROCROLLER_DECLSPEC LDSAllocator : public std::enable_shared_from_this { public: /** @@ -106,7 +108,7 @@ namespace rocRoller * allocated. 
* */ - class LDSAllocation : public std::enable_shared_from_this + class ROCROLLER_DECLSPEC LDSAllocation : public std::enable_shared_from_this { friend class LDSAllocator; diff --git a/lib/include/rocRoller/InstructionValues/LDSAllocator_fwd.hpp b/lib/include/rocRoller/InstructionValues/LDSAllocator_fwd.hpp index e373e732..9f19c06d 100644 --- a/lib/include/rocRoller/InstructionValues/LDSAllocator_fwd.hpp +++ b/lib/include/rocRoller/InstructionValues/LDSAllocator_fwd.hpp @@ -26,8 +26,10 @@ #pragma once +#include + namespace rocRoller { - class LDSAllocation; - class LDSAllocator; + class ROCROLLER_DECLSPEC LDSAllocation; + class ROCROLLER_DECLSPEC LDSAllocator; } diff --git a/lib/include/rocRoller/InstructionValues/LabelAllocator.hpp b/lib/include/rocRoller/InstructionValues/LabelAllocator.hpp index f01ddc1c..4ed97c87 100644 --- a/lib/include/rocRoller/InstructionValues/LabelAllocator.hpp +++ b/lib/include/rocRoller/InstructionValues/LabelAllocator.hpp @@ -26,11 +26,13 @@ #pragma once +#include + #include namespace rocRoller { - class LabelAllocator + class ROCROLLER_DECLSPEC LabelAllocator { public: LabelAllocator(std::string prefix); diff --git a/lib/include/rocRoller/InstructionValues/LabelAllocator_fwd.hpp b/lib/include/rocRoller/InstructionValues/LabelAllocator_fwd.hpp index a258e8e0..a4b160eb 100644 --- a/lib/include/rocRoller/InstructionValues/LabelAllocator_fwd.hpp +++ b/lib/include/rocRoller/InstructionValues/LabelAllocator_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + #include namespace rocRoller { - class LabelAllocator; + class ROCROLLER_DECLSPEC LabelAllocator; using LabelAllocatorPtr = std::shared_ptr; } diff --git a/lib/include/rocRoller/InstructionValues/Register.hpp b/lib/include/rocRoller/InstructionValues/Register.hpp index 94c4483f..40b864dc 100644 --- a/lib/include/rocRoller/InstructionValues/Register.hpp +++ b/lib/include/rocRoller/InstructionValues/Register.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ 
-64,7 +66,7 @@ namespace rocRoller Count = 255, }; - struct AllocationOptions + struct ROCROLLER_DECLSPEC AllocationOptions { /// In units of registers int contiguousChunkWidth = VALUE_CONTIGUOUS; @@ -78,7 +80,7 @@ namespace rocRoller auto operator<=>(AllocationOptions const& other) const = default; }; - struct RegisterId + struct ROCROLLER_DECLSPEC RegisterId { RegisterId(Type regType, int index) : regType(regType) @@ -91,12 +93,12 @@ namespace rocRoller std::string toString() const; }; - std::string toString(RegisterId const& regId); + ROCROLLER_DECLSPEC std::string toString(RegisterId const& regId); // For some reason, GCC will not find the operator declared in Utils.hpp. - std::ostream& operator<<(std::ostream& stream, RegisterId const& regId); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, RegisterId const& regId); - struct RegisterIdHash + struct ROCROLLER_DECLSPEC RegisterIdHash { size_t operator()(RegisterId const& regId) const noexcept { @@ -141,7 +143,7 @@ namespace rocRoller * Maintains a `shared_ptr` reference to the `Allocation` object. 
* */ - struct Value : public std::enable_shared_from_this + struct ROCROLLER_DECLSPEC Value : public std::enable_shared_from_this { public: Value(); @@ -451,7 +453,7 @@ namespace rocRoller * * TODO: Make not copyable, enforce construction through shared_ptr */ - struct Allocation : public std::enable_shared_from_this + struct ROCROLLER_DECLSPEC Allocation : public std::enable_shared_from_this { Allocation(ContextPtr context, Type regType, @@ -525,8 +527,9 @@ namespace rocRoller void setRegisterCount(); }; - std::string toString(AllocationOptions const& opts); - std::ostream& operator<<(std::ostream& stream, AllocationOptions const& opts); + ROCROLLER_DECLSPEC std::string toString(AllocationOptions const& opts); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + AllocationOptions const& opts); } } diff --git a/lib/include/rocRoller/InstructionValues/RegisterAllocator.hpp b/lib/include/rocRoller/InstructionValues/RegisterAllocator.hpp index 8e45331f..f4f89240 100644 --- a/lib/include/rocRoller/InstructionValues/RegisterAllocator.hpp +++ b/lib/include/rocRoller/InstructionValues/RegisterAllocator.hpp @@ -26,9 +26,11 @@ #pragma once +#include + #include -class RegisterTest_RegisterToString_Test; +class ROCROLLER_DECLSPEC RegisterTest_RegisterToString_Test; namespace rocRoller { @@ -42,10 +44,10 @@ namespace rocRoller Count, }; - std::string toString(AllocatorScheme a); - std::ostream& operator<<(std::ostream&, AllocatorScheme const&); + ROCROLLER_DECLSPEC std::string toString(AllocatorScheme a); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, AllocatorScheme const&); - class Allocator : public std::enable_shared_from_this + class ROCROLLER_DECLSPEC Allocator : public std::enable_shared_from_this { public: Allocator(Type regType, diff --git a/lib/include/rocRoller/InstructionValues/RegisterAllocator_fwd.hpp b/lib/include/rocRoller/InstructionValues/RegisterAllocator_fwd.hpp index 48007c4b..da8be399 100644 --- 
a/lib/include/rocRoller/InstructionValues/RegisterAllocator_fwd.hpp +++ b/lib/include/rocRoller/InstructionValues/RegisterAllocator_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -33,7 +35,7 @@ namespace rocRoller { namespace Register { - class Allocator; + class ROCROLLER_DECLSPEC Allocator; } } diff --git a/lib/include/rocRoller/InstructionValues/Register_fwd.hpp b/lib/include/rocRoller/InstructionValues/Register_fwd.hpp index 390a09ec..e3661e3e 100644 --- a/lib/include/rocRoller/InstructionValues/Register_fwd.hpp +++ b/lib/include/rocRoller/InstructionValues/Register_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -67,20 +69,20 @@ namespace rocRoller Count }; - struct RegisterId; - struct RegisterIdHash; + struct ROCROLLER_DECLSPEC RegisterId; + struct ROCROLLER_DECLSPEC RegisterIdHash; - class Allocation; - struct Value; + class ROCROLLER_DECLSPEC Allocation; + struct ROCROLLER_DECLSPEC Value; using AllocationPtr = std::shared_ptr; using ValuePtr = std::shared_ptr; - std::string toString(Type t); - std::ostream& operator<<(std::ostream& stream, Type t); + ROCROLLER_DECLSPEC std::string toString(Type t); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, Type t); - std::string toString(AllocationState state); - std::ostream& operator<<(std::ostream& stream, AllocationState state); + ROCROLLER_DECLSPEC std::string toString(AllocationState state); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, AllocationState state); } } diff --git a/lib/include/rocRoller/KernelArguments.hpp b/lib/include/rocRoller/KernelArguments.hpp index 9fb46a17..83a2bd24 100644 --- a/lib/include/rocRoller/KernelArguments.hpp +++ b/lib/include/rocRoller/KernelArguments.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -36,7 +38,7 @@ namespace rocRoller { - class KernelArguments + class ROCROLLER_DECLSPEC KernelArguments { public: explicit KernelArguments(bool log = true, 
size_t bytes = 0); @@ -71,7 +73,7 @@ namespace rocRoller using ArgPair = std::pair; - class const_iterator + class ROCROLLER_DECLSPEC const_iterator { public: using iterator_category = std::input_iterator_tag; @@ -136,8 +138,9 @@ namespace rocRoller bool m_log; }; - std::ostream& operator<<(std::ostream& stream, const KernelArguments& t); - std::ostream& operator<<(std::ostream& stream, const KernelArguments::const_iterator& iter); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, const KernelArguments& t); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + const KernelArguments::const_iterator& iter); } // namespace rocRoller diff --git a/lib/include/rocRoller/KernelArguments_fwd.hpp b/lib/include/rocRoller/KernelArguments_fwd.hpp index 1423ba69..6ab7a0f1 100644 --- a/lib/include/rocRoller/KernelArguments_fwd.hpp +++ b/lib/include/rocRoller/KernelArguments_fwd.hpp @@ -26,9 +26,11 @@ #pragma once +#include + namespace rocRoller { - class KernelArguments; + class ROCROLLER_DECLSPEC KernelArguments; } diff --git a/lib/include/rocRoller/KernelGraph/Constraints.hpp b/lib/include/rocRoller/KernelGraph/Constraints.hpp index 0ce83f77..8312534e 100644 --- a/lib/include/rocRoller/KernelGraph/Constraints.hpp +++ b/lib/include/rocRoller/KernelGraph/Constraints.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -36,7 +38,7 @@ namespace rocRoller * @brief Return value for any function that applies constraints to the KernelGraph. 
* */ - struct ConstraintStatus + struct ROCROLLER_DECLSPEC ConstraintStatus { bool satisfied = true; std::string explanation = ""; @@ -57,9 +59,9 @@ namespace rocRoller } }; - ConstraintStatus NoDanglingMappings(const KernelGraph& k); - ConstraintStatus SingleControlRoot(const KernelGraph& k); - ConstraintStatus NoRedundantSetCoordinates(const KernelGraph& k); + ROCROLLER_DECLSPEC ConstraintStatus NoDanglingMappings(const KernelGraph& k); + ROCROLLER_DECLSPEC ConstraintStatus SingleControlRoot(const KernelGraph& k); + ROCROLLER_DECLSPEC ConstraintStatus NoRedundantSetCoordinates(const KernelGraph& k); using GraphConstraint = ConstraintStatus (*)(const KernelGraph& k); } diff --git a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlEdge.hpp b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlEdge.hpp index d8da2b27..338f2762 100644 --- a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlEdge.hpp +++ b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlEdge.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include diff --git a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlEdge_fwd.hpp b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlEdge_fwd.hpp index 0cad8723..377d04a5 100644 --- a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlEdge_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlEdge_fwd.hpp @@ -26,18 +26,20 @@ #pragma once +#include + #include namespace rocRoller { namespace KernelGraph::ControlGraph { - struct Sequence; - struct Body; // Of kernel, for loop, if, etc. - struct Else; // Alternative body for false conditional + struct ROCROLLER_DECLSPEC Sequence; + struct ROCROLLER_DECLSPEC Body; // Of kernel, for loop, if, etc. 
+ struct ROCROLLER_DECLSPEC Else; // Alternative body for false conditional - struct Initialize; - struct ForLoopIncrement; + struct ROCROLLER_DECLSPEC Initialize; + struct ROCROLLER_DECLSPEC ForLoopIncrement; using ControlEdge = std::variant; diff --git a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlFlowRWTracer.hpp b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlFlowRWTracer.hpp index e9d613f7..b881f9c0 100644 --- a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlFlowRWTracer.hpp +++ b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlFlowRWTracer.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -44,7 +46,7 @@ namespace rocRoller::KernelGraph * recorded trace for all operations in the control graph that * access or modify a coordinate. */ - class ControlFlowRWTracer + class ROCROLLER_DECLSPEC ControlFlowRWTracer { public: enum ReadWrite @@ -56,7 +58,7 @@ namespace rocRoller::KernelGraph Count }; - struct ReadWriteRecord + struct ROCROLLER_DECLSPEC ReadWriteRecord { int control, coordinate; ReadWrite rw; @@ -146,10 +148,11 @@ namespace rocRoller::KernelGraph void trace(int start); }; - std::string toString(ControlFlowRWTracer::ReadWrite rw); + ROCROLLER_DECLSPEC std::string toString(ControlFlowRWTracer::ReadWrite rw); - std::string toString(ControlFlowRWTracer::ReadWriteRecord const& record); + ROCROLLER_DECLSPEC std::string toString(ControlFlowRWTracer::ReadWriteRecord const& record); - std::ostream& operator<<(std::ostream& stream, ControlFlowRWTracer::ReadWrite rw); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + ControlFlowRWTracer::ReadWrite rw); } diff --git a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlGraph.hpp b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlGraph.hpp index 04106644..6a358fb6 100644 --- a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlGraph.hpp +++ b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlGraph.hpp @@ -26,6 +26,8 @@ #pragma once 
+#include + #include #include @@ -78,24 +80,24 @@ namespace rocRoller /** * Return a full representation of 'n' */ - std::string toString(NodeOrdering n); - std::ostream& operator<<(std::ostream& stream, NodeOrdering n); + ROCROLLER_DECLSPEC std::string toString(NodeOrdering n); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, NodeOrdering n); /** * Return a full representation of 'c' */ - std::string toString(CacheStatus c); - std::ostream& operator<<(std::ostream& stream, CacheStatus c); + ROCROLLER_DECLSPEC std::string toString(CacheStatus c); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, CacheStatus c); /** * Return a 3-character representation of 'n'. */ - std::string abbrev(NodeOrdering n); + ROCROLLER_DECLSPEC std::string abbrev(NodeOrdering n); /** * If ordering `order` applies to (a, b), return the ordering that applies to (b, a). */ - NodeOrdering opposite(NodeOrdering order); + ROCROLLER_DECLSPEC NodeOrdering opposite(NodeOrdering order); /** * Control flow graph. @@ -103,7 +105,8 @@ namespace rocRoller * Nodes in the graph represent operations. Edges describe * dependencies. 
*/ - class ControlGraph : public Graph::Hypergraph + class ROCROLLER_DECLSPEC ControlGraph + : public Graph::Hypergraph { public: using Base = Graph::Hypergraph; diff --git a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlGraph_fwd.hpp b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlGraph_fwd.hpp index 572e1830..59f00fec 100644 --- a/lib/include/rocRoller/KernelGraph/ControlGraph/ControlGraph_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/ControlGraph/ControlGraph_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace KernelGraph::ControlGraph { - class ControlGraph; + class ROCROLLER_DECLSPEC ControlGraph; } } diff --git a/lib/include/rocRoller/KernelGraph/ControlGraph/LastRWTracer.hpp b/lib/include/rocRoller/KernelGraph/ControlGraph/LastRWTracer.hpp index 27c5de1b..a490b671 100644 --- a/lib/include/rocRoller/KernelGraph/ControlGraph/LastRWTracer.hpp +++ b/lib/include/rocRoller/KernelGraph/ControlGraph/LastRWTracer.hpp @@ -26,11 +26,13 @@ #pragma once +#include + #include namespace rocRoller::KernelGraph { - class LastRWTracer : public ControlFlowRWTracer + class ROCROLLER_DECLSPEC LastRWTracer : public ControlFlowRWTracer { public: LastRWTracer(KernelGraph const& graph, int start = -1, bool trackConnections = false) diff --git a/lib/include/rocRoller/KernelGraph/ControlGraph/Operation.hpp b/lib/include/rocRoller/KernelGraph/ControlGraph/Operation.hpp index 721570e3..3b6cb0d2 100644 --- a/lib/include/rocRoller/KernelGraph/ControlGraph/Operation.hpp +++ b/lib/include/rocRoller/KernelGraph/ControlGraph/Operation.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -62,7 +64,7 @@ namespace rocRoller /** * SetCoordinate - Sets the value of a Coordinate */ - struct SetCoordinate + struct ROCROLLER_DECLSPEC SetCoordinate { SetCoordinate(); explicit SetCoordinate(Expression::ExpressionPtr value); @@ -90,7 +92,7 @@ namespace rocRoller * if(condition) goto while_top * */ - struct DoWhileOp + struct 
ROCROLLER_DECLSPEC DoWhileOp { Expression::ExpressionPtr condition; @@ -124,7 +126,7 @@ namespace rocRoller * for_bottom: * */ - struct ForLoopOp + struct ROCROLLER_DECLSPEC ForLoopOp { Expression::ExpressionPtr condition; @@ -153,7 +155,7 @@ namespace rocRoller * * */ - struct ConditionalOp + struct ROCROLLER_DECLSPEC ConditionalOp { Expression::ExpressionPtr condition; @@ -180,7 +182,7 @@ namespace rocRoller * Where is a instruction sequence that causes a trap or exception in kernel code. * */ - struct AssertOp + struct ROCROLLER_DECLSPEC AssertOp { std::string assertName; @@ -193,7 +195,7 @@ namespace rocRoller /** * UnrollOp - a kernel unroll. */ - struct UnrollOp + struct ROCROLLER_DECLSPEC UnrollOp { Expression::ExpressionPtr size; @@ -207,7 +209,7 @@ namespace rocRoller * If the register already exists, it must be of type 'regType'. If not, `regType` * specifies which type of register will be allocated. */ - struct Assign + struct ROCROLLER_DECLSPEC Assign { Register::Type regType = Register::Type::Count; Expression::ExpressionPtr expression; @@ -240,7 +242,7 @@ namespace rocRoller * @param increment Increment dimension * @param base */ - struct ComputeIndex + struct ROCROLLER_DECLSPEC ComputeIndex { // TODO: might be nicer to have UInt32 for strides; need // to allow user to specify stride types instead of @@ -267,7 +269,7 @@ namespace rocRoller /** * LoadLinear - Load linear dimension. */ - struct LoadLinear + struct ROCROLLER_DECLSPEC LoadLinear { LoadLinear(); explicit LoadLinear(rocRoller::VariableType const varType); @@ -288,7 +290,7 @@ namespace rocRoller * instructions) is specified by the `LayoutType` member of * the the WaveTile node. */ - struct LoadTiled + struct ROCROLLER_DECLSPEC LoadTiled { LoadTiled(); explicit LoadTiled(VariableType const varType, bool const isTransposedTile = false); @@ -302,7 +304,7 @@ namespace rocRoller /** * LoadVGPR - replaces LoadLinear. 
*/ - struct LoadVGPR + struct ROCROLLER_DECLSPEC LoadVGPR { LoadVGPR(); explicit LoadVGPR(VariableType const varType, bool const scalar = false); @@ -316,7 +318,7 @@ namespace rocRoller /** * LoadSGPR - load scalar value from memory. */ - struct LoadSGPR + struct ROCROLLER_DECLSPEC LoadSGPR { LoadSGPR(); LoadSGPR(VariableType const varType, BufferInstructionOptions const bio); @@ -330,7 +332,7 @@ namespace rocRoller /** * LoadLDSTile - loads a tile from LDS */ - struct LoadLDSTile + struct ROCROLLER_DECLSPEC LoadLDSTile { LoadLDSTile(); explicit LoadLDSTile(VariableType const varType, bool const isTransposedTile = false); @@ -342,7 +344,7 @@ namespace rocRoller std::string toString() const; }; - struct LoadTileDirect2LDS + struct ROCROLLER_DECLSPEC LoadTileDirect2LDS { LoadTileDirect2LDS(); explicit LoadTileDirect2LDS(VariableType const varType); @@ -355,7 +357,7 @@ namespace rocRoller /** * Multiply - Multiply two MacroTiles */ - struct Multiply + struct ROCROLLER_DECLSPEC Multiply { Multiply(); Multiply(Operations::ScaleMode scaleA, Operations::ScaleMode scaleB); @@ -387,7 +389,7 @@ namespace rocRoller * Storage location and affinity is specified by the MacroTile * node. */ - struct StoreTiled + struct ROCROLLER_DECLSPEC StoreTiled { StoreTiled(); explicit StoreTiled(VariableType const dtype); @@ -406,7 +408,7 @@ namespace rocRoller /** * StoreSGPR - stores a scalar value to memory. */ - struct StoreSGPR + struct ROCROLLER_DECLSPEC StoreSGPR { StoreSGPR(); StoreSGPR(VariableType const varType, BufferInstructionOptions const bio); @@ -420,7 +422,7 @@ namespace rocRoller /** * StoreLDSTile - store a tile into LDS */ - struct StoreLDSTile + struct ROCROLLER_DECLSPEC StoreLDSTile { StoreLDSTile(); explicit StoreLDSTile(VariableType const varType); @@ -433,7 +435,7 @@ namespace rocRoller /** * TensorContraction - Tensor contraction operation. 
*/ - struct TensorContraction + struct ROCROLLER_DECLSPEC TensorContraction { TensorContraction(); TensorContraction(std::vector const& aContractedDimensions, @@ -465,7 +467,7 @@ namespace rocRoller /** * Exchange - permute the lanes data within a wave. */ - struct Exchange + struct ROCROLLER_DECLSPEC Exchange { Exchange(); explicit Exchange(VariableType const varType); @@ -478,7 +480,7 @@ namespace rocRoller /** * SeedPRNG - Set the initial seed value of a random number generator */ - struct SeedPRNG + struct ROCROLLER_DECLSPEC SeedPRNG { SeedPRNG(); explicit SeedPRNG(bool addTID); diff --git a/lib/include/rocRoller/KernelGraph/ControlGraph/Operation_fwd.hpp b/lib/include/rocRoller/KernelGraph/ControlGraph/Operation_fwd.hpp index b881d78a..1fab92db 100644 --- a/lib/include/rocRoller/KernelGraph/ControlGraph/Operation_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/ControlGraph/Operation_fwd.hpp @@ -26,42 +26,44 @@ #pragma once +#include + #include namespace rocRoller { namespace KernelGraph::ControlGraph { - struct Assign; - struct Barrier; - struct ComputeIndex; - struct ConditionalOp; - struct AssertOp; - struct Deallocate; - struct DoWhileOp; - struct Exchange; - struct ForLoopOp; - struct Kernel; - struct LoadLDSTile; - struct LoadLinear; - struct LoadVGPR; - struct LoadSGPR; - struct LoadTiled; - struct Multiply; - struct NOP; - struct Block; - struct Scope; - struct SetCoordinate; - struct StoreLDSTile; - struct LoadTileDirect2LDS; - struct StoreLinear; - struct StoreTiled; - struct StoreVGPR; - struct StoreSGPR; - struct TensorContraction; - struct UnrollOp; - struct WaitZero; - struct SeedPRNG; + struct ROCROLLER_DECLSPEC Assign; + struct ROCROLLER_DECLSPEC Barrier; + struct ROCROLLER_DECLSPEC ComputeIndex; + struct ROCROLLER_DECLSPEC ConditionalOp; + struct ROCROLLER_DECLSPEC AssertOp; + struct ROCROLLER_DECLSPEC Deallocate; + struct ROCROLLER_DECLSPEC DoWhileOp; + struct ROCROLLER_DECLSPEC Exchange; + struct ROCROLLER_DECLSPEC ForLoopOp; + struct 
ROCROLLER_DECLSPEC Kernel; + struct ROCROLLER_DECLSPEC LoadLDSTile; + struct ROCROLLER_DECLSPEC LoadLinear; + struct ROCROLLER_DECLSPEC LoadVGPR; + struct ROCROLLER_DECLSPEC LoadSGPR; + struct ROCROLLER_DECLSPEC LoadTiled; + struct ROCROLLER_DECLSPEC Multiply; + struct ROCROLLER_DECLSPEC NOP; + struct ROCROLLER_DECLSPEC Block; + struct ROCROLLER_DECLSPEC Scope; + struct ROCROLLER_DECLSPEC SetCoordinate; + struct ROCROLLER_DECLSPEC StoreLDSTile; + struct ROCROLLER_DECLSPEC LoadTileDirect2LDS; + struct ROCROLLER_DECLSPEC StoreLinear; + struct ROCROLLER_DECLSPEC StoreTiled; + struct ROCROLLER_DECLSPEC StoreVGPR; + struct ROCROLLER_DECLSPEC StoreSGPR; + struct ROCROLLER_DECLSPEC TensorContraction; + struct ROCROLLER_DECLSPEC UnrollOp; + struct ROCROLLER_DECLSPEC WaitZero; + struct ROCROLLER_DECLSPEC SeedPRNG; using Operation = std::variant + #include #include #include @@ -50,14 +52,14 @@ namespace rocRoller::KernelGraph */ namespace Connections { - struct JustNaryArgument + struct ROCROLLER_DECLSPEC JustNaryArgument { NaryArgument argument; auto operator<=>(JustNaryArgument const&) const = default; }; - struct TypeAndSubDimension + struct ROCROLLER_DECLSPEC TypeAndSubDimension { std::string id; int subdimension; @@ -70,7 +72,7 @@ namespace rocRoller::KernelGraph return a.id < b.id; } - struct TypeAndNaryArgument + struct ROCROLLER_DECLSPEC TypeAndNaryArgument { std::string id; NaryArgument argument; @@ -101,10 +103,10 @@ namespace rocRoller::KernelGraph Count }; - std::string toString(ComputeIndexArgument cia); - std::ostream& operator<<(std::ostream&, ComputeIndexArgument const&); + ROCROLLER_DECLSPEC std::string toString(ComputeIndexArgument cia); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, ComputeIndexArgument const&); - struct ComputeIndex + struct ROCROLLER_DECLSPEC ComputeIndex { ComputeIndexArgument argument; int index = 0; @@ -123,13 +125,13 @@ namespace rocRoller::KernelGraph TypeAndSubDimension, TypeAndNaryArgument>; - std::string 
name(ConnectionSpec const& cs); - std::string toString(ConnectionSpec const& cs); - std::ostream& operator<<(std::ostream& stream, ConnectionSpec const& cs); + ROCROLLER_DECLSPEC std::string name(ConnectionSpec const& cs); + ROCROLLER_DECLSPEC std::string toString(ConnectionSpec const& cs); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, ConnectionSpec const& cs); } - struct DeferredConnection + struct ROCROLLER_DECLSPEC DeferredConnection { Connections::ConnectionSpec connectionSpec; int coordinate; @@ -156,14 +158,14 @@ namespace rocRoller::KernelGraph * coordinates. To accomplish this, connection specifiers (see * ConnectionSpec) are used. */ - class ControlToCoordinateMapper + class ROCROLLER_DECLSPEC ControlToCoordinateMapper { // key_type is: // control graph index, connection specification using key_type = std::tuple; public: - struct Connection + struct ROCROLLER_DECLSPEC Connection { int control; int coordinate; diff --git a/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdge.hpp b/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdge.hpp index 1d7ea8eb..c86c09ea 100644 --- a/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdge.hpp +++ b/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdge.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -96,7 +98,7 @@ namespace rocRoller * Index - denotes that the source will index the register * allocation from the dest. 
*/ - struct Index + struct ROCROLLER_DECLSPEC Index { int index = -1; diff --git a/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdgeVisitor.hpp b/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdgeVisitor.hpp index 30424f90..b34c6f73 100644 --- a/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdgeVisitor.hpp +++ b/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdgeVisitor.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -41,7 +43,7 @@ namespace rocRoller concept CTUndefinedEdge = std::is_same::value || std:: is_same::value || std::is_same::value; - struct BaseEdgeVisitor + struct ROCROLLER_DECLSPEC BaseEdgeVisitor { // index expressions for the dimensions std::vector indexes; @@ -62,7 +64,7 @@ namespace rocRoller } }; - struct ForwardEdgeVisitor : public BaseEdgeVisitor + struct ROCROLLER_DECLSPEC ForwardEdgeVisitor : public BaseEdgeVisitor { std::vector operator()(Flatten const& e) { @@ -158,7 +160,7 @@ namespace rocRoller } }; - struct ReverseEdgeVisitor : public BaseEdgeVisitor + struct ROCROLLER_DECLSPEC ReverseEdgeVisitor : public BaseEdgeVisitor { std::vector operator()(Flatten const& e) { @@ -255,7 +257,7 @@ namespace rocRoller /* * Diff edge visitors. 
*/ - struct BaseEdgeDiffVisitor : public BaseEdgeVisitor + struct ROCROLLER_DECLSPEC BaseEdgeDiffVisitor : public BaseEdgeVisitor { Expression::ExpressionPtr zero; @@ -286,7 +288,7 @@ namespace rocRoller } }; - struct ForwardEdgeDiffVisitor : public BaseEdgeDiffVisitor + struct ROCROLLER_DECLSPEC ForwardEdgeDiffVisitor : public BaseEdgeDiffVisitor { using BaseEdgeDiffVisitor::BaseEdgeDiffVisitor; @@ -422,7 +424,7 @@ namespace rocRoller } }; - struct ReverseEdgeDiffVisitor : public BaseEdgeDiffVisitor + struct ROCROLLER_DECLSPEC ReverseEdgeDiffVisitor : public BaseEdgeDiffVisitor { using BaseEdgeDiffVisitor::BaseEdgeDiffVisitor; diff --git a/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdge_fwd.hpp b/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdge_fwd.hpp index 17c5bdd8..823c93e2 100644 --- a/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdge_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateEdge_fwd.hpp @@ -26,23 +26,25 @@ #pragma once +#include + #include namespace rocRoller { namespace KernelGraph::CoordinateGraph { - struct ConstructMacroTile; - struct DestructMacroTile; - struct Flatten; - struct Forget; - struct Inherit; - struct Join; - struct MakeOutput; - struct PassThrough; - struct Split; - struct Sunder; - struct Tile; + struct ROCROLLER_DECLSPEC ConstructMacroTile; + struct ROCROLLER_DECLSPEC DestructMacroTile; + struct ROCROLLER_DECLSPEC Flatten; + struct ROCROLLER_DECLSPEC Forget; + struct ROCROLLER_DECLSPEC Inherit; + struct ROCROLLER_DECLSPEC Join; + struct ROCROLLER_DECLSPEC MakeOutput; + struct ROCROLLER_DECLSPEC PassThrough; + struct ROCROLLER_DECLSPEC Split; + struct ROCROLLER_DECLSPEC Sunder; + struct ROCROLLER_DECLSPEC Tile; using CoordinateTransformEdge = std::variant && !std::same_as); - struct DataFlow; + struct ROCROLLER_DECLSPEC DataFlow; - struct Alias; - struct Buffer; - struct Duplicate; - struct Index; - struct Offset; - struct Stride; - struct View; + struct 
ROCROLLER_DECLSPEC Alias; + struct ROCROLLER_DECLSPEC Buffer; + struct ROCROLLER_DECLSPEC Duplicate; + struct ROCROLLER_DECLSPEC Index; + struct ROCROLLER_DECLSPEC Offset; + struct ROCROLLER_DECLSPEC Stride; + struct ROCROLLER_DECLSPEC View; using DataFlowEdge = std::variant; diff --git a/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateGraph.hpp b/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateGraph.hpp index 991558e1..312989ea 100644 --- a/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateGraph.hpp +++ b/lib/include/rocRoller/KernelGraph/CoordinateGraph/CoordinateGraph.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -114,7 +116,7 @@ namespace rocRoller * Hyper-edges describe how to transform coordinates and/or * apply operations. */ - class CoordinateGraph : public Graph::Hypergraph + class ROCROLLER_DECLSPEC CoordinateGraph : public Graph::Hypergraph { public: using Base = Graph::Hypergraph; diff --git a/lib/include/rocRoller/KernelGraph/CoordinateGraph/Dimension.hpp b/lib/include/rocRoller/KernelGraph/CoordinateGraph/Dimension.hpp index 2e201f56..59a04b46 100644 --- a/lib/include/rocRoller/KernelGraph/CoordinateGraph/Dimension.hpp +++ b/lib/include/rocRoller/KernelGraph/CoordinateGraph/Dimension.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -48,7 +50,7 @@ namespace rocRoller * - Storage (Registers, e.g. MacroTile) */ - struct BaseDimension + struct ROCROLLER_DECLSPEC BaseDimension { Expression::ExpressionPtr size, stride, offset; @@ -84,7 +86,7 @@ namespace rocRoller * * Can exist in the final graph. */ - struct Adhoc : public BaseDimension + struct ROCROLLER_DECLSPEC Adhoc : public BaseDimension { Adhoc(); @@ -115,7 +117,7 @@ namespace rocRoller * * Encodes size and stride info. 
*/ - struct SubDimension : public BaseDimension + struct ROCROLLER_DECLSPEC SubDimension : public BaseDimension { int dim; @@ -135,7 +137,7 @@ namespace rocRoller * Usually split into SubDimensions. The subdimensions carry * sizes and strides. */ - struct User : public BaseDimension + struct ROCROLLER_DECLSPEC User : public BaseDimension { std::string argumentName; @@ -164,7 +166,7 @@ namespace rocRoller /** * Linear dimension. Usually flattened subdimenions. */ - struct Linear : public BaseDimension + struct ROCROLLER_DECLSPEC Linear : public BaseDimension { static constexpr bool HasValue = false; using BaseDimension::BaseDimension; @@ -175,7 +177,7 @@ namespace rocRoller /** * Wavefront - represents wavefronts within a workgroup. */ - struct Wavefront : public SubDimension + struct ROCROLLER_DECLSPEC Wavefront : public SubDimension { static constexpr bool HasValue = false; using SubDimension::SubDimension; @@ -186,7 +188,7 @@ namespace rocRoller /** * Lane - represents a lane within a wavefront. */ - struct Lane : public BaseDimension + struct ROCROLLER_DECLSPEC Lane : public BaseDimension { static constexpr bool HasValue = false; using BaseDimension::BaseDimension; @@ -200,7 +202,7 @@ namespace rocRoller * Sub-dimensions 0, 1, and 2 correspond to the x, y and z * kernel launch dimensions. */ - struct Workgroup : public SubDimension + struct ROCROLLER_DECLSPEC Workgroup : public SubDimension { static constexpr bool HasValue = false; @@ -215,7 +217,7 @@ namespace rocRoller * Sub-dimensions 0, 1, and 2 correspond to the x, y and z * kernel launch dimensions. */ - struct Workitem : public SubDimension + struct ROCROLLER_DECLSPEC Workitem : public SubDimension { static constexpr bool HasValue = false; @@ -228,7 +230,7 @@ namespace rocRoller /** * VGPR - represents (small) thread local scalar/array. 
*/ - struct VGPR : public BaseDimension + struct ROCROLLER_DECLSPEC VGPR : public BaseDimension { static constexpr bool HasValue = false; @@ -237,7 +239,7 @@ namespace rocRoller std::string name() const override; }; - struct VGPRBlockNumber : public BaseDimension + struct ROCROLLER_DECLSPEC VGPRBlockNumber : public BaseDimension { static constexpr bool HasValue = false; @@ -246,7 +248,7 @@ namespace rocRoller std::string name() const override; }; - struct VGPRBlockIndex : public BaseDimension + struct ROCROLLER_DECLSPEC VGPRBlockIndex : public BaseDimension { static constexpr bool HasValue = false; @@ -262,7 +264,7 @@ namespace rocRoller * - Represents storage * - Represents address coordinate information */ - struct LDS : public BaseDimension + struct ROCROLLER_DECLSPEC LDS : public BaseDimension { static constexpr bool HasValue = false; using BaseDimension::BaseDimension; @@ -283,7 +285,7 @@ namespace rocRoller * ForLoop dimensions elucidate how indexes depend on which * for-loop iteration is being executed. */ - struct ForLoop : public BaseDimension + struct ROCROLLER_DECLSPEC ForLoop : public BaseDimension { static constexpr bool HasValue = false; @@ -299,7 +301,7 @@ namespace rocRoller * Unroll dimensions elucidate how indexes depend on which * inner-iteration of an unrolled for-loop is being executed. */ - struct Unroll : public BaseDimension + struct ROCROLLER_DECLSPEC Unroll : public BaseDimension { static constexpr bool HasValue = false; @@ -313,7 +315,7 @@ namespace rocRoller /** * MacroTileIndex - sub-dimension of a tile. See MacroTile. */ - struct MacroTileIndex : public SubDimension + struct ROCROLLER_DECLSPEC MacroTileIndex : public SubDimension { static constexpr bool HasValue = false; @@ -325,7 +327,7 @@ namespace rocRoller /** * MacroTileNumber. See MacroTile. 
*/ - struct MacroTileNumber : public SubDimension + struct ROCROLLER_DECLSPEC MacroTileNumber : public SubDimension { static constexpr bool HasValue = false; @@ -340,7 +342,7 @@ namespace rocRoller * The storage location (eg, VGPRs vs LDS) is specified by * `MemoryType`. */ - struct MacroTile : public BaseDimension + struct ROCROLLER_DECLSPEC MacroTile : public BaseDimension { int rank = 0; MemoryType memoryType = MemoryType::None; @@ -436,7 +438,7 @@ namespace rocRoller /** * ThreadTileIndex - sub-dimension of a tile (fast-moving). */ - struct ThreadTileIndex : public SubDimension + struct ROCROLLER_DECLSPEC ThreadTileIndex : public SubDimension { static constexpr bool HasValue = false; @@ -449,7 +451,7 @@ namespace rocRoller /** * ThreadTileNumber - sub-dimension of a tile (slow-moving). */ - struct ThreadTileNumber : public SubDimension + struct ROCROLLER_DECLSPEC ThreadTileNumber : public SubDimension { static constexpr bool HasValue = false; @@ -465,7 +467,7 @@ namespace rocRoller * The storage location (eg, VGPRs vs LDS) is specified by * `MemoryType`. */ - struct ThreadTile : public BaseDimension + struct ROCROLLER_DECLSPEC ThreadTile : public BaseDimension { int rank = -1; @@ -486,7 +488,7 @@ namespace rocRoller /** * WaveTileIndex - sub-dimension of a tile. See WaveTile. */ - struct WaveTileIndex : public SubDimension + struct ROCROLLER_DECLSPEC WaveTileIndex : public SubDimension { static constexpr bool HasValue = false; using SubDimension::SubDimension; @@ -497,7 +499,7 @@ namespace rocRoller /** * WaveTileNumber. See WaveTile. */ - struct WaveTileNumber : public SubDimension + struct ROCROLLER_DECLSPEC WaveTileNumber : public SubDimension { static constexpr bool HasValue = false; using SubDimension::SubDimension; @@ -508,7 +510,7 @@ namespace rocRoller /** * WaveTile - a tensor tile owned by a wave in GPRs. 
*/ - struct WaveTile : public BaseDimension + struct ROCROLLER_DECLSPEC WaveTile : public BaseDimension { int rank = 0; @@ -559,7 +561,7 @@ namespace rocRoller /** * JammedWaveTileNumber - Number of wave tiles to execute per wavefront */ - struct JammedWaveTileNumber : public SubDimension + struct ROCROLLER_DECLSPEC JammedWaveTileNumber : public SubDimension { static constexpr bool HasValue = false; using SubDimension::SubDimension; @@ -570,7 +572,7 @@ namespace rocRoller /** * ElementNumber - represents the value(s) from a ThreadTile to be stored in the VGPR(s). */ - struct ElementNumber : public SubDimension + struct ROCROLLER_DECLSPEC ElementNumber : public SubDimension { static constexpr bool HasValue = false; diff --git a/lib/include/rocRoller/KernelGraph/CoordinateGraph/Dimension_fwd.hpp b/lib/include/rocRoller/KernelGraph/CoordinateGraph/Dimension_fwd.hpp index 2889b5cb..20a1a2bc 100644 --- a/lib/include/rocRoller/KernelGraph/CoordinateGraph/Dimension_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/CoordinateGraph/Dimension_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -36,31 +38,31 @@ namespace rocRoller * Nodes (Dimensions) */ - struct ForLoop; - struct Adhoc; - struct ElementNumber; - struct Lane; - struct Linear; - struct LDS; - struct MacroTile; - struct MacroTileIndex; - struct MacroTileNumber; - struct SubDimension; - struct ThreadTile; - struct ThreadTileIndex; - struct ThreadTileNumber; - struct Unroll; - struct User; - struct VGPR; - struct VGPRBlockNumber; - struct VGPRBlockIndex; - struct WaveTile; - struct WaveTileIndex; - struct WaveTileNumber; - struct JammedWaveTileNumber; - struct Wavefront; - struct Workgroup; - struct Workitem; + struct ROCROLLER_DECLSPEC ForLoop; + struct ROCROLLER_DECLSPEC Adhoc; + struct ROCROLLER_DECLSPEC ElementNumber; + struct ROCROLLER_DECLSPEC Lane; + struct ROCROLLER_DECLSPEC Linear; + struct ROCROLLER_DECLSPEC LDS; + struct ROCROLLER_DECLSPEC MacroTile; + struct 
ROCROLLER_DECLSPEC MacroTileIndex; + struct ROCROLLER_DECLSPEC MacroTileNumber; + struct ROCROLLER_DECLSPEC SubDimension; + struct ROCROLLER_DECLSPEC ThreadTile; + struct ROCROLLER_DECLSPEC ThreadTileIndex; + struct ROCROLLER_DECLSPEC ThreadTileNumber; + struct ROCROLLER_DECLSPEC Unroll; + struct ROCROLLER_DECLSPEC User; + struct ROCROLLER_DECLSPEC VGPR; + struct ROCROLLER_DECLSPEC VGPRBlockNumber; + struct ROCROLLER_DECLSPEC VGPRBlockIndex; + struct ROCROLLER_DECLSPEC WaveTile; + struct ROCROLLER_DECLSPEC WaveTileIndex; + struct ROCROLLER_DECLSPEC WaveTileNumber; + struct ROCROLLER_DECLSPEC JammedWaveTileNumber; + struct ROCROLLER_DECLSPEC Wavefront; + struct ROCROLLER_DECLSPEC Workgroup; + struct ROCROLLER_DECLSPEC Workitem; using Dimension = std::variant + #include #include #include @@ -41,7 +43,7 @@ namespace rocRoller * Workgroup and Workitem (work coordinates) are implicitly * set if a context is passed to the constructor. */ - class Transformer + class ROCROLLER_DECLSPEC Transformer { public: Transformer() = delete; diff --git a/lib/include/rocRoller/KernelGraph/CoordinateGraph/Transformer_fwd.hpp b/lib/include/rocRoller/KernelGraph/CoordinateGraph/Transformer_fwd.hpp index 0e4fd69e..cbb28cec 100644 --- a/lib/include/rocRoller/KernelGraph/CoordinateGraph/Transformer_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/CoordinateGraph/Transformer_fwd.hpp @@ -26,13 +26,15 @@ #pragma once +#include + #include namespace rocRoller { namespace KernelGraph::CoordinateGraph { - class Transformer; + class ROCROLLER_DECLSPEC Transformer; using TransformerPtr = std::shared_ptr; } diff --git a/lib/include/rocRoller/KernelGraph/KernelGraph.hpp b/lib/include/rocRoller/KernelGraph/KernelGraph.hpp index 0cc1c719..af33cd0d 100644 --- a/lib/include/rocRoller/KernelGraph/KernelGraph.hpp +++ b/lib/include/rocRoller/KernelGraph/KernelGraph.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -52,7 +54,7 @@ namespace rocRoller * @brief Kernel graph container: 
control and coordinate graphs, control-to-coordinate mapper. * @ingroup KernelGraph */ - class KernelGraph + class ROCROLLER_DECLSPEC KernelGraph { std::vector m_constraints{ &NoDanglingMappings, &SingleControlRoot, &NoRedundantSetCoordinates}; @@ -119,17 +121,17 @@ namespace rocRoller * * @ingroup KernelGraph */ - KernelGraph translate(CommandPtr); + ROCROLLER_DECLSPEC KernelGraph translate(CommandPtr); /** * Generate assembly from a KernelGraph. * * @ingroup KernelGraph */ - Generator generate(KernelGraph, AssemblyKernelPtr); + ROCROLLER_DECLSPEC Generator generate(KernelGraph, AssemblyKernelPtr); - std::string toYAML(KernelGraph const& g); - KernelGraph fromYAML(std::string const& str); + ROCROLLER_DECLSPEC std::string toYAML(KernelGraph const& g); + ROCROLLER_DECLSPEC KernelGraph fromYAML(std::string const& str); } } diff --git a/lib/include/rocRoller/KernelGraph/KernelGraph_fwd.hpp b/lib/include/rocRoller/KernelGraph/KernelGraph_fwd.hpp index fa0fda6a..62fa3436 100644 --- a/lib/include/rocRoller/KernelGraph/KernelGraph_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/KernelGraph_fwd.hpp @@ -26,15 +26,17 @@ #pragma once +#include + #include namespace rocRoller { namespace KernelGraph { - class KernelGraph; - struct KernelUnrollVisitor; - struct LoopDistributeVisitor; + class ROCROLLER_DECLSPEC KernelGraph; + struct ROCROLLER_DECLSPEC KernelUnrollVisitor; + struct ROCROLLER_DECLSPEC LoopDistributeVisitor; using KernelGraphPtr = std::shared_ptr; } diff --git a/lib/include/rocRoller/KernelGraph/Policy.hpp b/lib/include/rocRoller/KernelGraph/Policy.hpp index 1d80c304..7764b3e5 100644 --- a/lib/include/rocRoller/KernelGraph/Policy.hpp +++ b/lib/include/rocRoller/KernelGraph/Policy.hpp @@ -26,18 +26,23 @@ #pragma once +#include + namespace rocRoller { - class CacheOnlyPolicy // Use cache to look up order. Caller should ensure the cache is valid. + class ROCROLLER_DECLSPEC + CacheOnlyPolicy // Use cache to look up order. Caller should ensure the cache is valid. 
{ }; - class UpdateCachePolicy // < Use cache to look up order. Cache will be re-built if invalid. + class ROCROLLER_DECLSPEC + UpdateCachePolicy // < Use cache to look up order. Cache will be re-built if invalid. { }; - class UseCacheIfAvailablePolicy // < Look up in cache if available, otherwise, use traversal. + class ROCROLLER_DECLSPEC + UseCacheIfAvailablePolicy // < Look up in cache if available, otherwise, use traversal. { }; - class IgnoreCachePolicy // < Use traversal. + class ROCROLLER_DECLSPEC IgnoreCachePolicy // < Use traversal. { }; diff --git a/lib/include/rocRoller/KernelGraph/RegisterTagManager.hpp b/lib/include/rocRoller/KernelGraph/RegisterTagManager.hpp index ae6c1a3f..e1a374cf 100644 --- a/lib/include/rocRoller/KernelGraph/RegisterTagManager.hpp +++ b/lib/include/rocRoller/KernelGraph/RegisterTagManager.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -42,7 +44,7 @@ namespace rocRoller * * See also: LoadStoreTileGenerator::generateStride. */ - struct RegisterExpressionAttributes + struct ROCROLLER_DECLSPEC RegisterExpressionAttributes { DataType dataType = DataType::None; //< Desired result type of the expression bool unitStride = false; //< Expression corresponds to a unitary (=1) element-stride. @@ -54,7 +56,7 @@ namespace rocRoller auto operator<=>(RegisterExpressionAttributes const& other) const = default; }; - std::string toString(RegisterExpressionAttributes const& attrs); + ROCROLLER_DECLSPEC std::string toString(RegisterExpressionAttributes const& attrs); /** * @brief Register Tag Manager - Keeps track of data flow tags @@ -71,7 +73,7 @@ namespace rocRoller * using the associated data flow tag. 
* */ - class RegisterTagManager + class ROCROLLER_DECLSPEC RegisterTagManager { public: RegisterTagManager(ContextPtr context); diff --git a/lib/include/rocRoller/KernelGraph/RegisterTagManager_fwd.hpp b/lib/include/rocRoller/KernelGraph/RegisterTagManager_fwd.hpp index 5a448637..14dabb45 100644 --- a/lib/include/rocRoller/KernelGraph/RegisterTagManager_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/RegisterTagManager_fwd.hpp @@ -28,11 +28,13 @@ */ #pragma once +#include + #include namespace rocRoller { - class RegisterTagManager; + class ROCROLLER_DECLSPEC RegisterTagManager; using RegTagManPtr = std::shared_ptr; } diff --git a/lib/include/rocRoller/KernelGraph/Reindexer.hpp b/lib/include/rocRoller/KernelGraph/Reindexer.hpp index 7966c7e8..652bbf41 100644 --- a/lib/include/rocRoller/KernelGraph/Reindexer.hpp +++ b/lib/include/rocRoller/KernelGraph/Reindexer.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -35,7 +37,7 @@ namespace rocRoller { namespace KernelGraph { - class GraphReindexer + class ROCROLLER_DECLSPEC GraphReindexer { public: std::map coordinates; @@ -52,9 +54,10 @@ namespace rocRoller * auto newExpr = reindexExpression(oldExpr, reindexer); * */ - Expression::ExpressionPtr reindexExpression(Expression::ExpressionPtr expr, - GraphReindexer const& reindexer); + ROCROLLER_DECLSPEC Expression::ExpressionPtr + reindexExpression(Expression::ExpressionPtr expr, GraphReindexer const& reindexer); - void reindexExpressions(KernelGraph& graph, int tag, GraphReindexer const& reindexer); + ROCROLLER_DECLSPEC void + reindexExpressions(KernelGraph& graph, int tag, GraphReindexer const& reindexer); } } diff --git a/lib/include/rocRoller/KernelGraph/ScopeManager.hpp b/lib/include/rocRoller/KernelGraph/ScopeManager.hpp index bdede0ba..0aa97d86 100644 --- a/lib/include/rocRoller/KernelGraph/ScopeManager.hpp +++ b/lib/include/rocRoller/KernelGraph/ScopeManager.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -55,7 +57,7 @@ 
namespace rocRoller::KernelGraph * register already exists. If not, the register is added to the * current scope. */ - class ScopeManager + class ROCROLLER_DECLSPEC ScopeManager { public: ScopeManager() = delete; diff --git a/lib/include/rocRoller/KernelGraph/ScopeManager_fwd.hpp b/lib/include/rocRoller/KernelGraph/ScopeManager_fwd.hpp index 59431e75..176b92a6 100644 --- a/lib/include/rocRoller/KernelGraph/ScopeManager_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/ScopeManager_fwd.hpp @@ -26,7 +26,9 @@ #pragma once +#include + namespace rocRoller::KernelGraph { - class ScopeManager; + class ROCROLLER_DECLSPEC ScopeManager; } diff --git a/lib/include/rocRoller/KernelGraph/StructUtils.hpp b/lib/include/rocRoller/KernelGraph/StructUtils.hpp index e3eee5ec..3e772d2c 100644 --- a/lib/include/rocRoller/KernelGraph/StructUtils.hpp +++ b/lib/include/rocRoller/KernelGraph/StructUtils.hpp @@ -26,8 +26,10 @@ #pragma once +#include + #define RR_EMPTY_STRUCT_WITH_NAME(cls) \ - struct cls \ + struct ROCROLLER_DECLSPEC cls \ { \ static constexpr bool HasValue = false; \ \ diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AddComputeIndex.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AddComputeIndex.hpp index 8ce18b78..4dc503e6 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AddComputeIndex.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AddComputeIndex.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -48,7 +50,7 @@ namespace rocRoller * portion of the Coordinate graph to keep track of the data * needed to perform the operations. 
*/ - class AddComputeIndex : public GraphTransform + class ROCROLLER_DECLSPEC AddComputeIndex : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AddConvert.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AddConvert.hpp index a760af0b..20e9c464 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AddConvert.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AddConvert.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -38,7 +40,7 @@ namespace rocRoller * datatype of the arguments to the multiply and adds control * nodes that will perform the appropriate type conversion. */ - class AddConvert : public GraphTransform + class ROCROLLER_DECLSPEC AddConvert : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AddDeallocate.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AddDeallocate.hpp index f265466e..88bba33f 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AddDeallocate.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AddDeallocate.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -38,7 +40,7 @@ namespace rocRoller * lifetimes. Deallocate operations are added when registers * are no longer needed. 
*/ - class AddDeallocate : public GraphTransform + class ROCROLLER_DECLSPEC AddDeallocate : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AddDirect2LDS.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AddDirect2LDS.hpp index 353cf2e1..e09e29a1 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AddDirect2LDS.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AddDirect2LDS.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -36,7 +38,7 @@ namespace rocRoller * * @ingroup Transformations */ - class AddDirect2LDS : public GraphTransform + class ROCROLLER_DECLSPEC AddDirect2LDS : public GraphTransform { public: AddDirect2LDS(ContextPtr context, CommandParametersPtr params) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AddF6LDSPadding.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AddF6LDSPadding.hpp index 96a7d389..a1739023 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AddF6LDSPadding.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AddF6LDSPadding.hpp @@ -25,8 +25,10 @@ *******************************************************************************/ #pragma once + #include #include +#include namespace rocRoller { @@ -51,7 +53,7 @@ namespace rocRoller * transpose loads have their new needsPadding field set to indicate * to CodeGen that padding is required. 
*/ - class AddF6LDSPadding : public GraphTransform + class ROCROLLER_DECLSPEC AddF6LDSPadding : public GraphTransform { public: AddF6LDSPadding(ContextPtr context) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AddLDS.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AddLDS.hpp index 6b5d6ed1..c70664e7 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AddLDS.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AddLDS.hpp @@ -25,8 +25,10 @@ *******************************************************************************/ #pragma once + #include #include +#include namespace rocRoller { @@ -48,7 +50,7 @@ namespace rocRoller * * @ingroup Transformations */ - class AddLDS : public GraphTransform + class ROCROLLER_DECLSPEC AddLDS : public GraphTransform { public: AddLDS(CommandParametersPtr params, ContextPtr context) @@ -77,7 +79,7 @@ namespace rocRoller * Modifies the coordinate and control graphs to add LDS * information. */ - class AddPrefetch : public GraphTransform + class ROCROLLER_DECLSPEC AddPrefetch : public GraphTransform { public: AddPrefetch(CommandParametersPtr params, ContextPtr context) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AddPRNG.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AddPRNG.hpp index 2df0ffd5..c9b4cf2e 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AddPRNG.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AddPRNG.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -41,7 +43,7 @@ namespace rocRoller * * @ingroup Transformations */ - class AddPRNG : public GraphTransform + class ROCROLLER_DECLSPEC AddPRNG : public GraphTransform { public: AddPRNG(ContextPtr context) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AddStreamK.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AddStreamK.hpp index 7b3b2e17..c586331a 100644 --- 
a/lib/include/rocRoller/KernelGraph/Transforms/AddStreamK.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AddStreamK.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include #include #include @@ -73,7 +75,7 @@ namespace rocRoller * * @param numWGs How many workgroups will be launched. */ - class AddStreamK : public GraphTransform + class ROCROLLER_DECLSPEC AddStreamK : public GraphTransform { public: AddStreamK() = delete; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AliasDataFlowTags.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AliasDataFlowTags.hpp index e9d42cb4..7c0e924b 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AliasDataFlowTags.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AliasDataFlowTags.hpp @@ -25,8 +25,10 @@ *******************************************************************************/ #pragma once + #include #include +#include namespace rocRoller { @@ -59,7 +61,7 @@ namespace rocRoller * * @ingroup Transformations */ - class AliasDataFlowTags : public GraphTransform + class ROCROLLER_DECLSPEC AliasDataFlowTags : public GraphTransform { public: AliasDataFlowTags() = default; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/AliasDataFlowTags_detail.hpp b/lib/include/rocRoller/KernelGraph/Transforms/AliasDataFlowTags_detail.hpp index 4850e468..dbcc22e8 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/AliasDataFlowTags_detail.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/AliasDataFlowTags_detail.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -39,7 +41,7 @@ namespace rocRoller using Record = ControlFlowRWTracer::ReadWriteRecord; using Edge = ControlGraph::ControlEdge; - class TagRWGraph : public Graph::Hypergraph + class ROCROLLER_DECLSPEC TagRWGraph : public Graph::Hypergraph { using Base = Graph::Hypergraph; using Base::Hypergraph; @@ -64,7 +66,7 @@ namespace rocRoller * have to 
be after all nodes in the `begin` set and before all * nodes in the `end` set. */ - struct GraphExtent + struct ROCROLLER_DECLSPEC GraphExtent { std::set begin; std::set end; @@ -77,14 +79,15 @@ namespace rocRoller bool isWithin(KernelGraph const& kgraph, GraphExtent const& gap) const; }; - std::ostream& operator<<(std::ostream& stream, GraphExtent const& extent); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + GraphExtent const& extent); /** * Represents the liveness of a given tag, including any gaps * where it would be acceptable for the registers to be * modified. */ - struct TagExtent + struct ROCROLLER_DECLSPEC TagExtent { using CategoryKey = std::tuple; @@ -123,26 +126,29 @@ namespace rocRoller * Returns a graph describing the relative ordering of each of the * control nodes in `records`. */ - TagRWGraph getOrdering(KernelGraph const& kgraph, std::vector const& records); + ROCROLLER_DECLSPEC TagRWGraph getOrdering(KernelGraph const& kgraph, + std::vector const& records); /** * Returns a TagExtent with the metadata (but not the extent or * gaps) filled in. */ - TagExtent getInfo(KernelGraph const& kgraph, std::vector const& records); + ROCROLLER_DECLSPEC TagExtent getInfo(KernelGraph const& kgraph, + std::vector const& records); /** * Returns a complete TagExtent representing the usage pattern * recorded in `records`. */ - TagExtent getExtent(KernelGraph const& kgraph, std::vector const& records); + ROCROLLER_DECLSPEC TagExtent getExtent(KernelGraph const& kgraph, + std::vector const& records); /** * Returns a set of aliases inner -> outer where `inner` can borrow * the registers of `outer` without causing a correctness problem * for the kernel. 
*/ - std::map findAliasCandidates(KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::map findAliasCandidates(KernelGraph const& kgraph); } } diff --git a/lib/include/rocRoller/KernelGraph/Transforms/CleanArguments.hpp b/lib/include/rocRoller/KernelGraph/Transforms/CleanArguments.hpp index ad27a92c..a23b49d2 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/CleanArguments.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/CleanArguments.hpp @@ -25,9 +25,11 @@ *******************************************************************************/ #pragma once + #include #include #include +#include namespace rocRoller { @@ -37,7 +39,7 @@ namespace rocRoller * @brief Removes all CommandArgruments found within an * expression with the appropriate AssemblyKernel Argument. */ - class CleanArguments : public GraphTransform + class ROCROLLER_DECLSPEC CleanArguments : public GraphTransform { public: CleanArguments(ContextPtr context, CommandPtr command) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/CleanLoops.hpp b/lib/include/rocRoller/KernelGraph/Transforms/CleanLoops.hpp index 37969bd7..1157fe11 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/CleanLoops.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/CleanLoops.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -36,7 +38,7 @@ namespace rocRoller * * Removes forloops that only contain a single iterations. 
*/ - class CleanLoops : public GraphTransform + class ROCROLLER_DECLSPEC CleanLoops : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/ConnectWorkgroups.hpp b/lib/include/rocRoller/KernelGraph/Transforms/ConnectWorkgroups.hpp index 23f4f34b..cda4ebd3 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/ConnectWorkgroups.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/ConnectWorkgroups.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -39,7 +41,7 @@ namespace rocRoller * that are leafs (don't have outgoing/incoming edges), and * attaches Workgroup coordinates to them. */ - class ConnectWorkgroups : public GraphTransform + class ROCROLLER_DECLSPEC ConnectWorkgroups : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/ConstantPropagation.hpp b/lib/include/rocRoller/KernelGraph/Transforms/ConstantPropagation.hpp index facc2484..3d2a1475 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/ConstantPropagation.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/ConstantPropagation.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -36,7 +38,7 @@ namespace rocRoller /** * @brief Propagate constant to prune unneeded Load and Assign operations. Only support for beta==0 for now. 
*/ - class ConstantPropagation : public GraphTransform + class ROCROLLER_DECLSPEC ConstantPropagation : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/FuseExpressions.hpp b/lib/include/rocRoller/KernelGraph/Transforms/FuseExpressions.hpp index 69269870..7ec4e3b9 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/FuseExpressions.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/FuseExpressions.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -36,7 +38,7 @@ namespace rocRoller * * Fuses neighbouring expressions where possible. */ - class FuseExpressions : public GraphTransform + class ROCROLLER_DECLSPEC FuseExpressions : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/FuseLoops.hpp b/lib/include/rocRoller/KernelGraph/Transforms/FuseLoops.hpp index e903d82a..59a4ac85 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/FuseLoops.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/FuseLoops.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -37,7 +39,7 @@ namespace rocRoller * Fuses multiple loops together if they iterate over the same * length. 
*/ - class FuseLoops : public GraphTransform + class ROCROLLER_DECLSPEC FuseLoops : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/GraphTransform.hpp b/lib/include/rocRoller/KernelGraph/Transforms/GraphTransform.hpp index 6422cdb7..36132012 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/GraphTransform.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/GraphTransform.hpp @@ -57,6 +57,8 @@ #pragma once +#include + #include #include @@ -72,7 +74,7 @@ namespace rocRoller * returns a transformed kernel graph based on the * transformation. */ - class GraphTransform + class ROCROLLER_DECLSPEC GraphTransform { public: GraphTransform() = default; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/GraphTransform_fwd.hpp b/lib/include/rocRoller/KernelGraph/Transforms/GraphTransform_fwd.hpp index 464d094d..93c678a7 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/GraphTransform_fwd.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/GraphTransform_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -34,7 +36,7 @@ namespace rocRoller { namespace KernelGraph { - class GraphTransform; + class ROCROLLER_DECLSPEC GraphTransform; using GraphTransformPtr = std::shared_ptr; } diff --git a/lib/include/rocRoller/KernelGraph/Transforms/IdentifyParallelDimensions.hpp b/lib/include/rocRoller/KernelGraph/Transforms/IdentifyParallelDimensions.hpp index de6db6ac..3cb08a9a 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/IdentifyParallelDimensions.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/IdentifyParallelDimensions.hpp @@ -29,6 +29,8 @@ */ #pragma once +#include + #include #include @@ -53,7 +55,8 @@ namespace rocRoller * This means that the output should be run through mergeSets() before * using. 
*/ - std::vector> identifyParallelDimensionSets(KernelGraph const& graph); + ROCROLLER_DECLSPEC std::vector> + identifyParallelDimensionSets(KernelGraph const& graph); /** * Returns the set of LoadTiled nodes reachable from `start` @@ -63,7 +66,7 @@ namespace rocRoller * * @param start a control node ID. Typically this should be a StoreTiled node. */ - std::set loadNodesReachableWithoutDimensionModifyingNodes( + ROCROLLER_DECLSPEC std::set loadNodesReachableWithoutDimensionModifyingNodes( ControlGraph::ControlGraph const& graph, int start); /** @@ -80,7 +83,7 @@ namespace rocRoller * predicates to the CommandKernel so that we check that these sizes are * consistent. */ - class IdentifyParallelDimensions : public GraphTransform + class ROCROLLER_DECLSPEC IdentifyParallelDimensions : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/InlineIncrements.hpp b/lib/include/rocRoller/KernelGraph/Transforms/InlineIncrements.hpp index e1f15589..024a07c6 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/InlineIncrements.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/InlineIncrements.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -34,7 +36,7 @@ namespace rocRoller /** * @brief Moves loop-iteration operations into the loop body. 
*/ - class InlineIncrements : public GraphTransform + class ROCROLLER_DECLSPEC InlineIncrements : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/LoadPacked.hpp b/lib/include/rocRoller/KernelGraph/Transforms/LoadPacked.hpp index b69a3804..858be52c 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/LoadPacked.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/LoadPacked.hpp @@ -25,14 +25,16 @@ *******************************************************************************/ #pragma once + #include #include +#include namespace rocRoller { namespace KernelGraph { - class LoadPacked : public GraphTransform + class ROCROLLER_DECLSPEC LoadPacked : public GraphTransform { public: LoadPacked(ContextPtr context); diff --git a/lib/include/rocRoller/KernelGraph/Transforms/LoopOverTileNumbers.hpp b/lib/include/rocRoller/KernelGraph/Transforms/LoopOverTileNumbers.hpp index ff406540..8d6fcacb 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/LoopOverTileNumbers.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/LoopOverTileNumbers.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -82,7 +84,7 @@ namespace rocRoller * * Launched workgroups = product(tileNumberCoordSizes) * numIteratedTiles */ - class LoopOverTileNumbers : public GraphTransform + class ROCROLLER_DECLSPEC LoopOverTileNumbers : public GraphTransform { public: LoopOverTileNumbers() = delete; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/LowerLinear.hpp b/lib/include/rocRoller/KernelGraph/Transforms/LowerLinear.hpp index 6a174f29..42d75573 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/LowerLinear.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/LowerLinear.hpp @@ -25,9 +25,11 @@ *******************************************************************************/ #pragma once + #include #include #include +#include namespace rocRoller { @@ -41,7 +43,7 @@ 
namespace rocRoller * Linear dimensions are packed/flattened, tiled onto * workgroups and wavefronts, and then operated on. */ - class LowerLinear : public GraphTransform + class ROCROLLER_DECLSPEC LowerLinear : public GraphTransform { public: LowerLinear(ContextPtr context) @@ -62,7 +64,7 @@ namespace rocRoller /** * Rewrite KernelGraph to additionally distribute linear dimensions onto a For loop. */ - class LowerLinearLoop : public GraphTransform + class ROCROLLER_DECLSPEC LowerLinearLoop : public GraphTransform { public: LowerLinearLoop(Expression::ExpressionPtr loopSize, ContextPtr context) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/LowerTensorContraction.hpp b/lib/include/rocRoller/KernelGraph/Transforms/LowerTensorContraction.hpp index ffd3387e..63bac747 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/LowerTensorContraction.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/LowerTensorContraction.hpp @@ -25,9 +25,11 @@ *******************************************************************************/ #pragma once + #include #include #include +#include namespace rocRoller { @@ -39,7 +41,7 @@ namespace rocRoller * Currently supports matrix-matrix products. The contraction * is lowered using a "data-parallel" decomposition. 
*/ - class LowerTensorContraction : public GraphTransform + class ROCROLLER_DECLSPEC LowerTensorContraction : public GraphTransform { public: LowerTensorContraction(CommandParametersPtr params, ContextPtr context) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/LowerTile.hpp b/lib/include/rocRoller/KernelGraph/Transforms/LowerTile.hpp index eefed9d7..2220e17a 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/LowerTile.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/LowerTile.hpp @@ -25,9 +25,11 @@ *******************************************************************************/ #pragma once + #include #include #include +#include namespace rocRoller { @@ -50,7 +52,7 @@ namespace rocRoller * translation time. To specify these attributes, call * `setDimension`. */ - class LowerTile : public GraphTransform + class ROCROLLER_DECLSPEC LowerTile : public GraphTransform { public: LowerTile(CommandParametersPtr params, ContextPtr context) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/OrderEpilogueBlocks.hpp b/lib/include/rocRoller/KernelGraph/Transforms/OrderEpilogueBlocks.hpp index 391050be..c6961800 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/OrderEpilogueBlocks.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/OrderEpilogueBlocks.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -36,7 +38,7 @@ namespace rocRoller * * Orders the epilogue components. 
*/ - class OrderEpilogueBlocks : public GraphTransform + class ROCROLLER_DECLSPEC OrderEpilogueBlocks : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/OrderMemory.hpp b/lib/include/rocRoller/KernelGraph/Transforms/OrderMemory.hpp index dc5da450..714beba1 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/OrderMemory.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/OrderMemory.hpp @@ -25,9 +25,11 @@ *******************************************************************************/ #pragma once + #include #include #include +#include namespace rocRoller { @@ -42,7 +44,7 @@ namespace rocRoller /** * @brief Ensure there are no ambiguous memory operations in the control graph. */ - class OrderMemory : public GraphTransform + class ROCROLLER_DECLSPEC OrderMemory : public GraphTransform { public: OrderMemory(bool checkOrder = true) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/RemoveDuplicates.hpp b/lib/include/rocRoller/KernelGraph/Transforms/RemoveDuplicates.hpp index 1912638d..8a88da2e 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/RemoveDuplicates.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/RemoveDuplicates.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include namespace rocRoller { @@ -34,7 +36,7 @@ namespace rocRoller /** * @brief Remove duplicate loads and stores from the graph. 
*/ - class RemoveDuplicates : public GraphTransform + class ROCROLLER_DECLSPEC RemoveDuplicates : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; diff --git a/lib/include/rocRoller/KernelGraph/Transforms/Simplify.hpp b/lib/include/rocRoller/KernelGraph/Transforms/Simplify.hpp index 4a26886a..be37b0fc 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/Simplify.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/Simplify.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -36,15 +38,15 @@ namespace rocRoller /** * @brief Simplify a graph by removing redundant edges. */ - class Simplify : public GraphTransform + class ROCROLLER_DECLSPEC Simplify : public GraphTransform { public: KernelGraph apply(KernelGraph const& original) override; std::string name() const override; }; - void removeRedundantSequenceEdges(KernelGraph& graph); - void removeRedundantBodyEdges(KernelGraph& graph); - void removeRedundantNOPs(KernelGraph& graph); + ROCROLLER_DECLSPEC void removeRedundantSequenceEdges(KernelGraph& graph); + ROCROLLER_DECLSPEC void removeRedundantBodyEdges(KernelGraph& graph); + ROCROLLER_DECLSPEC void removeRedundantNOPs(KernelGraph& graph); } } diff --git a/lib/include/rocRoller/KernelGraph/Transforms/SwizzleScale.hpp b/lib/include/rocRoller/KernelGraph/Transforms/SwizzleScale.hpp index 2bb1ff1d..1df96b74 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/SwizzleScale.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/SwizzleScale.hpp @@ -25,8 +25,10 @@ *******************************************************************************/ #pragma once + #include #include +#include namespace rocRoller { @@ -35,7 +37,7 @@ namespace rocRoller /** * @brief Swizzle the scale loads. 
*/ - class SwizzleScale : public GraphTransform + class ROCROLLER_DECLSPEC SwizzleScale : public GraphTransform { public: SwizzleScale(CommandParametersPtr params, ContextPtr context) diff --git a/lib/include/rocRoller/KernelGraph/Transforms/UnrollLoops.hpp b/lib/include/rocRoller/KernelGraph/Transforms/UnrollLoops.hpp index fdb084de..f7f550a9 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/UnrollLoops.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/UnrollLoops.hpp @@ -25,7 +25,9 @@ *******************************************************************************/ #pragma once + #include +#include #include #include @@ -43,7 +45,7 @@ namespace rocRoller * @param start * @return std::string */ - std::string getForLoopName(KernelGraph& graph, int start); + ROCROLLER_DECLSPEC std::string getForLoopName(KernelGraph& graph, int start); /** * @brief Determine how many times to unroll the loop. @@ -51,7 +53,7 @@ namespace rocRoller * A value of 0 or 1 means do not unroll it. * Use getForLoopName to determine which forLoop we are attempting to unroll */ - unsigned int + ROCROLLER_DECLSPEC unsigned int getUnrollAmount(KernelGraph& graph, int loopTag, CommandParametersPtr const& params); /** @@ -60,7 +62,7 @@ namespace rocRoller * Unrolls every loop that does not have a previous iteration * dependency by a value of 2. 
*/ - class UnrollLoops : public GraphTransform + class ROCROLLER_DECLSPEC UnrollLoops : public GraphTransform { public: UnrollLoops(CommandParametersPtr params, ContextPtr context); diff --git a/lib/include/rocRoller/KernelGraph/Transforms/UpdateParameters.hpp b/lib/include/rocRoller/KernelGraph/Transforms/UpdateParameters.hpp index b9f05082..4e40269f 100644 --- a/lib/include/rocRoller/KernelGraph/Transforms/UpdateParameters.hpp +++ b/lib/include/rocRoller/KernelGraph/Transforms/UpdateParameters.hpp @@ -25,8 +25,10 @@ *******************************************************************************/ #pragma once + #include #include +#include namespace rocRoller { @@ -36,7 +38,7 @@ namespace rocRoller * @brief Updates dimension parameters within the coordinate * graph based on the command parameters. */ - class UpdateParameters : public GraphTransform + class ROCROLLER_DECLSPEC UpdateParameters : public GraphTransform { public: UpdateParameters(CommandParametersPtr params) @@ -54,7 +56,7 @@ namespace rocRoller CommandParametersPtr m_params; }; - class UpdateWavefrontParameters : public GraphTransform + class ROCROLLER_DECLSPEC UpdateWavefrontParameters : public GraphTransform { public: UpdateWavefrontParameters(CommandParametersPtr params) @@ -72,7 +74,7 @@ namespace rocRoller CommandParametersPtr m_params; }; - class SetWorkitemCount : public GraphTransform + class ROCROLLER_DECLSPEC SetWorkitemCount : public GraphTransform { public: SetWorkitemCount(ContextPtr context) diff --git a/lib/include/rocRoller/KernelGraph/Utils.hpp b/lib/include/rocRoller/KernelGraph/Utils.hpp index 2b8d80ea..c25dcc90 100644 --- a/lib/include/rocRoller/KernelGraph/Utils.hpp +++ b/lib/include/rocRoller/KernelGraph/Utils.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -38,7 +40,7 @@ namespace rocRoller namespace KernelGraph { // Return value of colourByUnrollValue. A colour-mapping is... 
- struct UnrollColouring + struct ROCROLLER_DECLSPEC UnrollColouring { std::map> operationColour; //< Mapping: operation tag to colour-mapping. @@ -47,51 +49,50 @@ namespace rocRoller std::set separators; //< Separator edges in the control graph }; - std::string toString(UnrollColouring const&); + ROCROLLER_DECLSPEC std::string toString(UnrollColouring const&); /** * @brief */ - UnrollColouring colourByUnrollValue(KernelGraph const& kgraph, - int topOp = -1, - std::unordered_set const& exclude = {}); + ROCROLLER_DECLSPEC UnrollColouring colourByUnrollValue( + KernelGraph const& kgraph, int topOp = -1, std::unordered_set const& exclude = {}); /** * @brief Return DataFlowTag of LHS of binary expression in Assign node. */ template - std::tuple getBinaryLHS(KernelGraph const& kgraph, - int assign); + ROCROLLER_DECLSPEC std::tuple + getBinaryLHS(KernelGraph const& kgraph, int assign); /** * @brief Return DataFlowTag of RHS of binary expression in Assign node. */ template - std::tuple getBinaryRHS(KernelGraph const& kgraph, - int assign); + ROCROLLER_DECLSPEC std::tuple + getBinaryRHS(KernelGraph const& kgraph, int assign); /** * @brief Create a range-based for loop. * * returns {dimension, operation} */ - std::pair rangeFor(KernelGraph& graph, - Expression::ExpressionPtr size, - const std::string& name, - VariableType vtype = DataType::None, - int forLoopCoord = -1); + ROCROLLER_DECLSPEC std::pair rangeFor(KernelGraph& graph, + Expression::ExpressionPtr size, + const std::string& name, + VariableType vtype = DataType::None, + int forLoopCoord = -1); /** * @brief Remove a range-based for loop created by rangeFor. */ - void purgeFor(KernelGraph& graph, int tag); + ROCROLLER_DECLSPEC void purgeFor(KernelGraph& graph, int tag); /** * @brief Create a clone of a ForLoopOp. This new ForLoopOp * will use the same ForLoop Dimension as the original * ForLoopOp. 
*/ - int cloneForLoop(KernelGraph& graph, int tag); + ROCROLLER_DECLSPEC int cloneForLoop(KernelGraph& graph, int tag); /** * @brief Remove a node and all of its children from the control graph @@ -101,21 +102,21 @@ namespace rocRoller * @param kgraph * @param node */ - void purgeNodeAndChildren(KernelGraph& kgraph, int node); + ROCROLLER_DECLSPEC void purgeNodeAndChildren(KernelGraph& kgraph, int node); template > - void purgeNodes(KernelGraph& kgraph, Range nodes); + ROCROLLER_DECLSPEC void purgeNodes(KernelGraph& kgraph, Range nodes); - bool isHardwareCoordinate(int tag, KernelGraph const& kgraph); - bool isLoopishCoordinate(int tag, KernelGraph const& kgraph); - bool isStorageCoordinate(int tag, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC bool isHardwareCoordinate(int tag, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC bool isLoopishCoordinate(int tag, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC bool isStorageCoordinate(int tag, KernelGraph const& kgraph); /** * @brief Filter coordinates by type. */ template - std::unordered_set filterCoordinates(auto const& candidates, - KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::unordered_set filterCoordinates(auto const& candidates, + KernelGraph const& kgraph); /** * @brief Find storage neighbour in either direction. @@ -128,19 +129,20 @@ namespace rocRoller * * Tries upstream first. */ - std::optional> - findStorageNeighbour(int tag, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::optional> + findStorageNeighbour(int tag, KernelGraph const& kgraph); /** * @brief Return Unroll coordinate beside (as part of a Split * edge) the ForLoop coordinate. */ - std::optional findUnrollNeighbour(KernelGraph const& kgraph, int forLoopCoord); + ROCROLLER_DECLSPEC std::optional findUnrollNeighbour(KernelGraph const& kgraph, + int forLoopCoord); /** * @brief Return DataFlowTag of DEST of Assign node. 
*/ - int getDEST(KernelGraph const& kgraph, int assign); + ROCROLLER_DECLSPEC int getDEST(KernelGraph const& kgraph, int assign); /** * @brief Return target coordinate for load/store operation. @@ -151,7 +153,8 @@ namespace rocRoller * For stores, the target is the destination (User or LDS) of * the store. */ - std::pair getOperationTarget(int tag, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::pair + getOperationTarget(int tag, KernelGraph const& kgraph); /** * Returns the true coordinate that should be the target of a @@ -160,7 +163,7 @@ namespace rocRoller * For now this will just follow any Duplicate edge leaving * `storageTarget`. */ - int getTransformTarget(int storageTarget, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC int getTransformTarget(int storageTarget, KernelGraph const& kgraph); /** * @brief Find all required coordintes needed to compute @@ -169,30 +172,33 @@ namespace rocRoller * @return Pair of: vector required coordinates; set of * coordinates in the connecting path. */ - std::pair, std::unordered_set> findRequiredCoordinates( - int target, Graph::Direction direction, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::pair, std::unordered_set> + findRequiredCoordinates(int target, + Graph::Direction direction, + KernelGraph const& kgraph); - std::pair, std::unordered_set> - findRequiredCoordinates(int target, - Graph::Direction direction, - std::function fullStop, - KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::pair, std::unordered_set> + findRequiredCoordinates(int target, + Graph::Direction direction, + std::function fullStop, + KernelGraph const& kgraph); - std::pair, std::unordered_set> - findAllRequiredCoordinates(int op, KernelGraph const& graph); + ROCROLLER_DECLSPEC std::pair, std::unordered_set> + findAllRequiredCoordinates(int op, KernelGraph const& graph); /** * @brief Find the operation of type T that contains the * candidate load/store operation. 
*/ template - std::optional findContainingOperation(int candidate, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::optional findContainingOperation(int candidate, + KernelGraph const& kgraph); /** * @brief Reconnect incoming/outgoing edges from op to newop. */ template - void reconnect(KernelGraph& graph, int newop, int op); + ROCROLLER_DECLSPEC void reconnect(KernelGraph& graph, int newop, int op); /** * @brief Find the operation of type T that contains the @@ -200,7 +206,8 @@ namespace rocRoller * body of that operation. */ template - std::optional findTopOfContainingOperation(int candidate, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::optional + findTopOfContainingOperation(int candidate, KernelGraph const& kgraph); /** * @brief Create a new coordinate representing data within the scratch space. This will return a @@ -212,7 +219,7 @@ namespace rocRoller * @param context * @return User */ - rocRoller::KernelGraph::CoordinateGraph::User newScratchCoordinate( + ROCROLLER_DECLSPEC rocRoller::KernelGraph::CoordinateGraph::User newScratchCoordinate( Expression::ExpressionPtr size, VariableType varType, ContextPtr context); /** @@ -223,47 +230,51 @@ namespace rocRoller * * Does not delete the original operation. */ - int replaceWith(KernelGraph& graph, int op, int newOp, bool includeBody = true); + ROCROLLER_DECLSPEC int + replaceWith(KernelGraph& graph, int op, int newOp, bool includeBody = true); /** * @brief Insert chain (from top to bottom) above operation. * * Bottom is attached to op via a Sequence edge. */ - void insertBefore(KernelGraph& graph, int op, int top, int bottom); + ROCROLLER_DECLSPEC void insertBefore(KernelGraph& graph, int op, int top, int bottom); /** * @brief Insert chain (from top to bottom) above operation. * * Top is attached to op via a Sequence edge. 
*/ - void insertAfter(KernelGraph& graph, int op, int top, int bottom); + ROCROLLER_DECLSPEC void insertAfter(KernelGraph& graph, int op, int top, int bottom); /** * @brief Replace operation with a new operation. */ - void insertWithBody(KernelGraph& graph, int op, int newOp); + ROCROLLER_DECLSPEC void insertWithBody(KernelGraph& graph, int op, int newOp); /** * @brief Find load/store operations that need their indexes * precomputed by ComputeIndex. */ - std::vector findComputeIndexCandidates(KernelGraph const& kgraph, int start); + ROCROLLER_DECLSPEC std::vector findComputeIndexCandidates(KernelGraph const& kgraph, + int start); /** * Removes all CommandArgruments found within an expression * with the appropriate AssemblyKernel Argument. */ - Expression::ExpressionPtr cleanArguments(Expression::ExpressionPtr, AssemblyKernelPtr); + ROCROLLER_DECLSPEC Expression::ExpressionPtr cleanArguments(Expression::ExpressionPtr, + AssemblyKernelPtr); /** * @brief Get ForLoop and increment (Linear) dimensions * assciated with ForLoopOp. 
*/ - std::pair getForLoopCoords(int forLoopOp, KernelGraph const& kgraph); + ROCROLLER_DECLSPEC std::pair getForLoopCoords(int forLoopOp, + KernelGraph const& kgraph); template Range> - std::optional + ROCROLLER_DECLSPEC std::optional getForLoopCoord(std::optional forLoopOp, KernelGraph const& kgraph, Range within); /** @@ -282,21 +293,21 @@ namespace rocRoller * @param forLoop * @return std::pair */ - std::pair - getForLoopIncrement(KernelGraph const& graph, int forLoop); + ROCROLLER_DECLSPEC std::pair + getForLoopIncrement(KernelGraph const& graph, int forLoop); - void duplicateMacroTile(KernelGraph& graph, int tag); + ROCROLLER_DECLSPEC void duplicateMacroTile(KernelGraph& graph, int tag); - int duplicateControlNode(KernelGraph& graph, int tag); + ROCROLLER_DECLSPEC int duplicateControlNode(KernelGraph& graph, int tag); /** * Updates the threadtile size for enabling the use of long dword instructions */ - void updateThreadTileForLongDwords(int& t_m, - int& t_n, - int maxWidth, - uint macTileFastMovingDimSize, - int numDwordsPerElement); + ROCROLLER_DECLSPEC void updateThreadTileForLongDwords(int& t_m, + int& t_n, + int maxWidth, + uint macTileFastMovingDimSize, + int numDwordsPerElement); /** * @brief Get the tag of the highest SetCoordinate directly upstream from load. @@ -305,7 +316,7 @@ namespace rocRoller * @param load * @return int */ - int getTopSetCoordinate(KernelGraph const& graph, int load); + ROCROLLER_DECLSPEC int getTopSetCoordinate(KernelGraph const& graph, int load); /** * @brief Get the unique tags of the highest SetCoordinate nodes directly upstream from each load. 
@@ -314,7 +325,8 @@ namespace rocRoller * @param loads * @return std::set */ - std::set getTopSetCoordinates(KernelGraph& graph, std::vector loads); + ROCROLLER_DECLSPEC std::set getTopSetCoordinates(KernelGraph& graph, + std::vector loads); /** * @brief Get the SetCoordinate object upstream from load that sets the @@ -325,7 +337,7 @@ namespace rocRoller * @param load * @return int */ - int getSetCoordinateForDim(KernelGraph const& graph, int dim, int load); + ROCROLLER_DECLSPEC int getSetCoordinateForDim(KernelGraph const& graph, int dim, int load); /** * @brief Determine whether a matching SetCoordinate object exists upstream @@ -337,16 +349,17 @@ namespace rocRoller * @param coordTag * @return bool */ - bool hasExistingSetCoordinate(KernelGraph const& graph, - int op, - int coordValue, - int coordTag); + ROCROLLER_DECLSPEC bool hasExistingSetCoordinate(KernelGraph const& graph, + int op, + int coordValue, + int coordTag); /** * Gets the unroll coordinate value that is set by a SetCoordinate node upstream * from the operation op, for the dimension unrollDim. */ - unsigned int getUnrollValueForOp(KernelGraph const& graph, int unrollDim, int op); + ROCROLLER_DECLSPEC unsigned int + getUnrollValueForOp(KernelGraph const& graph, int unrollDim, int op); /** * @brief Create duplicates of all of the nodes downstream of the provided @@ -362,15 +375,16 @@ namespace rocRoller * @return New start nodes for the duplicated sub-graph. */ template Predicate> - std::vector duplicateControlNodes(KernelGraph& graph, - std::shared_ptr reindexer, - std::vector const& startNodes, - Predicate dontDuplicate); + ROCROLLER_DECLSPEC std::vector + duplicateControlNodes(KernelGraph& graph, + std::shared_ptr reindexer, + std::vector const& startNodes, + Predicate dontDuplicate); /** * @brief Return VariableType of load/store operation. 
*/ - VariableType getVariableType(KernelGraph const& graph, int opTag); + ROCROLLER_DECLSPEC VariableType getVariableType(KernelGraph const& graph, int opTag); /** * @brief Add coordinate-transforms for storing a MacroTile @@ -378,61 +392,62 @@ namespace rocRoller * * Implemented in LowerTile.cpp. */ - void storeMacroTile_VGPR(KernelGraph& graph, - std::vector& connections, - int userTag, - int macTileTag, - std::vector const& sdim, - std::vector const& jammedTiles, - CommandParametersPtr params, - ContextPtr context); + ROCROLLER_DECLSPEC void storeMacroTile_VGPR(KernelGraph& graph, + std::vector& connections, + int userTag, + int macTileTag, + std::vector const& sdim, + std::vector const& jammedTiles, + CommandParametersPtr params, + ContextPtr context); /** * @brief Add coordinate-transforms for loading a MacroTile * from global into a ThreadTile. */ - void loadMacroTile_VGPR(KernelGraph& graph, - std::vector& connections, - int userTag, - int macTileTag, - std::vector const& sdim, - std::vector const& jammedTiles, - CommandParametersPtr params, - ContextPtr context, - bool isDirect2LDS = false); + ROCROLLER_DECLSPEC void loadMacroTile_VGPR(KernelGraph& graph, + std::vector& connections, + int userTag, + int macTileTag, + std::vector const& sdim, + std::vector const& jammedTiles, + CommandParametersPtr params, + ContextPtr context, + bool isDirect2LDS = false); /** * @brief Store version of addLoadThreadTileCT. 
*/ - void addStoreThreadTileCT(KernelGraph& graph, - std::vector& connections, - int macTileTag, - int iMacX, - int iMacY, - std::array const& workgroupSizes, - std::vector const& jammedTiles, - bool useSwappedAccess, - bool isDirect2LDS = false); + ROCROLLER_DECLSPEC void + addStoreThreadTileCT(KernelGraph& graph, + std::vector& connections, + int macTileTag, + int iMacX, + int iMacY, + std::array const& workgroupSizes, + std::vector const& jammedTiles, + bool useSwappedAccess, + bool isDirect2LDS = false); /** * @brief Store version of addLoadMacroTileCT. */ - std::tuple - addStoreMacroTileCT(KernelGraph& graph, - std::vector& connections, - int macTileTag, - std::vector const& sdim, - std::vector const& jammedTiles = {1, 1}); + ROCROLLER_DECLSPEC std::tuple + addStoreMacroTileCT(KernelGraph& graph, + std::vector& connections, + int macTileTag, + std::vector const& sdim, + std::vector const& jammedTiles = {1, 1}); /** * @brief Store version of addLoad1DMacroTileCT. */ - std::tuple - addStore1DMacroTileCT(KernelGraph& graph, - std::vector& connections, - int macTileTag, - std::vector const& sdim, - std::vector const& jammedTiles = {1, 1}); + ROCROLLER_DECLSPEC std::tuple + addStore1DMacroTileCT(KernelGraph& graph, + std::vector& connections, + int macTileTag, + std::vector const& sdim, + std::vector const& jammedTiles = {1, 1}); /** * @brief Add coordinate-transforms for tiling two @@ -448,11 +463,11 @@ namespace rocRoller * @return Tuple of: row MacroTileNumber, row MacroTileIndex, * column MacroTileNumber, column MacroTileIndex. 
*/ - std::tuple - addLoadMacroTileCT(KernelGraph& graph, - std::vector& connections, - int macTileTag, - std::vector const& sdim); + ROCROLLER_DECLSPEC std::tuple + addLoadMacroTileCT(KernelGraph& graph, + std::vector& connections, + int macTileTag, + std::vector const& sdim); /** * @brief Add coordinate-transforms for tiling the X @@ -470,10 +485,11 @@ namespace rocRoller * @return Tuple of: row MacroTileNumber, row MacroTileIndex, * column MacroTileIndex. */ - std::tuple addLoad1DMacroTileCT(KernelGraph& graph, - std::vector& connections, - int macTileTag, - std::vector const& sdim); + ROCROLLER_DECLSPEC std::tuple + addLoad1DMacroTileCT(KernelGraph& graph, + std::vector& connections, + int macTileTag, + std::vector const& sdim); /** * @brief Add coordinate-transforms for loading a ThreadTile @@ -498,26 +514,27 @@ namespace rocRoller * Required (deferred) connections are appended to * `connections`. */ - void addLoadThreadTileCT(KernelGraph& graph, - std::vector& connections, - int macTileTag, - int iMacX, - int iMacY, - std::array const& workgroupSizes, - std::vector const& jammedTiles, - bool useSwappedAccess, - bool isDirect2LDS = false); + ROCROLLER_DECLSPEC void + addLoadThreadTileCT(KernelGraph& graph, + std::vector& connections, + int macTileTag, + int iMacX, + int iMacY, + std::array const& workgroupSizes, + std::vector const& jammedTiles, + bool useSwappedAccess, + bool isDirect2LDS = false); /** * @brief Create an internal tile backed by a ThreadTile. * * Implemented in LowerTile.cpp. */ - int createInternalTile(KernelGraph& graph, - VariableType varType, - int macTileTag, - CommandParametersPtr params, - ContextPtr context); + ROCROLLER_DECLSPEC int createInternalTile(KernelGraph& graph, + VariableType varType, + int macTileTag, + CommandParametersPtr params, + ContextPtr context); /** * @brief Create an internal tile backed by a ThreadTile. The @@ -525,13 +542,13 @@ namespace rocRoller * * Implemented in LowerTile.cpp. 
*/ - int createInternalTile(KernelGraph& graph, - VariableType varType, - int macTileTag, - std::vector const& numWaveTiles, - bool splitStore, - CommandParametersPtr params, - ContextPtr context); + ROCROLLER_DECLSPEC int createInternalTile(KernelGraph& graph, + VariableType varType, + int macTileTag, + std::vector const& numWaveTiles, + bool splitStore, + CommandParametersPtr params, + ContextPtr context); /** * @brief Order all input pairs of memory nodes in graph. @@ -540,9 +557,9 @@ namespace rocRoller * @param pairs Pairs of memory nodes to be ordered. * @param ordered If true, the pairs are passed in order. */ - void orderMemoryNodes(KernelGraph& graph, - std::set> const& pairs, - bool ordered); + ROCROLLER_DECLSPEC void orderMemoryNodes(KernelGraph& graph, + std::set> const& pairs, + bool ordered); /** * @brief Order all memory nodes in srcs with respect to all memory nodes in dests. @@ -552,10 +569,10 @@ namespace rocRoller * @param dests * @param ordered If true, all orderings will be src -> dest. */ - void orderMemoryNodes(KernelGraph& graph, - std::set const& srcs, - std::set const& dests, - bool ordered); + ROCROLLER_DECLSPEC void orderMemoryNodes(KernelGraph& graph, + std::set const& srcs, + std::set const& dests, + bool ordered); /** * @brief Order all input nodes with respect to each other. @@ -564,16 +581,17 @@ namespace rocRoller * @param nodes * @param ordered If true, all orderings will be nodes[i-1] -> nodes[i]. */ - void orderMemoryNodes(KernelGraph& graph, std::vector const& nodes, bool ordered); + ROCROLLER_DECLSPEC void + orderMemoryNodes(KernelGraph& graph, std::vector const& nodes, bool ordered); /** * Replace the use of an old macrotile in the given control * nodes with a new macrotile. 
*/ - void replaceMacroTile(KernelGraph& graph, - std::unordered_set const& ops, - int oldMacTileTag, - int newMacTileTag); + ROCROLLER_DECLSPEC void replaceMacroTile(KernelGraph& graph, + std::unordered_set const& ops, + int oldMacTileTag, + int newMacTileTag); /** * @brief @@ -583,7 +601,8 @@ namespace rocRoller * @param opTag2 LoadTileDirect2LDS operation * */ - void moveConnections(rocRoller::KernelGraph::KernelGraph& kgraph, int opTag1, int opTag2); + ROCROLLER_DECLSPEC void + moveConnections(rocRoller::KernelGraph::KernelGraph& kgraph, int opTag1, int opTag2); /** * @brief ceil(a/b) = (a+b-1)/b @@ -592,7 +611,8 @@ namespace rocRoller * @param tileSize MacroTile size * */ - Expression::ExpressionPtr tileCeilDivide(Expression::ExpressionPtr sdSize, int tileSize); + ROCROLLER_DECLSPEC Expression::ExpressionPtr + tileCeilDivide(Expression::ExpressionPtr sdSize, int tileSize); /** * @brief Identifies whether a registerTag has an associated deallocate node. @@ -601,7 +621,7 @@ namespace rocRoller * @param registerTag * */ - bool hasDeallocate(const KernelGraph& graph, int tag); + ROCROLLER_DECLSPEC bool hasDeallocate(const KernelGraph& graph, int tag); } } diff --git a/lib/include/rocRoller/KernelGraph/Visitors.hpp b/lib/include/rocRoller/KernelGraph/Visitors.hpp index 72846dec..11d0018b 100644 --- a/lib/include/rocRoller/KernelGraph/Visitors.hpp +++ b/lib/include/rocRoller/KernelGraph/Visitors.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -126,7 +128,7 @@ namespace rocRoller * To rewrite, for example, only LoadLinear edges, simply * override the `visitEdge` method. 
*/ - struct BaseGraphVisitor + struct ROCROLLER_DECLSPEC BaseGraphVisitor { BaseGraphVisitor(ContextPtr context, Graph::Direction controlGraphOrder = Graph::Direction::Downstream, diff --git a/lib/include/rocRoller/KernelOptions.hpp b/lib/include/rocRoller/KernelOptions.hpp index 251613b2..c8d46b34 100644 --- a/lib/include/rocRoller/KernelOptions.hpp +++ b/lib/include/rocRoller/KernelOptions.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -42,7 +44,7 @@ namespace rocRoller const std::string SCRATCH = "SCRATCH"; const std::string NUMWGS = "numWGs"; - struct KernelOptions + struct ROCROLLER_DECLSPEC KernelOptions { LogLevel logLevel = LogLevel::Verbose; diff --git a/lib/include/rocRoller/Operations/BlockScale.hpp b/lib/include/rocRoller/Operations/BlockScale.hpp index 90f49e31..837959b3 100644 --- a/lib/include/rocRoller/Operations/BlockScale.hpp +++ b/lib/include/rocRoller/Operations/BlockScale.hpp @@ -30,6 +30,8 @@ #pragma once +#include + #include #include #include @@ -45,7 +47,7 @@ namespace rocRoller /** * A block scale operation for MX datatypes */ - class BlockScale : public BaseOperation + class ROCROLLER_DECLSPEC BlockScale : public BaseOperation { public: BlockScale() = delete; diff --git a/lib/include/rocRoller/Operations/BlockScale_fwd.hpp b/lib/include/rocRoller/Operations/BlockScale_fwd.hpp index 7bd3d495..10978f54 100644 --- a/lib/include/rocRoller/Operations/BlockScale_fwd.hpp +++ b/lib/include/rocRoller/Operations/BlockScale_fwd.hpp @@ -30,6 +30,8 @@ #pragma once +#include + #include namespace rocRoller @@ -39,7 +41,7 @@ namespace rocRoller /** * A block scale operation for MX datatypes */ - class BlockScale; + class ROCROLLER_DECLSPEC BlockScale; enum class ScaleMode { @@ -50,8 +52,8 @@ namespace rocRoller Count }; - std::string toString(ScaleMode const& mode); - std::ostream& operator<<(std::ostream& stream, ScaleMode const& mode); + ROCROLLER_DECLSPEC std::string toString(ScaleMode const& mode); + ROCROLLER_DECLSPEC 
std::ostream& operator<<(std::ostream& stream, ScaleMode const& mode); } } diff --git a/lib/include/rocRoller/Operations/Command.hpp b/lib/include/rocRoller/Operations/Command.hpp index 77956dac..f70049ec 100644 --- a/lib/include/rocRoller/Operations/Command.hpp +++ b/lib/include/rocRoller/Operations/Command.hpp @@ -29,6 +29,8 @@ #pragma once +#include + #include #include #include @@ -43,7 +45,7 @@ namespace rocRoller { - struct Command : public std::enable_shared_from_this + struct ROCROLLER_DECLSPEC Command : public std::enable_shared_from_this { public: using OperationList = std::vector>; @@ -145,7 +147,7 @@ namespace rocRoller int m_runtimeArgsOffset = 0; }; - std::ostream& operator<<(std::ostream& stream, Command const& command); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, Command const& command); } diff --git a/lib/include/rocRoller/Operations/CommandArgument.hpp b/lib/include/rocRoller/Operations/CommandArgument.hpp index 27c1e47e..b5f6e242 100644 --- a/lib/include/rocRoller/Operations/CommandArgument.hpp +++ b/lib/include/rocRoller/Operations/CommandArgument.hpp @@ -29,6 +29,8 @@ #pragma once +#include + #include #include @@ -56,7 +58,7 @@ namespace rocRoller * specific value or type of value should be implemented through * the predication mechanism. 
*/ - class CommandArgument : public std::enable_shared_from_this + class ROCROLLER_DECLSPEC CommandArgument : public std::enable_shared_from_this { public: CommandArgument(CommandPtr com, @@ -100,25 +102,26 @@ namespace rocRoller std::string m_name; }; - std::ostream& operator<<(std::ostream&, CommandArgument const&); - std::ostream& operator<<(std::ostream&, CommandArgumentPtr const&); - std::ostream& operator<<(std::ostream&, std::vector const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, CommandArgument const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, CommandArgumentPtr const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, + std::vector const&); - VariableType variableType(CommandArgumentValue const& val); - std::string name(CommandArgumentValue const& val); - std::string toString(CommandArgumentValue const& val); - std::ostream& operator<<(std::ostream&, CommandArgumentValue const&); + ROCROLLER_DECLSPEC VariableType variableType(CommandArgumentValue const& val); + ROCROLLER_DECLSPEC std::string name(CommandArgumentValue const& val); + ROCROLLER_DECLSPEC std::string toString(CommandArgumentValue const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, CommandArgumentValue const&); /** * Returns an unsigned integer from a CommandArgumentValue. * If the CommandArgumentValue is not an integer, an exception will be thrown. */ - unsigned int getUnsignedInt(CommandArgumentValue val); + ROCROLLER_DECLSPEC unsigned int getUnsignedInt(CommandArgumentValue val); /** * Returns true if a CommandArgumentValue is an integer type. 
*/ - bool isInteger(CommandArgumentValue val); + ROCROLLER_DECLSPEC bool isInteger(CommandArgumentValue val); } #include diff --git a/lib/include/rocRoller/Operations/CommandArgument_fwd.hpp b/lib/include/rocRoller/Operations/CommandArgument_fwd.hpp index 7a569adc..42216030 100644 --- a/lib/include/rocRoller/Operations/CommandArgument_fwd.hpp +++ b/lib/include/rocRoller/Operations/CommandArgument_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -34,7 +36,7 @@ namespace rocRoller { - class CommandArgument; + class ROCROLLER_DECLSPEC CommandArgument; using CommandArgumentPtr = std::shared_ptr; using CommandArgumentValue = std::variant< diff --git a/lib/include/rocRoller/Operations/CommandArguments.hpp b/lib/include/rocRoller/Operations/CommandArguments.hpp index 8279bebc..189e3c97 100644 --- a/lib/include/rocRoller/Operations/CommandArguments.hpp +++ b/lib/include/rocRoller/Operations/CommandArguments.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -33,10 +35,10 @@ namespace rocRoller { - std::string toString(ArgumentType); - std::ostream& operator<<(std::ostream&, ArgumentType); + ROCROLLER_DECLSPEC std::string toString(ArgumentType); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, ArgumentType); - class CommandArguments + class ROCROLLER_DECLSPEC CommandArguments { public: CommandArguments() = delete; diff --git a/lib/include/rocRoller/Operations/Command_fwd.hpp b/lib/include/rocRoller/Operations/Command_fwd.hpp index 617269f3..088d92bf 100644 --- a/lib/include/rocRoller/Operations/Command_fwd.hpp +++ b/lib/include/rocRoller/Operations/Command_fwd.hpp @@ -29,13 +29,15 @@ #pragma once +#include + #include #include namespace rocRoller { - class Command; + class ROCROLLER_DECLSPEC Command; using CommandPtr = std::shared_ptr; - std::ostream& operator<<(std::ostream& stream, Command const& command); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, Command const& command); } diff 
--git a/lib/include/rocRoller/Operations/Operation.hpp b/lib/include/rocRoller/Operations/Operation.hpp index 2ac6afa0..0752bd02 100644 --- a/lib/include/rocRoller/Operations/Operation.hpp +++ b/lib/include/rocRoller/Operations/Operation.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -36,7 +38,7 @@ namespace rocRoller { namespace Operations { - class BaseOperation + class ROCROLLER_DECLSPEC BaseOperation { public: BaseOperation(); diff --git a/lib/include/rocRoller/Operations/OperationTag.hpp b/lib/include/rocRoller/Operations/OperationTag.hpp index 8ed6b7eb..320269f8 100644 --- a/lib/include/rocRoller/Operations/OperationTag.hpp +++ b/lib/include/rocRoller/Operations/OperationTag.hpp @@ -26,13 +26,15 @@ #pragma once +#include + #include namespace rocRoller { namespace Operations { - struct OperationTag final : public DistinctType + struct ROCROLLER_DECLSPEC OperationTag final : public DistinctType { explicit OperationTag(int value) : DistinctType(value) diff --git a/lib/include/rocRoller/Operations/Operations.hpp b/lib/include/rocRoller/Operations/Operations.hpp index c0d4b0e1..d0ee9fdd 100644 --- a/lib/include/rocRoller/Operations/Operations.hpp +++ b/lib/include/rocRoller/Operations/Operations.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -44,7 +46,7 @@ namespace rocRoller { namespace Operations { - struct Nop + struct ROCROLLER_DECLSPEC Nop { Nop() {} template @@ -55,7 +57,7 @@ namespace rocRoller auto operator<=>(Nop const&) const = default; }; - struct Inputs + struct ROCROLLER_DECLSPEC Inputs { std::unordered_set call(Operation const&); @@ -79,7 +81,7 @@ namespace rocRoller std::unordered_set operator()(Nop const&); }; - struct Outputs + struct ROCROLLER_DECLSPEC Outputs { std::unordered_set call(Operation const&); @@ -103,7 +105,7 @@ namespace rocRoller std::unordered_set operator()(Nop const&); }; - struct TagVisitor + struct ROCROLLER_DECLSPEC TagVisitor { OperationTag call(XOp const&); OperationTag 
operator()(E_Unary const&); @@ -111,7 +113,7 @@ namespace rocRoller OperationTag operator()(E_Ternary const&); }; - struct AssignOutputs + struct ROCROLLER_DECLSPEC AssignOutputs { std::unordered_set call(Operation&, OperationTag); @@ -138,7 +140,7 @@ namespace rocRoller OperationTag m_nextTagValue; }; - struct ToStringVisitor + struct ROCROLLER_DECLSPEC ToStringVisitor { std::string call(Operation const&, const unsigned char*); @@ -165,7 +167,7 @@ namespace rocRoller const unsigned char* m_runtimeArgs; }; - struct SetCommand + struct ROCROLLER_DECLSPEC SetCommand { SetCommand(CommandPtr); @@ -188,7 +190,7 @@ namespace rocRoller CommandPtr command; }; - struct AllocateArguments + struct ROCROLLER_DECLSPEC AllocateArguments { void call(Operation&); @@ -207,7 +209,7 @@ namespace rocRoller void operator()(RandomNumberGenerator&); }; - struct VariableTypeVisitor + struct ROCROLLER_DECLSPEC VariableTypeVisitor { rocRoller::VariableType call(Operation&); diff --git a/lib/include/rocRoller/Operations/Operations_fwd.hpp b/lib/include/rocRoller/Operations/Operations_fwd.hpp index 1bf71bdb..2dc7947a 100644 --- a/lib/include/rocRoller/Operations/Operations_fwd.hpp +++ b/lib/include/rocRoller/Operations/Operations_fwd.hpp @@ -29,6 +29,8 @@ */ #pragma once + +#include #include #include @@ -36,19 +38,19 @@ namespace rocRoller { namespace Operations { - struct Tensor; - struct Scalar; - struct Literal; - struct BlockScale; - struct T_Load_Linear; - struct T_Load_Scalar; - struct T_Load_Tiled; - struct T_Mul; - struct T_Store_Linear; - struct T_Store_Tiled; - struct T_Execute; - struct Nop; - struct RandomNumberGenerator; + struct ROCROLLER_DECLSPEC Tensor; + struct ROCROLLER_DECLSPEC Scalar; + struct ROCROLLER_DECLSPEC Literal; + struct ROCROLLER_DECLSPEC BlockScale; + struct ROCROLLER_DECLSPEC T_Load_Linear; + struct ROCROLLER_DECLSPEC T_Load_Scalar; + struct ROCROLLER_DECLSPEC T_Load_Tiled; + struct ROCROLLER_DECLSPEC T_Mul; + struct ROCROLLER_DECLSPEC T_Store_Linear; + struct 
ROCROLLER_DECLSPEC T_Store_Tiled; + struct ROCROLLER_DECLSPEC T_Execute; + struct ROCROLLER_DECLSPEC Nop; + struct ROCROLLER_DECLSPEC RandomNumberGenerator; using Operation = std::variant concept CConcreteOperation = (COperation && !std::same_as); - struct Inputs; - struct Outputs; - struct TagVisitor; + struct ROCROLLER_DECLSPEC Inputs; + struct ROCROLLER_DECLSPEC Outputs; + struct ROCROLLER_DECLSPEC TagVisitor; std::string name(Operation const&); diff --git a/lib/include/rocRoller/Operations/RandomNumberGenerator.hpp b/lib/include/rocRoller/Operations/RandomNumberGenerator.hpp index a4313f88..11c31507 100644 --- a/lib/include/rocRoller/Operations/RandomNumberGenerator.hpp +++ b/lib/include/rocRoller/Operations/RandomNumberGenerator.hpp @@ -30,6 +30,8 @@ #pragma once +#include + #include "Operation.hpp" #include @@ -39,7 +41,7 @@ namespace rocRoller { namespace Operations { - class RandomNumberGenerator : public BaseOperation + class ROCROLLER_DECLSPEC RandomNumberGenerator : public BaseOperation { public: enum class SeedMode @@ -63,7 +65,8 @@ namespace rocRoller bool operator==(RandomNumberGenerator const&) const; }; - std::string toString(RandomNumberGenerator::SeedMode const&); - std::ostream& operator<<(std::ostream& stream, RandomNumberGenerator::SeedMode mode); + ROCROLLER_DECLSPEC std::string toString(RandomNumberGenerator::SeedMode const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + RandomNumberGenerator::SeedMode mode); } } diff --git a/lib/include/rocRoller/Operations/T_Execute.hpp b/lib/include/rocRoller/Operations/T_Execute.hpp index 7a081b70..ef959c83 100644 --- a/lib/include/rocRoller/Operations/T_Execute.hpp +++ b/lib/include/rocRoller/Operations/T_Execute.hpp @@ -29,6 +29,8 @@ */ #pragma once +#include + #include #include #include @@ -44,7 +46,7 @@ namespace rocRoller namespace Operations { - struct E_Unary + struct ROCROLLER_DECLSPEC E_Unary { E_Unary(OperationTag a); E_Unary(const std::initializer_list&); @@ -65,7 +67,7 
@@ namespace rocRoller // Macro for declaring a new Unary XOp #define MAKE_UNARY_XOP(FUNC) \ - struct FUNC : public E_Unary \ + struct ROCROLLER_DECLSPEC FUNC : public E_Unary \ { \ FUNC(OperationTag a) \ : E_Unary(a) \ @@ -86,7 +88,7 @@ namespace rocRoller MAKE_UNARY_XOP(E_Not) MAKE_UNARY_XOP(E_RandomNumber) - struct E_Cvt : public E_Unary + struct ROCROLLER_DECLSPEC E_Cvt : public E_Unary { E_Cvt(OperationTag a, rocRoller::DataType destType) : E_Unary(a) @@ -106,7 +108,7 @@ namespace rocRoller rocRoller::DataType destType; }; - struct E_Binary + struct ROCROLLER_DECLSPEC E_Binary { E_Binary(OperationTag a, OperationTag b); E_Binary(const std::initializer_list&); @@ -129,7 +131,7 @@ namespace rocRoller // Macro for defining a new binary XOp #define MAKE_BINARY_XOP(FUNC) \ - struct FUNC : public E_Binary \ + struct ROCROLLER_DECLSPEC FUNC : public E_Binary \ { \ FUNC(OperationTag a, OperationTag b) \ : E_Binary(a, b) \ @@ -145,7 +147,7 @@ namespace rocRoller } \ }; - struct E_StochasticRoundingCvt : public E_Binary + struct ROCROLLER_DECLSPEC E_StochasticRoundingCvt : public E_Binary { E_StochasticRoundingCvt(OperationTag data, OperationTag seed, @@ -176,7 +178,7 @@ namespace rocRoller MAKE_BINARY_XOP(E_Or) MAKE_BINARY_XOP(E_GreaterThan) - struct E_Ternary + struct ROCROLLER_DECLSPEC E_Ternary { E_Ternary(OperationTag a, OperationTag b, OperationTag c); E_Ternary(const std::initializer_list&); @@ -200,7 +202,7 @@ namespace rocRoller // Macro for defining a new ternary XOp #define MAKE_TERNARY_XOP(FUNC) \ - struct FUNC : public E_Ternary \ + struct ROCROLLER_DECLSPEC FUNC : public E_Ternary \ { \ FUNC(OperationTag a, OperationTag b, OperationTag c) \ : E_Ternary(a, b, c) \ @@ -217,7 +219,7 @@ namespace rocRoller }; MAKE_TERNARY_XOP(E_Conditional) - class T_Execute : public BaseOperation + class ROCROLLER_DECLSPEC T_Execute : public BaseOperation { public: T_Execute() = delete; diff --git a/lib/include/rocRoller/Operations/T_Execute_fwd.hpp 
b/lib/include/rocRoller/Operations/T_Execute_fwd.hpp index cfc416a0..a73d4229 100644 --- a/lib/include/rocRoller/Operations/T_Execute_fwd.hpp +++ b/lib/include/rocRoller/Operations/T_Execute_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -33,24 +35,24 @@ namespace rocRoller { namespace Operations { - struct T_Execute; - struct E_Unary; - struct E_Binary; - struct E_Ternary; - struct E_Neg; - struct E_Abs; - struct E_RandomNumber; - struct E_Not; - struct E_Cvt; - struct E_StochasticRoundingCvt; - struct E_Add; - struct E_Sub; - struct E_Mul; - struct E_Div; - struct E_And; - struct E_Or; - struct E_GreaterThan; - struct E_Conditional; + struct ROCROLLER_DECLSPEC T_Execute; + struct ROCROLLER_DECLSPEC E_Unary; + struct ROCROLLER_DECLSPEC E_Binary; + struct ROCROLLER_DECLSPEC E_Ternary; + struct ROCROLLER_DECLSPEC E_Neg; + struct ROCROLLER_DECLSPEC E_Abs; + struct ROCROLLER_DECLSPEC E_RandomNumber; + struct ROCROLLER_DECLSPEC E_Not; + struct ROCROLLER_DECLSPEC E_Cvt; + struct ROCROLLER_DECLSPEC E_StochasticRoundingCvt; + struct ROCROLLER_DECLSPEC E_Add; + struct ROCROLLER_DECLSPEC E_Sub; + struct ROCROLLER_DECLSPEC E_Mul; + struct ROCROLLER_DECLSPEC E_Div; + struct ROCROLLER_DECLSPEC E_And; + struct ROCROLLER_DECLSPEC E_Or; + struct ROCROLLER_DECLSPEC E_GreaterThan; + struct ROCROLLER_DECLSPEC E_Conditional; using XOp = std::variant + #include #include @@ -34,7 +36,7 @@ namespace rocRoller { namespace Operations { - class T_Load_Linear : public BaseOperation + class ROCROLLER_DECLSPEC T_Load_Linear : public BaseOperation { public: T_Load_Linear() = delete; @@ -53,9 +55,9 @@ namespace rocRoller OperationTag m_srcTag; }; - std::ostream& operator<<(std::ostream& stream, T_Load_Linear const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, T_Load_Linear const& val); - class T_Load_Scalar : public BaseOperation + class ROCROLLER_DECLSPEC T_Load_Scalar : public BaseOperation { public: T_Load_Scalar() = delete; @@ -74,9 
+76,9 @@ namespace rocRoller OperationTag m_srcTag; }; - std::ostream& operator<<(std::ostream& stream, T_Load_Scalar const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, T_Load_Scalar const& val); - class T_Load_Tiled : public BaseOperation + class ROCROLLER_DECLSPEC T_Load_Tiled : public BaseOperation { public: T_Load_Tiled() = delete; @@ -95,9 +97,9 @@ namespace rocRoller OperationTag m_srcTag; }; - std::ostream& operator<<(std::ostream& stream, T_Load_Tiled const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, T_Load_Tiled const& val); - class T_Store_Linear : public BaseOperation + class ROCROLLER_DECLSPEC T_Store_Linear : public BaseOperation { template friend struct rocRoller::Serialization::MappingTraits; @@ -117,9 +119,10 @@ namespace rocRoller OperationTag m_dstTag; }; - std::ostream& operator<<(std::ostream& stream, T_Store_Linear const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + T_Store_Linear const& val); - class T_Store_Tiled : public BaseOperation + class ROCROLLER_DECLSPEC T_Store_Tiled : public BaseOperation { public: T_Store_Tiled() = delete; @@ -139,6 +142,6 @@ namespace rocRoller OperationTag m_dstTag; }; - std::ostream& operator<<(std::ostream& stream, T_Store_Tiled const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, T_Store_Tiled const& val); } } diff --git a/lib/include/rocRoller/Operations/T_Mul.hpp b/lib/include/rocRoller/Operations/T_Mul.hpp index afaf7f6d..00cec427 100644 --- a/lib/include/rocRoller/Operations/T_Mul.hpp +++ b/lib/include/rocRoller/Operations/T_Mul.hpp @@ -30,6 +30,8 @@ #pragma once +#include + #include #include @@ -40,7 +42,7 @@ namespace rocRoller { namespace Operations { - class T_Mul : public BaseOperation + class ROCROLLER_DECLSPEC T_Mul : public BaseOperation { public: T_Mul() = delete; diff --git a/lib/include/rocRoller/Operations/TensorScalar.hpp b/lib/include/rocRoller/Operations/TensorScalar.hpp 
index 1e514482..95ab4907 100644 --- a/lib/include/rocRoller/Operations/TensorScalar.hpp +++ b/lib/include/rocRoller/Operations/TensorScalar.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -36,7 +38,7 @@ namespace rocRoller { namespace Operations { - class Literal : public BaseOperation + class ROCROLLER_DECLSPEC Literal : public BaseOperation { public: Literal() = delete; @@ -54,9 +56,9 @@ namespace rocRoller CommandArgumentValue m_value; }; - std::ostream& operator<<(std::ostream& stream, Literal const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, Literal const& val); - class Scalar : public BaseOperation + class ROCROLLER_DECLSPEC Scalar : public BaseOperation { public: Scalar() = delete; @@ -82,9 +84,9 @@ namespace rocRoller VariableType m_variableType; }; - std::ostream& operator<<(std::ostream& stream, Scalar const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, Scalar const& val); - class Tensor : public BaseOperation + class ROCROLLER_DECLSPEC Tensor : public BaseOperation { public: Tensor() = delete; @@ -129,7 +131,7 @@ namespace rocRoller std::vector m_literalStrides; }; - std::ostream& operator<<(std::ostream& stream, Tensor const& val); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, Tensor const& val); } } diff --git a/lib/include/rocRoller/Operations/TensorScalar_fwd.hpp b/lib/include/rocRoller/Operations/TensorScalar_fwd.hpp index 29603f37..8648134b 100644 --- a/lib/include/rocRoller/Operations/TensorScalar_fwd.hpp +++ b/lib/include/rocRoller/Operations/TensorScalar_fwd.hpp @@ -26,11 +26,13 @@ #pragma once +#include + namespace rocRoller { namespace Operations { - class Scalar; - class Tensor; + class ROCROLLER_DECLSPEC Scalar; + class ROCROLLER_DECLSPEC Tensor; } } diff --git a/lib/include/rocRoller/ScheduledInstructions.hpp b/lib/include/rocRoller/ScheduledInstructions.hpp index 49c1179c..26f95157 100644 --- 
a/lib/include/rocRoller/ScheduledInstructions.hpp +++ b/lib/include/rocRoller/ScheduledInstructions.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -35,7 +37,7 @@ namespace rocRoller { - class ScheduledInstructions + class ROCROLLER_DECLSPEC ScheduledInstructions { public: explicit ScheduledInstructions(ContextPtr ctx); diff --git a/lib/include/rocRoller/ScheduledInstructions_fwd.hpp b/lib/include/rocRoller/ScheduledInstructions_fwd.hpp index 1d6553e1..2eace489 100644 --- a/lib/include/rocRoller/ScheduledInstructions_fwd.hpp +++ b/lib/include/rocRoller/ScheduledInstructions_fwd.hpp @@ -26,7 +26,9 @@ #pragma once +#include + namespace rocRoller { - class ScheduledInstructions; + class ROCROLLER_DECLSPEC ScheduledInstructions; } diff --git a/lib/include/rocRoller/Scheduling/CooperativeScheduler.hpp b/lib/include/rocRoller/Scheduling/CooperativeScheduler.hpp index fa3980a9..7898560c 100644 --- a/lib/include/rocRoller/Scheduling/CooperativeScheduler.hpp +++ b/lib/include/rocRoller/Scheduling/CooperativeScheduler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -45,7 +47,7 @@ namespace rocRoller * the minimum cost, and yielding from that stream until an * instruction with a non-zero cost is encountered. 
*/ - class CooperativeScheduler : public Scheduler + class ROCROLLER_DECLSPEC CooperativeScheduler : public Scheduler { public: CooperativeScheduler(ContextPtr, CostFunction); diff --git a/lib/include/rocRoller/Scheduling/CooperativeScheduler_fwd.hpp b/lib/include/rocRoller/Scheduling/CooperativeScheduler_fwd.hpp index 79016c60..e2eb9e45 100644 --- a/lib/include/rocRoller/Scheduling/CooperativeScheduler_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/CooperativeScheduler_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class CooperativeScheduler; + class ROCROLLER_DECLSPEC CooperativeScheduler; } } diff --git a/lib/include/rocRoller/Scheduling/Costs/Cost.hpp b/lib/include/rocRoller/Scheduling/Costs/Cost.hpp index d0e5862b..583f7598 100644 --- a/lib/include/rocRoller/Scheduling/Costs/Cost.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/Cost.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -45,7 +47,7 @@ namespace rocRoller * * - This class should be able to be made into `ComponentBase` class */ - class Cost + class ROCROLLER_DECLSPEC Cost { public: using Argument = std::tuple; @@ -84,7 +86,7 @@ namespace rocRoller std::weak_ptr m_ctx; }; - std::ostream& operator<<(std::ostream&, CostFunction); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, CostFunction); } } diff --git a/lib/include/rocRoller/Scheduling/Costs/Cost_fwd.hpp b/lib/include/rocRoller/Scheduling/Costs/Cost_fwd.hpp index c03ad6c1..d150350a 100644 --- a/lib/include/rocRoller/Scheduling/Costs/Cost_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/Cost_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller { @@ -41,8 +43,8 @@ namespace rocRoller Count }; - class Cost; + class ROCROLLER_DECLSPEC Cost; - std::string toString(CostFunction); + ROCROLLER_DECLSPEC std::string toString(CostFunction); } } diff --git a/lib/include/rocRoller/Scheduling/Costs/LinearWeightedCost.hpp 
b/lib/include/rocRoller/Scheduling/Costs/LinearWeightedCost.hpp index 60a901fa..2361c260 100644 --- a/lib/include/rocRoller/Scheduling/Costs/LinearWeightedCost.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/LinearWeightedCost.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -44,7 +46,7 @@ namespace rocRoller * multiplied by each coefficient to arrive at the cost of a particular * function. */ - struct Weights + struct ROCROLLER_DECLSPEC Weights { /// Does the instruction require a nop before it? /// One per nop. @@ -135,7 +137,7 @@ namespace rocRoller /** * LinearWeightedCost: Orders the instructions based on a linear combination of a number of factors. */ - class LinearWeightedCost : public Cost + class ROCROLLER_DECLSPEC LinearWeightedCost : public Cost { public: LinearWeightedCost(ContextPtr); diff --git a/lib/include/rocRoller/Scheduling/Costs/LinearWeightedCost_fwd.hpp b/lib/include/rocRoller/Scheduling/Costs/LinearWeightedCost_fwd.hpp index d277a3d0..b230772c 100644 --- a/lib/include/rocRoller/Scheduling/Costs/LinearWeightedCost_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/LinearWeightedCost_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class LinearWeightedCost; + class ROCROLLER_DECLSPEC LinearWeightedCost; } } diff --git a/lib/include/rocRoller/Scheduling/Costs/MinNopsCost.hpp b/lib/include/rocRoller/Scheduling/Costs/MinNopsCost.hpp index f0a0442d..f9e000c5 100644 --- a/lib/include/rocRoller/Scheduling/Costs/MinNopsCost.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/MinNopsCost.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -41,7 +43,7 @@ namespace rocRoller /** * MinNopsCost: Orders the instructions based on the number of Nops. 
*/ - class MinNopsCost : public Cost + class ROCROLLER_DECLSPEC MinNopsCost : public Cost { public: MinNopsCost(ContextPtr); diff --git a/lib/include/rocRoller/Scheduling/Costs/MinNopsCost_fwd.hpp b/lib/include/rocRoller/Scheduling/Costs/MinNopsCost_fwd.hpp index ab354f3e..7229d4b6 100644 --- a/lib/include/rocRoller/Scheduling/Costs/MinNopsCost_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/MinNopsCost_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class MinNopsCost; + class ROCROLLER_DECLSPEC MinNopsCost; } } diff --git a/lib/include/rocRoller/Scheduling/Costs/NoneCost.hpp b/lib/include/rocRoller/Scheduling/Costs/NoneCost.hpp index 7a5ebbca..7bd068f2 100644 --- a/lib/include/rocRoller/Scheduling/Costs/NoneCost.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/NoneCost.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -41,7 +43,7 @@ namespace rocRoller /** * NoneCost: This cost can be used for cost-independent schedulers. If it's ever initialized an exception is thrown. 
*/ - class NoneCost : public Cost + class ROCROLLER_DECLSPEC NoneCost : public Cost { public: NoneCost(ContextPtr); diff --git a/lib/include/rocRoller/Scheduling/Costs/NoneCost_fwd.hpp b/lib/include/rocRoller/Scheduling/Costs/NoneCost_fwd.hpp index 6d795c3b..464b56a2 100644 --- a/lib/include/rocRoller/Scheduling/Costs/NoneCost_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/NoneCost_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class NoneCost; + class ROCROLLER_DECLSPEC NoneCost; } } diff --git a/lib/include/rocRoller/Scheduling/Costs/UniformCost.hpp b/lib/include/rocRoller/Scheduling/Costs/UniformCost.hpp index 3ebd2a20..78a729e2 100644 --- a/lib/include/rocRoller/Scheduling/Costs/UniformCost.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/UniformCost.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -41,7 +43,7 @@ namespace rocRoller /** * UniformCost: Gives zero cost to all instructions. */ - class UniformCost : public Cost + class ROCROLLER_DECLSPEC UniformCost : public Cost { public: UniformCost(ContextPtr); diff --git a/lib/include/rocRoller/Scheduling/Costs/UniformCost_fwd.hpp b/lib/include/rocRoller/Scheduling/Costs/UniformCost_fwd.hpp index 8f917dfc..52441b24 100644 --- a/lib/include/rocRoller/Scheduling/Costs/UniformCost_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/UniformCost_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class UniformCost; + class ROCROLLER_DECLSPEC UniformCost; } } diff --git a/lib/include/rocRoller/Scheduling/Costs/WaitCntNopCost.hpp b/lib/include/rocRoller/Scheduling/Costs/WaitCntNopCost.hpp index e440248f..517c4866 100644 --- a/lib/include/rocRoller/Scheduling/Costs/WaitCntNopCost.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/WaitCntNopCost.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -41,7 +43,7 @@ namespace rocRoller /** * WaitCntNopCost: Orders the 
instructions based on the number of Nops and WaitCnts. */ - class WaitCntNopCost : public Cost + class ROCROLLER_DECLSPEC WaitCntNopCost : public Cost { public: WaitCntNopCost(ContextPtr); diff --git a/lib/include/rocRoller/Scheduling/Costs/WaitCntNopCost_fwd.hpp b/lib/include/rocRoller/Scheduling/Costs/WaitCntNopCost_fwd.hpp index 5df7f761..5df39880 100644 --- a/lib/include/rocRoller/Scheduling/Costs/WaitCntNopCost_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/WaitCntNopCost_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class WaitCntNopCost; + class ROCROLLER_DECLSPEC WaitCntNopCost; } } diff --git a/lib/include/rocRoller/Scheduling/MetaObserver.hpp b/lib/include/rocRoller/Scheduling/MetaObserver.hpp index 3d64d020..906d4d26 100644 --- a/lib/include/rocRoller/Scheduling/MetaObserver.hpp +++ b/lib/include/rocRoller/Scheduling/MetaObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -42,7 +44,7 @@ namespace rocRoller namespace Scheduling { template - class MetaObserver : public IObserver + class ROCROLLER_DECLSPEC MetaObserver : public IObserver { public: using Tup = std::tuple; diff --git a/lib/include/rocRoller/Scheduling/Observers/AllocatingObserver.hpp b/lib/include/rocRoller/Scheduling/Observers/AllocatingObserver.hpp index 752ee0e9..10c8267d 100644 --- a/lib/include/rocRoller/Scheduling/Observers/AllocatingObserver.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/AllocatingObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -34,7 +36,7 @@ namespace rocRoller { namespace Scheduling { - class AllocatingObserver + class ROCROLLER_DECLSPEC AllocatingObserver { public: AllocatingObserver() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/FileWritingObserver.hpp b/lib/include/rocRoller/Scheduling/Observers/FileWritingObserver.hpp index ec9ef1f3..7537aa48 100644 --- 
a/lib/include/rocRoller/Scheduling/Observers/FileWritingObserver.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/FileWritingObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -39,7 +41,7 @@ namespace rocRoller { namespace Scheduling { - class FileWritingObserver + class ROCROLLER_DECLSPEC FileWritingObserver { public: FileWritingObserver() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/FunctionalUnit/MFMAObserver.hpp b/lib/include/rocRoller/Scheduling/Observers/FunctionalUnit/MFMAObserver.hpp index 8369536d..27dbbddd 100644 --- a/lib/include/rocRoller/Scheduling/Observers/FunctionalUnit/MFMAObserver.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/FunctionalUnit/MFMAObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -37,7 +39,7 @@ namespace rocRoller namespace Scheduling { - class MFMAObserver + class ROCROLLER_DECLSPEC MFMAObserver { public: MFMAObserver(); diff --git a/lib/include/rocRoller/Scheduling/Observers/FunctionalUnit/WMMAObserver.hpp b/lib/include/rocRoller/Scheduling/Observers/FunctionalUnit/WMMAObserver.hpp index 6f0f4ba5..02aa14f6 100644 --- a/lib/include/rocRoller/Scheduling/Observers/FunctionalUnit/WMMAObserver.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/FunctionalUnit/WMMAObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -37,7 +39,7 @@ namespace rocRoller namespace Scheduling { - class WMMAObserver + class ROCROLLER_DECLSPEC WMMAObserver { public: WMMAObserver(); diff --git a/lib/include/rocRoller/Scheduling/Observers/ObserverCreation.hpp b/lib/include/rocRoller/Scheduling/Observers/ObserverCreation.hpp index 0bd5f26b..a0e57e19 100644 --- a/lib/include/rocRoller/Scheduling/Observers/ObserverCreation.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/ObserverCreation.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -51,7 +53,7 @@ namespace rocRoller std::shared_ptr 
createObserver(ContextPtr const& ctx); template - struct PotentialObservers + struct ROCROLLER_DECLSPEC PotentialObservers { }; diff --git a/lib/include/rocRoller/Scheduling/Observers/RegisterLivenessObserver.hpp b/lib/include/rocRoller/Scheduling/Observers/RegisterLivenessObserver.hpp index 81fea300..f66f946f 100644 --- a/lib/include/rocRoller/Scheduling/Observers/RegisterLivenessObserver.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/RegisterLivenessObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -46,7 +48,7 @@ namespace rocRoller Count }; - struct LivenessHistoryEntry + struct ROCROLLER_DECLSPEC LivenessHistoryEntry { std::string instruction; std::unordered_map> @@ -57,7 +59,7 @@ namespace rocRoller size_t lineNumber = 0; }; - class RegisterLivenessObserver + class ROCROLLER_DECLSPEC RegisterLivenessObserver { public: RegisterLivenessObserver() {} @@ -150,8 +152,9 @@ namespace rocRoller std::string livenessString() const; }; - std::string toString(RegisterLiveState const& rls); - std::ostream& operator<<(std::ostream& stream, RegisterLiveState const& rls); + ROCROLLER_DECLSPEC std::string toString(RegisterLiveState const& rls); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, + RegisterLiveState const& rls); static_assert(CObserverRuntime); } diff --git a/lib/include/rocRoller/Scheduling/Observers/SupportedInstructionObserver.hpp b/lib/include/rocRoller/Scheduling/Observers/SupportedInstructionObserver.hpp index 50f5a095..6815b8a4 100644 --- a/lib/include/rocRoller/Scheduling/Observers/SupportedInstructionObserver.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/SupportedInstructionObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -42,7 +44,7 @@ namespace rocRoller * present in the GPUArchitecture, then an exception is thrown. 
* */ - class SupportedInstructionObserver + class ROCROLLER_DECLSPEC SupportedInstructionObserver { public: SupportedInstructionObserver() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/BufferStoreDwordXXRead.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/BufferStoreDwordXXRead.hpp index 226d64d2..752efaee 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/BufferStoreDwordXXRead.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/BufferStoreDwordXXRead.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -49,7 +51,8 @@ namespace rocRoller * NOTE: If soffset argument is an SGPR, no NOPs required * */ - class BufferStoreDwordXXRead : public WaitStateObserver + class ROCROLLER_DECLSPEC BufferStoreDwordXXRead + : public WaitStateObserver { public: BufferStoreDwordXXRead() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/ACCVGPRReadWrite.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/ACCVGPRReadWrite.hpp index e28f3195..412a6f72 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/ACCVGPRReadWrite.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/ACCVGPRReadWrite.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -42,7 +44,7 @@ namespace rocRoller * | 908 | v_accvgpr_read write | v_accvgpr_write read SrcA | 2 | * */ - class ACCVGPRReadWrite : public WaitStateObserver + class ROCROLLER_DECLSPEC ACCVGPRReadWrite : public WaitStateObserver { public: ACCVGPRReadWrite() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/ACCVGPRWriteWrite.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/ACCVGPRWriteWrite.hpp index 1857cfc3..c5d1f277 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/ACCVGPRWriteWrite.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/ACCVGPRWriteWrite.hpp @@ -26,6 +26,8 @@ #pragma once +#include + 
#include namespace rocRoller @@ -42,7 +44,7 @@ namespace rocRoller * | 908 | v_accvgpr_write write | v_accvgpr_read read SrcA | 3 | * */ - class ACCVGPRWriteWrite : public WaitStateObserver + class ROCROLLER_DECLSPEC ACCVGPRWriteWrite : public WaitStateObserver { public: ACCVGPRWriteWrite() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/CMPXWriteExec.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/CMPXWriteExec.hpp index 583e5cb1..c7441e75 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/CMPXWriteExec.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/CMPXWriteExec.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -43,7 +45,7 @@ namespace rocRoller * | 94x | v_cmpx* write EXEC | v_mfma* | 4 | * */ - class CMPXWriteExec : public WaitStateObserver + class ROCROLLER_DECLSPEC CMPXWriteExec : public WaitStateObserver { public: CMPXWriteExec() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DGEMM16x16x4Write.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DGEMM16x16x4Write.hpp index 5ef0ad05..f520b59f 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DGEMM16x16x4Write.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DGEMM16x16x4Write.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -59,7 +61,7 @@ namespace rocRoller * Note: v_mfma_f64_16x16x4_f64 is an equivalent variant for 94x * */ - class DGEMM16x16x4Write : public WaitStateObserver + class ROCROLLER_DECLSPEC DGEMM16x16x4Write : public WaitStateObserver { public: DGEMM16x16x4Write() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DGEMM4x4x4Write.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DGEMM4x4x4Write.hpp index 8a3956fc..82344122 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DGEMM4x4x4Write.hpp +++ 
b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DGEMM4x4x4Write.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -55,7 +57,7 @@ namespace rocRoller * | 94x | v_mfma_f64_4x4x4f64 write | flat* read overlapped | 9 | * */ - class DGEMM4x4x4Write : public WaitStateObserver + class ROCROLLER_DECLSPEC DGEMM4x4x4Write : public WaitStateObserver { public: DGEMM4x4x4Write() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DLWrite.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DLWrite.hpp index b5c28c82..9955122d 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DLWrite.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/DLWrite.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -45,7 +47,7 @@ namespace rocRoller * | 94x | v_dot* write | Different opcode | 3 | * */ - class DLWrite : public WaitStateObserver + class ROCROLLER_DECLSPEC DLWrite : public WaitStateObserver { public: DLWrite() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/VALUWrite.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/VALUWrite.hpp index 25a3b21d..782618b2 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/VALUWrite.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/VALUWrite.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -43,7 +45,7 @@ namespace rocRoller * | 94x | v_* write | v_mfma* read | 2 | * */ - class VALUWrite : public WaitStateObserver + class ROCROLLER_DECLSPEC VALUWrite : public WaitStateObserver { public: VALUWrite() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC908.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC908.hpp index 23c2d5b8..d31cfd1b 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC908.hpp +++ 
b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC908.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -42,7 +44,7 @@ namespace rocRoller * | 908 | v_mfma* read SrcC (16 pass) | v_accvgpr_write write overlapped | 13 | * */ - class XDLReadSrcC908 : public WaitStateObserver + class ROCROLLER_DECLSPEC XDLReadSrcC908 : public WaitStateObserver { public: XDLReadSrcC908() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC90a.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC90a.hpp index d5cbaf60..7d24b608 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC90a.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC90a.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -42,7 +44,7 @@ namespace rocRoller * | 90a | v_mfma* read SrcC (16 pass) | v_* write | 19 | * */ - class XDLReadSrcC90a : public WaitStateObserver + class ROCROLLER_DECLSPEC XDLReadSrcC90a : public WaitStateObserver { public: XDLReadSrcC90a() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC94x.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC94x.hpp index 0e35836f..f93969fa 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC94x.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLReadSrcC94x.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -43,7 +45,7 @@ namespace rocRoller * | 94x | v_mfma* read SrcC (16 pass) | v_* write | 15 | * */ - class XDLReadSrcC94x : public WaitStateObserver + class ROCROLLER_DECLSPEC XDLReadSrcC94x : public WaitStateObserver { public: XDLReadSrcC94x() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite908.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite908.hpp index 1d7c2941..272ef5bd 100644 --- 
a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite908.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite908.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -48,7 +50,7 @@ namespace rocRoller * | 908 | v_mfma* write (16 pass) | v_accvgpr_write write | 15 | * */ - class XDLWrite908 : public WaitStateObserver + class ROCROLLER_DECLSPEC XDLWrite908 : public WaitStateObserver { public: XDLWrite908() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite90a.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite90a.hpp index 7bc84d3d..9d53c2f2 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite90a.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite90a.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -63,7 +65,7 @@ namespace rocRoller * | 90a | v_mfma* write (16 pass) | v_* read/write | 19 | * */ - class XDLWrite90a : public WaitStateObserver + class ROCROLLER_DECLSPEC XDLWrite90a : public WaitStateObserver { public: XDLWrite90a() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite94x.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite94x.hpp index 1e2bb9f4..163d5a59 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite94x.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/MFMA/XDLWrite94x.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -101,7 +103,7 @@ namespace rocRoller * | 94x | v_mfma* write (16 pass) | v_* read/write | 19 | * */ - class XDLWrite94x : public WaitStateObserver + class ROCROLLER_DECLSPEC XDLWrite94x : public WaitStateObserver { public: XDLWrite94x() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/OPSEL94x.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/OPSEL94x.hpp index b09cb7ef..828e4b02 100644 --- 
a/lib/include/rocRoller/Scheduling/Observers/WaitState/OPSEL94x.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/OPSEL94x.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -41,7 +43,7 @@ namespace rocRoller * | 94x | v_* using SDWA | v_* read | 1 | * */ - class OPSEL94x : public WaitStateObserver + class ROCROLLER_DECLSPEC OPSEL94x : public WaitStateObserver { public: OPSEL94x() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUTransWrite94x.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUTransWrite94x.hpp index 6afc27bc..98f63f73 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUTransWrite94x.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUTransWrite94x.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -59,7 +61,7 @@ namespace rocRoller * | 94x | v_log_legacy_f32 | v_* | 1 | * */ - class VALUTransWrite94x : public WaitStateObserver + class ROCROLLER_DECLSPEC VALUTransWrite94x : public WaitStateObserver { public: VALUTransWrite94x() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteReadlane94x.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteReadlane94x.hpp index 45ae5f6d..d2c6f4e3 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteReadlane94x.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteReadlane94x.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -41,7 +43,8 @@ namespace rocRoller * | 950 | v_* write | v_permlane* read | 2 | * */ - class VALUWriteReadlane94x : public WaitStateObserver + class ROCROLLER_DECLSPEC VALUWriteReadlane94x + : public WaitStateObserver { public: VALUWriteReadlane94x() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteSGPRVCC94x.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteSGPRVCC94x.hpp index c757bf2b..4f9649d4 100644 
--- a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteSGPRVCC94x.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteSGPRVCC94x.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -57,7 +59,7 @@ namespace rocRoller * NOTE: If the SGPR/VCC is read as a carry in these cases, 0 NOPs are required. * */ - class VALUWriteSGPRVCC94x : public WaitStateObserver + class ROCROLLER_DECLSPEC VALUWriteSGPRVCC94x : public WaitStateObserver { public: VALUWriteSGPRVCC94x() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteSGPRVMEM.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteSGPRVMEM.hpp index 3fc76f22..59a8f614 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteSGPRVMEM.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteSGPRVMEM.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -42,7 +44,7 @@ namespace rocRoller * | 94x | v_* write SGPR | VMEM read SGPR | 5 | * */ - class VALUWriteSGPRVMEM : public WaitStateObserver + class ROCROLLER_DECLSPEC VALUWriteSGPRVMEM : public WaitStateObserver { public: VALUWriteSGPRVMEM() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteVCCVDIVFMAS.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteVCCVDIVFMAS.hpp index 07e597ea..8a207b4c 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteVCCVDIVFMAS.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/VALUWriteVCCVDIVFMAS.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -42,7 +44,8 @@ namespace rocRoller * | 94x | v_* write VCC | v_div_fmas | 4 | * */ - class VALUWriteVCCVDIVFMAS : public WaitStateObserver + class ROCROLLER_DECLSPEC VALUWriteVCCVDIVFMAS + : public WaitStateObserver { public: VALUWriteVCCVDIVFMAS() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/VCMPXWrite94x.hpp 
b/lib/include/rocRoller/Scheduling/Observers/WaitState/VCMPXWrite94x.hpp index 32353a2b..42c032dc 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/VCMPXWrite94x.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/VCMPXWrite94x.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -45,7 +47,7 @@ namespace rocRoller * | 950 | v_cmpx_* | v_permlane* | 4 | * */ - class VCMPXWrite94x : public WaitStateObserver + class ROCROLLER_DECLSPEC VCMPXWrite94x : public WaitStateObserver { public: VCMPXWrite94x() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAReadSrcD.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAReadSrcD.hpp index a5274d98..fef12610 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAReadSrcD.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAReadSrcD.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -41,7 +43,7 @@ namespace rocRoller * | 120x | v_wmma* read SrcC (16 pass) | v_* read | 8 | * */ - class WMMAReadSrcD : public WaitStateObserver + class ROCROLLER_DECLSPEC WMMAReadSrcD : public WaitStateObserver { public: WMMAReadSrcD() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAWrite.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAWrite.hpp index eac26ae5..8b50b222 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAWrite.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAWrite.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -40,7 +42,7 @@ namespace rocRoller * | 120x | v_wmma* | v_wmma* read SrcA/B | 1 | * */ - class WMMAWrite : public WaitStateObserver + class ROCROLLER_DECLSPEC WMMAWrite : public WaitStateObserver { public: WMMAWrite() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAWriteSrcD.hpp 
b/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAWriteSrcD.hpp index 44ad289e..7f571eb1 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAWriteSrcD.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/WMMA/WMMAWriteSrcD.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -41,7 +43,7 @@ namespace rocRoller * | 120x | v_wmma* read SrcC (16 pass) | v_* write | 8 | * */ - class WMMAWriteSrcD : public WaitStateObserver + class ROCROLLER_DECLSPEC WMMAWriteSrcD : public WaitStateObserver { public: WMMAWriteSrcD() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitState/WaitStateObserver.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitState/WaitStateObserver.hpp index 14eb3cd3..286e6a6d 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitState/WaitStateObserver.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitState/WaitStateObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -104,7 +106,7 @@ namespace rocRoller }; template - class WaitStateObserver + class ROCROLLER_DECLSPEC WaitStateObserver { public: WaitStateObserver() {} diff --git a/lib/include/rocRoller/Scheduling/Observers/WaitcntObserver.hpp b/lib/include/rocRoller/Scheduling/Observers/WaitcntObserver.hpp index 4cfa6652..df253f5c 100644 --- a/lib/include/rocRoller/Scheduling/Observers/WaitcntObserver.hpp +++ b/lib/include/rocRoller/Scheduling/Observers/WaitcntObserver.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -43,7 +45,7 @@ namespace rocRoller * @brief This struct is used to store the _unallocated_ state of the waitcnt queues. 
* */ - struct WaitcntState + struct ROCROLLER_DECLSPEC WaitcntState { public: WaitcntState(); @@ -74,7 +76,7 @@ namespace rocRoller WaitQueueMap m_typeInQueue; }; - class WaitcntObserver + class ROCROLLER_DECLSPEC WaitcntObserver { public: WaitcntObserver(); diff --git a/lib/include/rocRoller/Scheduling/PriorityScheduler.hpp b/lib/include/rocRoller/Scheduling/PriorityScheduler.hpp index 16589e6e..1cc8db56 100644 --- a/lib/include/rocRoller/Scheduling/PriorityScheduler.hpp +++ b/lib/include/rocRoller/Scheduling/PriorityScheduler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -45,7 +47,7 @@ namespace rocRoller * instruction from the the lowest index stream with the * least cost. */ - class PriorityScheduler : public Scheduler + class ROCROLLER_DECLSPEC PriorityScheduler : public Scheduler { public: PriorityScheduler(ContextPtr, CostFunction); diff --git a/lib/include/rocRoller/Scheduling/PriorityScheduler_fwd.hpp b/lib/include/rocRoller/Scheduling/PriorityScheduler_fwd.hpp index ea5e54c4..bbf0d9ac 100644 --- a/lib/include/rocRoller/Scheduling/PriorityScheduler_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/PriorityScheduler_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class PriorityScheduler; + class ROCROLLER_DECLSPEC PriorityScheduler; } } diff --git a/lib/include/rocRoller/Scheduling/RandomScheduler.hpp b/lib/include/rocRoller/Scheduling/RandomScheduler.hpp index 4dfe1627..4f7a4a8d 100644 --- a/lib/include/rocRoller/Scheduling/RandomScheduler.hpp +++ b/lib/include/rocRoller/Scheduling/RandomScheduler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -45,7 +47,7 @@ namespace rocRoller * The same random seed should produce the same program, regardless of addition or * removal of comment-only instructions. Respects the locking rules. 
*/ - class RandomScheduler : public Scheduler + class ROCROLLER_DECLSPEC RandomScheduler : public Scheduler { public: RandomScheduler(ContextPtr); diff --git a/lib/include/rocRoller/Scheduling/RandomScheduler_fwd.hpp b/lib/include/rocRoller/Scheduling/RandomScheduler_fwd.hpp index ec193ca9..1c504e6b 100644 --- a/lib/include/rocRoller/Scheduling/RandomScheduler_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/RandomScheduler_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class RandomScheduler; + class ROCROLLER_DECLSPEC RandomScheduler; } } diff --git a/lib/include/rocRoller/Scheduling/RoundRobinScheduler.hpp b/lib/include/rocRoller/Scheduling/RoundRobinScheduler.hpp index 4045bccd..afce6250 100644 --- a/lib/include/rocRoller/Scheduling/RoundRobinScheduler.hpp +++ b/lib/include/rocRoller/Scheduling/RoundRobinScheduler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -44,7 +46,7 @@ namespace rocRoller * * Must also follow the locking rules. 
*/ - class RoundRobinScheduler : public Scheduler + class ROCROLLER_DECLSPEC RoundRobinScheduler : public Scheduler { public: RoundRobinScheduler(ContextPtr); diff --git a/lib/include/rocRoller/Scheduling/RoundRobinScheduler_fwd.hpp b/lib/include/rocRoller/Scheduling/RoundRobinScheduler_fwd.hpp index 8f3f58e3..a8038dce 100644 --- a/lib/include/rocRoller/Scheduling/RoundRobinScheduler_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/RoundRobinScheduler_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class RoundRobinScheduler; + class ROCROLLER_DECLSPEC RoundRobinScheduler; } } diff --git a/lib/include/rocRoller/Scheduling/Scheduler.hpp b/lib/include/rocRoller/Scheduling/Scheduler.hpp index 4640020d..469464ce 100644 --- a/lib/include/rocRoller/Scheduling/Scheduler.hpp +++ b/lib/include/rocRoller/Scheduling/Scheduler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -40,7 +42,7 @@ namespace rocRoller { namespace Scheduling { - class LockState + class ROCROLLER_DECLSPEC LockState { public: explicit LockState(ContextPtr ctx); @@ -79,7 +81,7 @@ namespace rocRoller * * - This class should be able to be made into `ComponentBase` class */ - class Scheduler + class ROCROLLER_DECLSPEC Scheduler { public: using Argument = std::tuple; @@ -132,8 +134,8 @@ namespace rocRoller std::vector::iterator>& iterators); }; - std::ostream& operator<<(std::ostream&, SchedulerProcedure const&); - std::ostream& operator<<(std::ostream&, Dependency const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, SchedulerProcedure const&); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, Dependency const&); } } diff --git a/lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp b/lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp index a53d4882..3eba78bb 100644 --- a/lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include 
+ #include namespace rocRoller { @@ -52,10 +54,10 @@ namespace rocRoller Count }; - class Scheduler; - class LockState; + class ROCROLLER_DECLSPEC Scheduler; + class ROCROLLER_DECLSPEC LockState; - std::string toString(SchedulerProcedure const&); - std::string toString(Dependency const&); + ROCROLLER_DECLSPEC std::string toString(SchedulerProcedure const&); + ROCROLLER_DECLSPEC std::string toString(Dependency const&); } } diff --git a/lib/include/rocRoller/Scheduling/Scheduling.hpp b/lib/include/rocRoller/Scheduling/Scheduling.hpp index 51c06caa..00f877eb 100644 --- a/lib/include/rocRoller/Scheduling/Scheduling.hpp +++ b/lib/include/rocRoller/Scheduling/Scheduling.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -47,7 +49,7 @@ namespace rocRoller * 1. Have a neutral default value, either in the declaration or in the constructor * 2. Be added to the combine() function which is used to merge values from different observers. */ - struct InstructionStatus + struct ROCROLLER_DECLSPEC InstructionStatus { unsigned int stallCycles = 0; WaitCount waitCount; @@ -124,7 +126,7 @@ namespace rocRoller } -> std::convertible_to; }; - struct IObserver + struct ROCROLLER_DECLSPEC IObserver { virtual ~IObserver(); diff --git a/lib/include/rocRoller/Scheduling/Scheduling_fwd.hpp b/lib/include/rocRoller/Scheduling/Scheduling_fwd.hpp index e272e6ff..ca18c0e0 100644 --- a/lib/include/rocRoller/Scheduling/Scheduling_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Scheduling_fwd.hpp @@ -26,12 +26,14 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - struct InstructionStatus; - struct IObserver; + struct ROCROLLER_DECLSPEC InstructionStatus; + struct ROCROLLER_DECLSPEC IObserver; } } diff --git a/lib/include/rocRoller/Scheduling/SequentialScheduler.hpp b/lib/include/rocRoller/Scheduling/SequentialScheduler.hpp index 170edb6d..eef415ac 100644 --- a/lib/include/rocRoller/Scheduling/SequentialScheduler.hpp +++ 
b/lib/include/rocRoller/Scheduling/SequentialScheduler.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -48,7 +50,7 @@ namespace rocRoller * comments from the beginning of new streams, for the purpose of finding * and running Deallocate nodes as soon as they are available. */ - class SequentialScheduler : public Scheduler + class ROCROLLER_DECLSPEC SequentialScheduler : public Scheduler { public: SequentialScheduler(ContextPtr); diff --git a/lib/include/rocRoller/Scheduling/SequentialScheduler_fwd.hpp b/lib/include/rocRoller/Scheduling/SequentialScheduler_fwd.hpp index a685d4e5..5f50e522 100644 --- a/lib/include/rocRoller/Scheduling/SequentialScheduler_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/SequentialScheduler_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { namespace Scheduling { - class SequentialScheduler; + class ROCROLLER_DECLSPEC SequentialScheduler; } } diff --git a/lib/include/rocRoller/Scheduling/WaitStateHazardCounter.hpp b/lib/include/rocRoller/Scheduling/WaitStateHazardCounter.hpp index eda76330..09e756f1 100644 --- a/lib/include/rocRoller/Scheduling/WaitStateHazardCounter.hpp +++ b/lib/include/rocRoller/Scheduling/WaitStateHazardCounter.hpp @@ -26,13 +26,15 @@ #pragma once +#include + #include namespace rocRoller { namespace Scheduling { - class WaitStateHazardCounter + class ROCROLLER_DECLSPEC WaitStateHazardCounter { public: WaitStateHazardCounter() {} diff --git a/lib/include/rocRoller/Serialization/AssemblyKernel.hpp b/lib/include/rocRoller/Serialization/AssemblyKernel.hpp index 683e9a48..b0b96695 100644 --- a/lib/include/rocRoller/Serialization/AssemblyKernel.hpp +++ b/lib/include/rocRoller/Serialization/AssemblyKernel.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #ifdef ROCROLLER_USE_LLVM #include #endif @@ -45,7 +47,7 @@ namespace rocRoller namespace Serialization { template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = 
IOTraits; @@ -79,7 +81,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; @@ -179,7 +181,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; diff --git a/lib/include/rocRoller/Serialization/Base.hpp b/lib/include/rocRoller/Serialization/Base.hpp index 2050cbfb..cd6e443e 100644 --- a/lib/include/rocRoller/Serialization/Base.hpp +++ b/lib/include/rocRoller/Serialization/Base.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -41,7 +43,7 @@ namespace llvm { namespace yaml { - struct EmptyContext; + struct ROCROLLER_DECLSPEC EmptyContext; } } #endif @@ -53,19 +55,19 @@ namespace rocRoller #ifdef ROCROLLER_USE_LLVM using EmptyContext = llvm::yaml::EmptyContext; #else - struct EmptyContext + struct ROCROLLER_DECLSPEC EmptyContext { }; #endif /** - * Override this struct for a type to use a custom constructor. + * Override this struct for a type to use a custom constructor. * * Useful for Variant and SharedPointer is the types default * contructor has been explicitly deleted. */ template - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static T call() { @@ -74,7 +76,7 @@ namespace rocRoller }; /** - * Override this struct for a type to use a custom string serialization. + * Override this struct for a type to use a custom string serialization. * * You must implement: * 1. 
static std::string output(T) @@ -89,41 +91,41 @@ namespace rocRoller * */ template - struct ScalarTraits + struct ROCROLLER_DECLSPEC ScalarTraits { }; template - struct IOTraits + struct ROCROLLER_DECLSPEC IOTraits { }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; }; template - struct CustomMappingTraits + struct ROCROLLER_DECLSPEC CustomMappingTraits { static const bool flow = false; }; template - struct SequenceTraits + struct ROCROLLER_DECLSPEC SequenceTraits { using Value = int; static const bool flow = false; }; template - struct EnumTraits + struct ROCROLLER_DECLSPEC EnumTraits { }; template - struct EmptyMappingTraits + struct ROCROLLER_DECLSPEC EmptyMappingTraits { using iot = IOTraits; // static_assert(Object::HasValue == false, @@ -135,7 +137,7 @@ namespace rocRoller }; template - struct ValueMappingTraits + struct ROCROLLER_DECLSPEC ValueMappingTraits { using iot = IOTraits; static_assert(Object::HasValue == true, @@ -149,7 +151,7 @@ namespace rocRoller }; template - struct IndexMappingTraits + struct ROCROLLER_DECLSPEC IndexMappingTraits { using iot = IOTraits; static_assert(Object::HasIndex == true, @@ -163,7 +165,7 @@ namespace rocRoller }; template - struct IndexValueMappingTraits + struct ROCROLLER_DECLSPEC IndexValueMappingTraits { using iot = IOTraits; static_assert(Object::HasIndex == true && Object::HasValue == true, @@ -181,44 +183,47 @@ namespace rocRoller typename IO, bool HasIndex = Object::HasIndex, bool HasValue = Object::HasValue> - struct AutoMappingTraits + struct ROCROLLER_DECLSPEC AutoMappingTraits { }; template - struct AutoMappingTraits : public EmptyMappingTraits + struct ROCROLLER_DECLSPEC AutoMappingTraits + : public EmptyMappingTraits { }; template - struct AutoMappingTraits : public ValueMappingTraits + struct ROCROLLER_DECLSPEC AutoMappingTraits + : public ValueMappingTraits { }; template - struct AutoMappingTraits : public IndexMappingTraits + struct 
ROCROLLER_DECLSPEC AutoMappingTraits + : public IndexMappingTraits { }; template - struct AutoMappingTraits + struct ROCROLLER_DECLSPEC AutoMappingTraits : public IndexValueMappingTraits { }; template - struct SubclassMappingTraits + struct ROCROLLER_DECLSPEC SubclassMappingTraits { }; template - struct PointerMappingTraits; + struct ROCROLLER_DECLSPEC PointerMappingTraits; /** * Used by AutoMappingTraits to serialize an object via a std::shared_ptr where the object is of a type derived from Base. */ template - struct PointerMappingTraits + struct ROCROLLER_DECLSPEC PointerMappingTraits { using Subclass = typename SubclassPtr::element_type; using iot = IOTraits; @@ -264,7 +269,7 @@ namespace rocRoller * Set Nullable to true to allow serializing `nullptr`. */ template - struct SharedPointerMappingTraits + struct ROCROLLER_DECLSPEC SharedPointerMappingTraits { using Element = typename SharedPtr::element_type; using iot = IOTraits; @@ -309,7 +314,7 @@ namespace rocRoller }; template - struct BaseClassMappingTraits + struct ROCROLLER_DECLSPEC BaseClassMappingTraits { using iot = IOTraits; @@ -330,10 +335,10 @@ namespace rocRoller }; template - struct DefaultSubclassMappingTraits; + struct ROCROLLER_DECLSPEC DefaultSubclassMappingTraits; template - struct DefaultSubclassMappingTraits + struct ROCROLLER_DECLSPEC DefaultSubclassMappingTraits { using iot = IOTraits; using SubclassFn = bool(IO&, typename std::shared_ptr&, Context&); @@ -357,7 +362,7 @@ namespace rocRoller }; template - struct DefaultSubclassMappingTraits + struct ROCROLLER_DECLSPEC DefaultSubclassMappingTraits { using iot = IOTraits; using SubclassFn = bool(IO&, typename std::shared_ptr&); diff --git a/lib/include/rocRoller/Serialization/Base_fwd.hpp b/lib/include/rocRoller/Serialization/Base_fwd.hpp index 3eafc775..2b0174c7 100644 --- a/lib/include/rocRoller/Serialization/Base_fwd.hpp +++ b/lib/include/rocRoller/Serialization/Base_fwd.hpp @@ -26,30 +26,32 @@ #pragma once +#include + namespace rocRoller { 
namespace Serialization { template - struct IOTraits; + struct ROCROLLER_DECLSPEC IOTraits; template - struct MappingTraits; + struct ROCROLLER_DECLSPEC MappingTraits; template - struct EnumTraits; + struct ROCROLLER_DECLSPEC EnumTraits; template - struct CustomMappingTraits; + struct ROCROLLER_DECLSPEC CustomMappingTraits; template - struct SequenceTraits; + struct ROCROLLER_DECLSPEC SequenceTraits; template - struct EnumTraits; + struct ROCROLLER_DECLSPEC EnumTraits; template - struct EmptyMappingTraits; + struct ROCROLLER_DECLSPEC EmptyMappingTraits; } } diff --git a/lib/include/rocRoller/Serialization/Colouring.hpp b/lib/include/rocRoller/Serialization/Colouring.hpp index 7c4ab3bb..acec97fa 100644 --- a/lib/include/rocRoller/Serialization/Colouring.hpp +++ b/lib/include/rocRoller/Serialization/Colouring.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -36,13 +38,13 @@ namespace rocRoller { template - struct CustomMappingTraits, IO> + struct ROCROLLER_DECLSPEC CustomMappingTraits, IO> : public DefaultCustomMappingTraits, IO, false, true> { }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; diff --git a/lib/include/rocRoller/Serialization/Command.hpp b/lib/include/rocRoller/Serialization/Command.hpp index e9f736d0..7d232f83 100644 --- a/lib/include/rocRoller/Serialization/Command.hpp +++ b/lib/include/rocRoller/Serialization/Command.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #ifdef ROCROLLER_USE_LLVM #include #endif @@ -47,13 +49,13 @@ namespace rocRoller namespace Serialization { template - struct SequenceTraits, IO> + struct ROCROLLER_DECLSPEC SequenceTraits, IO> : public DefaultSequenceTraits, IO, false> { }; template <> - struct KeyConversion + struct ROCROLLER_DECLSPEC KeyConversion { static std::string toString(ArgumentOffsetMap::key_type const& value) { @@ -80,13 +82,13 @@ namespace rocRoller }; template - struct CustomMappingTraits + struct 
ROCROLLER_DECLSPEC CustomMappingTraits : public DefaultCustomMappingTraits { }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; @@ -110,7 +112,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public SharedPointerMappingTraits { }; diff --git a/lib/include/rocRoller/Serialization/Containers.hpp b/lib/include/rocRoller/Serialization/Containers.hpp index 7df1c445..0b244232 100644 --- a/lib/include/rocRoller/Serialization/Containers.hpp +++ b/lib/include/rocRoller/Serialization/Containers.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -37,7 +39,7 @@ namespace rocRoller namespace Serialization { template - struct KeyConversion + struct ROCROLLER_DECLSPEC KeyConversion { static std::string toString(Key const& value) { @@ -55,7 +57,7 @@ namespace rocRoller }; template <> - struct KeyConversion + struct ROCROLLER_DECLSPEC KeyConversion { static std::string const& toString(std::string const& value) { @@ -69,7 +71,7 @@ namespace rocRoller }; template - struct DefaultCustomMappingTraits + struct ROCROLLER_DECLSPEC DefaultCustomMappingTraits { using iot = IOTraits; using key_type = typename Map::key_type; @@ -116,25 +118,25 @@ namespace rocRoller }; template - struct CustomMappingTraits, IO> + struct ROCROLLER_DECLSPEC CustomMappingTraits, IO> : public DefaultCustomMappingTraits, IO, false, true> { }; template - struct CustomMappingTraits, IO> + struct ROCROLLER_DECLSPEC CustomMappingTraits, IO> : public DefaultCustomMappingTraits, IO, false, true> { }; template - struct CustomMappingTraits, IO> + struct ROCROLLER_DECLSPEC CustomMappingTraits, IO> : public DefaultCustomMappingTraits, IO, false, true> { }; template - struct DefaultSequenceTraits + struct ROCROLLER_DECLSPEC DefaultSequenceTraits { using Value = typename Seq::value_type; @@ -158,13 +160,13 @@ namespace rocRoller #define ROCROLLER_SERIALIZE_VECTOR(flow, ...) 
\ template \ - struct SequenceTraits, IO> \ + struct ROCROLLER_DECLSPEC SequenceTraits, IO> \ : public DefaultSequenceTraits, IO, flow> \ { \ } template - struct SequenceTraits, IO> + struct ROCROLLER_DECLSPEC SequenceTraits, IO> { static const bool flow = true; diff --git a/lib/include/rocRoller/Serialization/ControlGraph.hpp b/lib/include/rocRoller/Serialization/ControlGraph.hpp index 505007eb..6f01165a 100644 --- a/lib/include/rocRoller/Serialization/ControlGraph.hpp +++ b/lib/include/rocRoller/Serialization/ControlGraph.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -43,13 +45,13 @@ namespace rocRoller { template requires(std::constructible_from) struct MappingTraits + T>) struct ROCROLLER_DECLSPEC MappingTraits : public EmptyMappingTraits { }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static void mapping(IO& io, BufferInstructionOptions& opt, Context&) @@ -71,7 +73,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::SetCoordinate& op, Context& ctx) @@ -95,7 +98,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::ForLoopOp& op, Context&) @@ -120,7 +123,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::ConditionalOp& op, Context&) @@ -145,7 +149,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::AssertOp& op, Context&) @@ -170,7 +174,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static 
void mapping(IO& io, KernelGraph::ControlGraph::DoWhileOp& op, Context&) @@ -195,7 +199,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::UnrollOp& op, Context&) @@ -219,7 +223,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::SeedPRNG& op, Context&) @@ -243,7 +247,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::Assign& op, Context&) @@ -269,7 +273,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::ComputeIndex& op, Context&) @@ -290,15 +295,15 @@ namespace rocRoller }; template - requires( - CIsAnyOf) struct MappingTraits + requires(CIsAnyOf) struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; static void mapping(IO& io, Op& op, Context&) @@ -337,7 +342,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::Multiply& op, Context&) @@ -356,11 +361,11 @@ namespace rocRoller }; template - requires( - CIsAnyOf) struct MappingTraits + requires(CIsAnyOf) struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; static void mapping(IO& io, Op& op, Context&) @@ -386,7 +391,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; static void mapping(IO& io, KernelGraph::ControlGraph::TensorContraction& op, Context&) @@ -410,14 +416,14 @@ namespace rocRoller template requires(std::constructible_from&& T::HasValue - == false) struct 
MappingTraits + == false) struct ROCROLLER_DECLSPEC MappingTraits : public EmptyMappingTraits { }; static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public DefaultVariantMappingTraits @@ -426,14 +432,15 @@ namespace rocRoller static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public DefaultVariantMappingTraits { }; static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits : public DefaultVariantMappingTraits @@ -475,12 +482,12 @@ namespace rocRoller } template - struct CustomMappingTraits + struct ROCROLLER_DECLSPEC CustomMappingTraits : public DefaultCustomMappingTraits { }; template - struct CustomMappingTraits + struct ROCROLLER_DECLSPEC CustomMappingTraits : public DefaultCustomMappingTraits { }; @@ -491,7 +498,8 @@ namespace rocRoller #endif template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; using HG = typename KernelGraph::ControlGraph::ControlGraph::Base; diff --git a/lib/include/rocRoller/Serialization/ControlToCoordinateMapper.hpp b/lib/include/rocRoller/Serialization/ControlToCoordinateMapper.hpp index 764628cd..f6574e59 100644 --- a/lib/include/rocRoller/Serialization/ControlToCoordinateMapper.hpp +++ b/lib/include/rocRoller/Serialization/ControlToCoordinateMapper.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -41,13 +43,14 @@ namespace rocRoller namespace Serialization { template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public EmptyMappingTraits { }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; @@ -66,7 +69,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; @@ -87,7 +91,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + 
MappingTraits { using iot = IOTraits; @@ -108,7 +113,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -130,7 +135,8 @@ namespace rocRoller static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits : public DefaultVariantMappingTraits @@ -139,7 +145,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; @@ -163,7 +170,7 @@ namespace rocRoller ROCROLLER_SERIALIZE_VECTOR(false, KernelGraph::ControlToCoordinateMapper::Connection); template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; diff --git a/lib/include/rocRoller/Serialization/CoordinateGraph.hpp b/lib/include/rocRoller/Serialization/CoordinateGraph.hpp index 5e326a96..b4360a7d 100644 --- a/lib/include/rocRoller/Serialization/CoordinateGraph.hpp +++ b/lib/include/rocRoller/Serialization/CoordinateGraph.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -42,7 +44,8 @@ namespace rocRoller namespace Serialization { template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; @@ -63,7 +66,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -85,7 +88,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -115,7 +118,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; @@ -138,7 +142,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -161,7 +165,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; @@ -188,7 +193,8 @@ namespace rocRoller }; 
template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; @@ -213,7 +219,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -240,13 +246,13 @@ namespace rocRoller template requires(std::constructible_from&& T::HasValue - == false) struct MappingTraits + == false) struct ROCROLLER_DECLSPEC MappingTraits : public EmptyMappingTraits { }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -267,7 +273,7 @@ namespace rocRoller template requires(std::constructible_from&& std::derived_from&& T::HasValue - == false) struct MappingTraits + == false) struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -292,7 +298,7 @@ namespace rocRoller T, KernelGraph::CoordinateGraph:: BaseDimension> && !std::derived_from && T::HasValue == false) struct - MappingTraits + ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -313,7 +319,8 @@ namespace rocRoller static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits : public DefaultVariantMappingTraits< KernelGraph::CoordinateGraph::CoordinateTransformEdge, IO, @@ -324,7 +331,8 @@ namespace rocRoller static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits : public DefaultVariantMappingTraits @@ -333,14 +341,15 @@ namespace rocRoller static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public DefaultVariantMappingTraits { }; static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits : public DefaultVariantMappingTraits @@ -349,7 +358,8 @@ namespace rocRoller static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits : public DefaultVariantMappingTraits< KernelGraph::CoordinateGraph::CoordinateGraph::Element, IO, 
@@ -358,7 +368,8 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC + MappingTraits { using iot = IOTraits; using HG = typename KernelGraph::CoordinateGraph::CoordinateGraph::Base; diff --git a/lib/include/rocRoller/Serialization/Enum.hpp b/lib/include/rocRoller/Serialization/Enum.hpp index 8ee988b0..15cf34ed 100644 --- a/lib/include/rocRoller/Serialization/Enum.hpp +++ b/lib/include/rocRoller/Serialization/Enum.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -40,7 +42,7 @@ namespace rocRoller * i.e. provides a Count value and toString (fromString() uses toString). */ template - struct ScalarTraits + struct ROCROLLER_DECLSPEC ScalarTraits { static std::string output(const Enum& value) { diff --git a/lib/include/rocRoller/Serialization/Expression.hpp b/lib/include/rocRoller/Serialization/Expression.hpp index 5342fed6..c90a7380 100644 --- a/lib/include/rocRoller/Serialization/Expression.hpp +++ b/lib/include/rocRoller/Serialization/Expression.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -41,21 +43,21 @@ namespace rocRoller namespace Serialization { template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public SharedPointerMappingTraits { static const bool flow = true; }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public DefaultVariantMappingTraits { static const bool flow = true; }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -76,7 +78,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -96,7 +98,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -117,7 +119,7 @@ namespace rocRoller }; template - struct MappingTraits + struct 
ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -140,7 +142,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -163,7 +165,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -186,7 +188,7 @@ namespace rocRoller static_assert(CNamedVariant); template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public DefaultVariantMappingTraits { static const bool flow = true; @@ -236,7 +238,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -281,7 +283,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -302,7 +304,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -335,14 +337,14 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public SharedPointerMappingTraits { static const bool flow = true; }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -370,7 +372,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -392,7 +394,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; diff --git a/lib/include/rocRoller/Serialization/GPUArchitecture.hpp b/lib/include/rocRoller/Serialization/GPUArchitecture.hpp index d5d2af80..68ef2943 100644 --- 
a/lib/include/rocRoller/Serialization/GPUArchitecture.hpp +++ b/lib/include/rocRoller/Serialization/GPUArchitecture.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #ifdef ROCROLLER_USE_LLVM #include #endif @@ -61,7 +63,7 @@ namespace rocRoller * generic enum serialization. */ template <> - struct ScalarTraits + struct ROCROLLER_DECLSPEC ScalarTraits { static std::string output(const GPUWaitQueueType& value) { @@ -75,7 +77,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; @@ -92,7 +94,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; @@ -111,7 +113,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = true; using iot = IOTraits; @@ -134,7 +136,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; @@ -153,7 +155,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; @@ -170,7 +172,7 @@ namespace rocRoller }; template - struct CustomMappingTraits, IO> + struct ROCROLLER_DECLSPEC CustomMappingTraits, IO> : public DefaultCustomMappingTraits, IO, false, @@ -179,13 +181,14 @@ namespace rocRoller }; template - struct CustomMappingTraits, IO> + struct ROCROLLER_DECLSPEC CustomMappingTraits, IO> : public DefaultCustomMappingTraits, IO, false, true> { }; template - struct CustomMappingTraits, IO> + struct ROCROLLER_DECLSPEC + CustomMappingTraits, IO> : public DefaultCustomMappingTraits, IO, false, diff --git a/lib/include/rocRoller/Serialization/HasTraits.hpp b/lib/include/rocRoller/Serialization/HasTraits.hpp index 38e32bc9..5a779401 100644 --- 
a/lib/include/rocRoller/Serialization/HasTraits.hpp +++ b/lib/include/rocRoller/Serialization/HasTraits.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -35,7 +37,7 @@ namespace rocRoller namespace Serialization { template - struct SameType; + struct ROCROLLER_DECLSPEC SameType; template concept CMappedType = requires(T& obj, IO& io, Context& ctx) @@ -50,13 +52,13 @@ namespace rocRoller }; template - struct has_MappingTraits + struct ROCROLLER_DECLSPEC has_MappingTraits { static const bool value = CMappedType; }; template - struct has_EmptyMappingTraits + struct ROCROLLER_DECLSPEC has_EmptyMappingTraits { static const bool value = std::same_as && CEmptyMappedType; @@ -67,7 +69,7 @@ namespace rocRoller value && !has_MappingTraits::value; template - class has_EnumTraits + class ROCROLLER_DECLSPEC has_EnumTraits { using enumeration = void (*)(IO&, T&); @@ -96,7 +98,7 @@ namespace rocRoller }; template - class has_SequenceTraits + class ROCROLLER_DECLSPEC has_SequenceTraits { using size = size_t (*)(IO&, T&); @@ -114,7 +116,7 @@ namespace rocRoller concept SequenceType = has_SequenceTraits::value; template - class has_CustomMappingTraits + class ROCROLLER_DECLSPEC has_CustomMappingTraits { using inputOne = void (*)(IO&, std::string const&, T&); using output = void (*)(IO&, T&); @@ -134,7 +136,7 @@ namespace rocRoller concept CustomMappingType = has_CustomMappingTraits::value; template - struct has_SerializationTraits + struct ROCROLLER_DECLSPEC has_SerializationTraits { static const bool value0 = has_EmptyMappingTraits::value || has_MappingTraits::value; @@ -165,13 +167,13 @@ namespace rocRoller }; template - struct HasFlowValue + struct ROCROLLER_DECLSPEC HasFlowValue { static const bool flow = false; }; template - struct HasFlowValue + struct ROCROLLER_DECLSPEC HasFlowValue { static const bool flow = T::flow; }; diff --git a/lib/include/rocRoller/Serialization/Hypergraph.hpp b/lib/include/rocRoller/Serialization/Hypergraph.hpp index 
e0b65e49..534aec58 100644 --- a/lib/include/rocRoller/Serialization/Hypergraph.hpp +++ b/lib/include/rocRoller/Serialization/Hypergraph.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -40,7 +42,7 @@ namespace rocRoller namespace Serialization { template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; static const bool flow = true; @@ -61,14 +63,14 @@ namespace rocRoller ROCROLLER_SERIALIZE_VECTOR(false, Graph::HypergraphIncident); template - struct ElementEntry + struct ROCROLLER_DECLSPEC ElementEntry { int id; Var value; }; template - struct MappingTraits, IO, Context> + struct ROCROLLER_DECLSPEC MappingTraits, IO, Context> { using iot = IOTraits; using Entry = ElementEntry; @@ -90,7 +92,7 @@ namespace rocRoller }; template - struct SequenceTraits>, IO> + struct ROCROLLER_DECLSPEC SequenceTraits>, IO> : public DefaultSequenceTraits>, IO, false> { }; @@ -102,7 +104,8 @@ namespace rocRoller // }; template - struct MappingTraits, IO, EmptyContext> + struct ROCROLLER_DECLSPEC + MappingTraits, IO, EmptyContext> { using iot = IOTraits; using HG = Graph::Hypergraph; diff --git a/lib/include/rocRoller/Serialization/KernelGraph.hpp b/lib/include/rocRoller/Serialization/KernelGraph.hpp index 7e1d85cd..1f1c1095 100644 --- a/lib/include/rocRoller/Serialization/KernelGraph.hpp +++ b/lib/include/rocRoller/Serialization/KernelGraph.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #ifdef ROCROLLER_USE_LLVM #include #endif @@ -44,7 +46,7 @@ namespace rocRoller { template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { static const bool flow = false; using iot = IOTraits; @@ -63,7 +65,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public SharedPointerMappingTraits { }; diff --git a/lib/include/rocRoller/Serialization/Operations.hpp b/lib/include/rocRoller/Serialization/Operations.hpp index b186a73d..2eb60611 100644 --- 
a/lib/include/rocRoller/Serialization/Operations.hpp +++ b/lib/include/rocRoller/Serialization/Operations.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #ifdef ROCROLLER_USE_LLVM #include #endif @@ -45,7 +47,7 @@ namespace rocRoller namespace Serialization { template <> - struct ScalarTraits + struct ROCROLLER_DECLSPEC ScalarTraits { static std::string output(Operations::OperationTag const& x) { @@ -62,13 +64,13 @@ namespace rocRoller }; template - struct SequenceTraits, IO> + struct ROCROLLER_DECLSPEC SequenceTraits, IO> : public DefaultSequenceTraits, IO, true> { }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::Tensor; using iot = IOTraits; @@ -94,7 +96,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::Scalar; using iot = IOTraits; @@ -115,7 +117,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::Literal; using iot = IOTraits; @@ -135,7 +137,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::BlockScale; using iot = IOTraits; @@ -170,7 +172,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::T_Load_Linear; using iot = IOTraits; @@ -190,7 +192,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::T_Load_Scalar; using iot = IOTraits; @@ -210,7 +212,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::T_Load_Tiled; using iot = IOTraits; @@ -230,7 +232,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::T_Mul; using iot = IOTraits; @@ -251,7 +253,7 @@ namespace rocRoller }; template - struct 
MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::T_Store_Linear; using iot = IOTraits; @@ -272,7 +274,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::T_Store_Tiled; using iot = IOTraits; @@ -293,7 +295,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::Nop; using iot = IOTraits; @@ -310,7 +312,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::RandomNumberGenerator; using iot = IOTraits; @@ -332,7 +334,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using TOp = Operations::T_Execute; using iot = IOTraits; @@ -375,7 +377,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -384,7 +386,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -393,7 +395,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -402,7 +404,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -411,7 +413,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -420,7 +422,8 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC + DefaultConstruct { static Operations::Operation call() { @@ -429,7 +432,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { 
@@ -438,7 +441,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -447,7 +450,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -456,7 +459,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -466,7 +469,8 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC + DefaultConstruct { static Operations::Operation call() { @@ -476,7 +480,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -486,7 +490,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -495,7 +499,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::Operation call() { @@ -504,13 +508,13 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public DefaultVariantMappingTraits { }; template - struct MappingTraits, IO, Context> + struct ROCROLLER_DECLSPEC MappingTraits, IO, Context> : public SharedPointerMappingTraits, IO, Context, @@ -519,7 +523,8 @@ namespace rocRoller }; template - struct SequenceTraits>, IO> + struct ROCROLLER_DECLSPEC + SequenceTraits>, IO> : public DefaultSequenceTraits>, IO, false> diff --git a/lib/include/rocRoller/Serialization/Variant.hpp b/lib/include/rocRoller/Serialization/Variant.hpp index 72599517..7c053a8f 100644 --- a/lib/include/rocRoller/Serialization/Variant.hpp +++ b/lib/include/rocRoller/Serialization/Variant.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -57,7 +59,7 @@ namespace 
rocRoller }; template - struct VariantTypeKeySpecifier + struct ROCROLLER_DECLSPEC VariantTypeKeySpecifier { static std::string TypeKey() { @@ -111,7 +113,7 @@ namespace rocRoller * allows the Adhoc dimension to use this for its `m_name` field. */ template - struct DefaultVariantMappingTraits + struct ROCROLLER_DECLSPEC DefaultVariantMappingTraits { using iot = IOTraits; using AlternativeFn = std::function; @@ -217,7 +219,7 @@ namespace rocRoller DefaultVariantMappingTraits::alternatives = DefaultVariantMappingTraits::GetAlternatives(); - struct RemainingTypePathVisitor + struct ROCROLLER_DECLSPEC RemainingTypePathVisitor { template std::string operator()(Var const& v) diff --git a/lib/include/rocRoller/Serialization/XOps.hpp b/lib/include/rocRoller/Serialization/XOps.hpp index 5edab36b..afa8cdc6 100644 --- a/lib/include/rocRoller/Serialization/XOps.hpp +++ b/lib/include/rocRoller/Serialization/XOps.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #ifdef ROCROLLER_USE_LLVM #include #endif @@ -43,7 +45,7 @@ namespace rocRoller { template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -62,7 +64,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -82,7 +84,7 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits { using iot = IOTraits; @@ -103,7 +105,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -112,7 +114,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -121,7 +123,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -130,7 +132,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct 
ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -139,7 +141,8 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC + DefaultConstruct { static Operations::XOp call() { @@ -149,7 +152,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -159,7 +162,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -169,7 +172,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -179,7 +182,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -189,7 +192,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -199,7 +202,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -208,7 +211,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -218,7 +221,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -229,7 +232,7 @@ namespace rocRoller }; template <> - struct DefaultConstruct + struct ROCROLLER_DECLSPEC DefaultConstruct { static Operations::XOp call() { @@ -238,13 +241,13 @@ namespace rocRoller }; template - struct MappingTraits + struct ROCROLLER_DECLSPEC MappingTraits : public DefaultVariantMappingTraits { }; template - struct MappingTraits, IO, Context> + struct ROCROLLER_DECLSPEC MappingTraits, IO, Context> { using TOp = std::shared_ptr; using iot = IOTraits; @@ -267,7 
+270,7 @@ namespace rocRoller }; template - struct SequenceTraits>, IO> + struct ROCROLLER_DECLSPEC SequenceTraits>, IO> : public DefaultSequenceTraits>, IO, false> { }; diff --git a/lib/include/rocRoller/Serialization/llvm/YAML.hpp b/lib/include/rocRoller/Serialization/llvm/YAML.hpp index 271deccd..a762ccae 100644 --- a/lib/include/rocRoller/Serialization/llvm/YAML.hpp +++ b/lib/include/rocRoller/Serialization/llvm/YAML.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -69,18 +71,18 @@ namespace llvm */ template - struct FlowBase + struct ROCROLLER_DECLSPEC FlowBase { }; template - struct FlowBase + struct ROCROLLER_DECLSPEC FlowBase { static const bool flow = true; }; template - requires(sn::has_SequenceTraits::value) struct SequenceTraits + requires(sn::has_SequenceTraits::value) struct ROCROLLER_DECLSPEC SequenceTraits : public FlowBase> { static size_t size(IO& io, T& seq) @@ -95,7 +97,8 @@ namespace llvm }; template - requires(sn::has_EnumTraits::value) struct ScalarEnumerationTraits + requires(sn::has_EnumTraits::value) struct ROCROLLER_DECLSPEC + ScalarEnumerationTraits { static void enumeration(IO& io, T& value) { @@ -104,7 +107,8 @@ namespace llvm }; template - requires(sn::has_MappingTraits::value) struct MappingContextTraits + requires(sn::has_MappingTraits::value) struct ROCROLLER_DECLSPEC + MappingContextTraits { static void mapping(IO& io, T& obj, Context& ctx) { @@ -119,8 +123,8 @@ namespace llvm }; template - requires(sn::has_EmptyMappingTraits::value) struct MappingTraits - : public FlowBase> + requires(sn::has_EmptyMappingTraits::value) struct ROCROLLER_DECLSPEC + MappingTraits : public FlowBase> { static void mapping(IO& io, T& obj) { @@ -135,7 +139,8 @@ namespace llvm }; template - requires(sn::has_CustomMappingTraits::value) struct CustomMappingTraits + requires(sn::has_CustomMappingTraits::value) struct ROCROLLER_DECLSPEC + CustomMappingTraits { using Impl = sn::CustomMappingTraits; @@ -151,7 +156,7 @@ namespace llvm }; 
template - struct Hide + struct ROCROLLER_DECLSPEC Hide { T& value; @@ -173,7 +178,7 @@ namespace rocRoller namespace Serialization { template <> - struct IOTraits + struct ROCROLLER_DECLSPEC IOTraits { using IO = llvm::yaml::IO; @@ -249,7 +254,7 @@ namespace llvm = std::conditional::value, FooType, size_t>::type; template <> - struct ScalarTraits + struct ROCROLLER_DECLSPEC ScalarTraits { static void output(const mysize_t& value, void* ctx, raw_ostream& stream) { @@ -272,7 +277,7 @@ namespace llvm }; template - struct MappingTraits> + struct ROCROLLER_DECLSPEC MappingTraits> { static void mapping(IO& io, Hide& value) { @@ -283,7 +288,7 @@ namespace llvm }; template - struct SequenceTraits> + struct ROCROLLER_DECLSPEC SequenceTraits> { using Impl = sn::SequenceTraits; using Value = typename Impl::Value; @@ -301,7 +306,7 @@ namespace llvm }; template - struct ScalarEnumerationTraits> + struct ROCROLLER_DECLSPEC ScalarEnumerationTraits> { static void enumeration(IO& io, Hide& value) { @@ -310,7 +315,7 @@ namespace llvm }; template - struct CustomMappingTraits> + struct ROCROLLER_DECLSPEC CustomMappingTraits> { using Impl = sn::CustomMappingTraits; @@ -336,7 +341,7 @@ namespace llvm rocRoller::BF8, rocRoller::FP6, rocRoller::BF6, - rocRoller::FP4>) struct ScalarTraits + rocRoller::FP4>) struct ROCROLLER_DECLSPEC ScalarTraits { static void output(const T& value, void* ctx, llvm::raw_ostream& out) { @@ -362,7 +367,7 @@ namespace llvm }; template - struct ScalarTraits + struct ROCROLLER_DECLSPEC ScalarTraits { using rrTraits = rocRoller::Serialization::ScalarTraits; diff --git a/lib/include/rocRoller/Serialization/msgpack/Msgpack.hpp b/lib/include/rocRoller/Serialization/msgpack/Msgpack.hpp index c4c91f7b..a283b607 100644 --- a/lib/include/rocRoller/Serialization/msgpack/Msgpack.hpp +++ b/lib/include/rocRoller/Serialization/msgpack/Msgpack.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -40,7 +42,7 @@ namespace msgpack namespace adaptor { 
template <> - struct convert + struct ROCROLLER_DECLSPEC convert { msgpack::object const& operator()(msgpack::object const& o, rocRoller::GPUArchitecture& v) const @@ -64,7 +66,7 @@ namespace msgpack }; template <> - struct pack + struct ROCROLLER_DECLSPEC pack { template packer& operator()(msgpack::packer& o, @@ -79,7 +81,7 @@ namespace msgpack }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { msgpack::object const& operator()(msgpack::object const& o, rocRoller::GPUArchitectureTarget& v) const @@ -99,7 +101,7 @@ namespace msgpack }; template <> - struct pack + struct ROCROLLER_DECLSPEC pack { template packer& operator()(msgpack::packer& o, @@ -112,7 +114,7 @@ namespace msgpack }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { msgpack::object const& operator()(msgpack::object const& o, rocRoller::GPUArchitectureGFX& v) const @@ -131,7 +133,7 @@ namespace msgpack }; template <> - struct pack + struct ROCROLLER_DECLSPEC pack { template packer& operator()(msgpack::packer& o, @@ -144,7 +146,7 @@ namespace msgpack }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { msgpack::object const& operator()(msgpack::object const& o, rocRoller::GPUCapability& v) const @@ -163,7 +165,7 @@ namespace msgpack }; template <> - struct pack + struct ROCROLLER_DECLSPEC pack { template packer& operator()(msgpack::packer& o, @@ -176,7 +178,7 @@ namespace msgpack }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { msgpack::object const& operator()(msgpack::object const& o, rocRoller::GPUInstructionInfo& v) const @@ -202,7 +204,7 @@ namespace msgpack }; template <> - struct pack + struct ROCROLLER_DECLSPEC pack { template packer& operator()(msgpack::packer& o, @@ -221,7 +223,7 @@ namespace msgpack }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { msgpack::object const& operator()(msgpack::object const& o, rocRoller::GPUWaitQueueType& v) const @@ -240,7 +242,7 @@ namespace msgpack }; template <> - 
struct pack + struct ROCROLLER_DECLSPEC pack { template packer& operator()(msgpack::packer& o, diff --git a/lib/include/rocRoller/Serialization/yaml-cpp/YAML.hpp b/lib/include/rocRoller/Serialization/yaml-cpp/YAML.hpp index 5e0fb46e..c35a7fe1 100644 --- a/lib/include/rocRoller/Serialization/yaml-cpp/YAML.hpp +++ b/lib/include/rocRoller/Serialization/yaml-cpp/YAML.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -40,7 +42,7 @@ namespace rocRoller { namespace Serialization { - struct EmitterOutput + struct ROCROLLER_DECLSPEC EmitterOutput { YAML::Emitter* emitter; void* context; @@ -207,7 +209,7 @@ namespace rocRoller } template <> - struct IOTraits + struct ROCROLLER_DECLSPEC IOTraits { using IO = EmitterOutput; @@ -292,7 +294,7 @@ namespace rocRoller val.data = floatVal; } - struct NodeInput + struct ROCROLLER_DECLSPEC NodeInput { YAML::Node* node; void* context; @@ -372,7 +374,7 @@ namespace rocRoller }; template <> - struct IOTraits + struct ROCROLLER_DECLSPEC IOTraits { using IO = NodeInput; @@ -436,7 +438,7 @@ namespace rocRoller namespace YAML { template <> - struct convert + struct ROCROLLER_DECLSPEC convert { static Node encode(const rocRoller::BFloat16& rhs) { @@ -458,7 +460,7 @@ namespace YAML }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { static Node encode(const rocRoller::FP8& rhs) { @@ -480,7 +482,7 @@ namespace YAML }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { static Node encode(const rocRoller::BF8& rhs) { @@ -502,7 +504,7 @@ namespace YAML }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { static Node encode(const rocRoller::FP6& rhs) { @@ -524,7 +526,7 @@ namespace YAML }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { static Node encode(const rocRoller::BF6& rhs) { @@ -546,7 +548,7 @@ namespace YAML }; template <> - struct convert + struct ROCROLLER_DECLSPEC convert { static Node encode(const rocRoller::FP4& rhs) { diff --git 
a/lib/include/rocRoller/TensorDescriptor.hpp b/lib/include/rocRoller/TensorDescriptor.hpp index d3cac75c..604b8b78 100644 --- a/lib/include/rocRoller/TensorDescriptor.hpp +++ b/lib/include/rocRoller/TensorDescriptor.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include @@ -38,7 +40,7 @@ namespace rocRoller * Describes a tensor including dimensions, memory layout, and data type. * Decoupled from any particular pointer value or memory location. */ - class TensorDescriptor + class ROCROLLER_DECLSPEC TensorDescriptor { public: TensorDescriptor() diff --git a/lib/include/rocRoller/Utilities/Comparison.hpp b/lib/include/rocRoller/Utilities/Comparison.hpp index c4be0933..bb45497c 100644 --- a/lib/include/rocRoller/Utilities/Comparison.hpp +++ b/lib/include/rocRoller/Utilities/Comparison.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -85,7 +87,7 @@ namespace rocRoller * - 1: rhs is lesser */ template - struct Comparison + struct ROCROLLER_DECLSPEC Comparison { enum { @@ -187,7 +189,7 @@ namespace rocRoller } template - struct tuple_hash + struct ROCROLLER_DECLSPEC tuple_hash { using MyTuple = std::tuple; using TypeN = typename std::tuple_element::type; @@ -201,7 +203,7 @@ namespace rocRoller }; template - struct tuple_hash<0, Types...> + struct ROCROLLER_DECLSPEC tuple_hash<0, Types...> { using MyTuple = std::tuple; using Type0 = typename std::tuple_element<0, MyTuple>::type; @@ -225,7 +227,7 @@ namespace rocRoller namespace std { template - struct hash> + struct ROCROLLER_DECLSPEC hash> { inline size_t operator()(tuple const& tup) const { diff --git a/lib/include/rocRoller/Utilities/Component.hpp b/lib/include/rocRoller/Utilities/Component.hpp index 0f1f7efd..00cf231b 100644 --- a/lib/include/rocRoller/Utilities/Component.hpp +++ b/lib/include/rocRoller/Utilities/Component.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -47,10 +49,10 @@ namespace rocRoller }; /** - * @brief A `ComponentBase` is a base 
class for a category of components. + * @brief A `ComponentBase` is a base class for a category of components. * * - It defines the interface for accessing the implementations. - * - All subclasses should provide interchangeable functionality. + * - All subclasses should provide interchangeable functionality. * */ template @@ -75,19 +77,19 @@ namespace rocRoller /** * A function to match whether a given component is appropriate for the situation. * - * Only one subclass should match any given situation. + * Only one subclass should match any given situation. */ template using Matcher = std::function; /** - * A factory function to create an instance of a particular subclass. + * A factory function to create an instance of a particular subclass. */ template using Builder = std::function(typename Base::Argument)>; /** - * A concrete subclass which fulfils the required functionality in a subset of situations. + * A concrete subclass which fulfils the required functionality in a subset of situations. */ template concept Component = requires(T a) @@ -118,7 +120,7 @@ namespace rocRoller }; /** - * @brief Returns an object of the appropriate subclass of `Base`, based on `ctx`. + * @brief Returns an object of the appropriate subclass of `Base`, based on `ctx`. * May be the same object from one call to the next. */ template @@ -140,7 +142,7 @@ namespace rocRoller // clang-format on /** - * @brief Returns a new instance object of the appropriate subclass of `Base`, based + * @brief Returns a new instance object of the appropriate subclass of `Base`, based * on `ctx`.
*/ template @@ -184,7 +186,7 @@ namespace rocRoller = rocRoller::Component::RegisterComponentImpl>(); \ } - class ComponentFactoryBase + class ROCROLLER_DECLSPEC ComponentFactoryBase { public: static void ClearAllCaches(); @@ -198,12 +200,12 @@ namespace rocRoller }; template - class ComponentFactory : public ComponentFactoryBase + class ROCROLLER_DECLSPEC ComponentFactory : public ComponentFactoryBase { public: using Argument = typename Base::Argument; - struct Entry + struct ROCROLLER_DECLSPEC Entry { std::string name; Matcher matcher; @@ -236,7 +238,7 @@ namespace rocRoller std::vector m_entries; /** - * Finds an entry among the registered entries (classes). This is the fallback for if there + * Finds an entry among the registered entries (classes). This is the fallback for if there * is no entry in the cache. */ template diff --git a/lib/include/rocRoller/Utilities/EnumBitset.hpp b/lib/include/rocRoller/Utilities/EnumBitset.hpp index c666ffab..7db9db03 100644 --- a/lib/include/rocRoller/Utilities/EnumBitset.hpp +++ b/lib/include/rocRoller/Utilities/EnumBitset.hpp @@ -25,6 +25,8 @@ *******************************************************************************/ #pragma once +#include + #include #include @@ -32,7 +34,7 @@ namespace rocRoller { /** - * Bitset which uses an enum (class) as an indexer. Has the interface of std::bitset, and can be + * Bitset which uses an enum (class) as an indexer. Has the interface of std::bitset, and can be * indexed by the enum. * * All new interfaces are constexpr so it can be used in concepts and to limit template instantiation. @@ -42,7 +44,7 @@ namespace rocRoller * - Count must be <= 64.
*/ template - class EnumBitset : public std::bitset(Enum::Count)> + class ROCROLLER_DECLSPEC EnumBitset : public std::bitset(Enum::Count)> { public: static constexpr size_t Size = static_cast(Enum::Count); @@ -69,10 +71,10 @@ namespace rocRoller }; template - std::string toString(EnumBitset const& bs); + ROCROLLER_DECLSPEC std::string toString(EnumBitset const& bs); template - std::ostream& operator<<(std::ostream& stream, EnumBitset const& bs); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream& stream, EnumBitset const& bs); } #include diff --git a/lib/include/rocRoller/Utilities/Error.hpp b/lib/include/rocRoller/Utilities/Error.hpp index fe6f615e..2ef2b97b 100644 --- a/lib/include/rocRoller/Utilities/Error.hpp +++ b/lib/include/rocRoller/Utilities/Error.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -39,7 +41,7 @@ namespace rocRoller { - struct Error : public std::runtime_error + struct ROCROLLER_DECLSPEC Error : public std::runtime_error { using std::runtime_error::runtime_error; @@ -56,23 +58,23 @@ namespace rocRoller std::string m_annotatedMessage; }; - struct FatalError : public Error + struct ROCROLLER_DECLSPEC FatalError : public Error { using Error::Error; }; - struct RecoverableError : public Error + struct ROCROLLER_DECLSPEC RecoverableError : public Error { using Error::Error; }; template - [[noreturn]] void Throw(Ts const&...); + [[noreturn]] ROCROLLER_DECLSPEC void Throw(Ts const&...); /** * Initiates a segfault. This can be useful for debugging purposes. 
*/ - [[noreturn]] void Crash(); + [[noreturn]] ROCROLLER_DECLSPEC void Crash(); int* GetNullPointer(); diff --git a/lib/include/rocRoller/Utilities/Error_fwd.hpp b/lib/include/rocRoller/Utilities/Error_fwd.hpp index e200216f..3a59453e 100644 --- a/lib/include/rocRoller/Utilities/Error_fwd.hpp +++ b/lib/include/rocRoller/Utilities/Error_fwd.hpp @@ -26,10 +26,12 @@ #pragma once +#include + namespace rocRoller { - class Error; + class ROCROLLER_DECLSPEC Error; - class FatalError; - class RecoverableError; + class ROCROLLER_DECLSPEC FatalError; + class ROCROLLER_DECLSPEC RecoverableError; } diff --git a/lib/include/rocRoller/Utilities/Error_impl.hpp b/lib/include/rocRoller/Utilities/Error_impl.hpp index e8eacdba..3b1d6566 100644 --- a/lib/include/rocRoller/Utilities/Error_impl.hpp +++ b/lib/include/rocRoller/Utilities/Error_impl.hpp @@ -38,7 +38,7 @@ namespace rocRoller } template - [[noreturn]] void Throw(Ts const&... message) + inline void Throw(Ts const&... message) { bool var = Error::BreakOnThrow(); if(var) diff --git a/lib/include/rocRoller/Utilities/Generator.hpp b/lib/include/rocRoller/Utilities/Generator.hpp index 90f9afb5..2704fb0c 100644 --- a/lib/include/rocRoller/Utilities/Generator.hpp +++ b/lib/include/rocRoller/Utilities/Generator.hpp @@ -26,6 +26,8 @@ #pragma once +#include + /** * Compiler-specific tricks to get coroutines working * @@ -90,8 +92,8 @@ namespace rocRoller Done, Count }; - std::string toString(GeneratorState s); - std::ostream& operator<<(std::ostream&, GeneratorState const&); + ROCROLLER_DECLSPEC std::string toString(GeneratorState s); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, GeneratorState const&); /** * Range/ConcreteRange wraps a collection object behind a virtual interface, @@ -99,7 +101,7 @@ namespace rocRoller * and provide access to its items. 
*/ template - struct Range + struct ROCROLLER_DECLSPEC Range { virtual ~Range() = default; @@ -110,7 +112,7 @@ namespace rocRoller }; template TheRange> - struct ConcreteRange : public Range + struct ROCROLLER_DECLSPEC ConcreteRange : public Range { template ARange> explicit ConcreteRange(ARange&& r); @@ -331,7 +333,7 @@ namespace rocRoller * */ template - class Generator + class ROCROLLER_DECLSPEC Generator { public: /** @@ -343,7 +345,7 @@ namespace rocRoller * state of whether we currently have a value or not, and whether we * currently have a range or not. */ - class promise_type + class ROCROLLER_DECLSPEC promise_type { public: /**** @@ -405,7 +407,7 @@ namespace rocRoller * `std::default_sentinel_t`, not `Iterator`. The * `std::common_iterator<>` adaptor provides that functionality. */ - class Iterator + class ROCROLLER_DECLSPEC Iterator { public: // Required for STL iterator adaptors. Not actually required to provide the - operator though. diff --git a/lib/include/rocRoller/Utilities/HIPTimer.hpp b/lib/include/rocRoller/Utilities/HIPTimer.hpp index f9a6b3dd..6e2bf5ea 100644 --- a/lib/include/rocRoller/Utilities/HIPTimer.hpp +++ b/lib/include/rocRoller/Utilities/HIPTimer.hpp @@ -32,6 +32,8 @@ #ifdef ROCROLLER_USE_HIP +#include + #include #include #include @@ -49,7 +51,7 @@ namespace rocRoller { - class HIPTimer : public Timer + class ROCROLLER_DECLSPEC HIPTimer : public Timer { public: HIPTimer() = delete; diff --git a/lib/include/rocRoller/Utilities/LazySingleton.hpp b/lib/include/rocRoller/Utilities/LazySingleton.hpp index c46de56b..536bf91f 100644 --- a/lib/include/rocRoller/Utilities/LazySingleton.hpp +++ b/lib/include/rocRoller/Utilities/LazySingleton.hpp @@ -26,12 +26,14 @@ #pragma once +#include + #include namespace rocRoller { template - class LazySingleton + class ROCROLLER_DECLSPEC LazySingleton { public: static std::shared_ptr getInstance() diff --git a/lib/include/rocRoller/Utilities/Logging.hpp b/lib/include/rocRoller/Utilities/Logging.hpp index 
518ba6f3..f98fc9d9 100644 --- a/lib/include/rocRoller/Utilities/Logging.hpp +++ b/lib/include/rocRoller/Utilities/Logging.hpp @@ -26,6 +26,8 @@ #pragma once +#include + // Only need the forward declaration of LogLevel #include @@ -39,7 +41,7 @@ namespace rocRoller namespace Log { - class logger + class ROCROLLER_DECLSPEC logger { public: void log(LogLevel level, const std::string& str); @@ -78,7 +80,7 @@ namespace rocRoller }; using LoggerPtr = std::shared_ptr; - LoggerPtr getLogger(); + ROCROLLER_DECLSPEC LoggerPtr getLogger(); inline void log(LogLevel level, const std::string& str) { diff --git a/lib/include/rocRoller/Utilities/Random.hpp b/lib/include/rocRoller/Utilities/Random.hpp index 195b01dd..48d48577 100644 --- a/lib/include/rocRoller/Utilities/Random.hpp +++ b/lib/include/rocRoller/Utilities/Random.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -48,7 +50,7 @@ namespace rocRoller * A seed may be set programmatically (at any time) by calling * seed(). */ - class RandomGenerator + class ROCROLLER_DECLSPEC RandomGenerator { public: explicit RandomGenerator(int seedNumber); diff --git a/lib/include/rocRoller/Utilities/Random_fwd.hpp b/lib/include/rocRoller/Utilities/Random_fwd.hpp index ef0a6334..0ab671cc 100644 --- a/lib/include/rocRoller/Utilities/Random_fwd.hpp +++ b/lib/include/rocRoller/Utilities/Random_fwd.hpp @@ -30,7 +30,9 @@ #pragma once +#include + namespace rocRoller { - class RandomGenerator; + class ROCROLLER_DECLSPEC RandomGenerator; } diff --git a/lib/include/rocRoller/Utilities/Settings.hpp b/lib/include/rocRoller/Utilities/Settings.hpp index 1e648463..e90292d1 100644 --- a/lib/include/rocRoller/Utilities/Settings.hpp +++ b/lib/include/rocRoller/Utilities/Settings.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -50,7 +52,7 @@ namespace rocRoller * * Keeps track of its instances, so do not create temporaries. 
*/ - struct SettingsOptionBase + struct ROCROLLER_DECLSPEC SettingsOptionBase { std::string name; std::string description; @@ -76,7 +78,7 @@ namespace rocRoller * @tparam T type of underlying option. */ template - struct SettingsOption : public SettingsOptionBase + struct ROCROLLER_DECLSPEC SettingsOption : public SettingsOptionBase { using Type = T; using DefaultValueType = @@ -97,11 +99,11 @@ namespace rocRoller int getBitIndex() const; }; - std::string toString(LogLevel level); - std::ostream& operator<<(std::ostream&, LogLevel const&); + ROCROLLER_DECLSPEC std::string toString(LogLevel level); + ROCROLLER_DECLSPEC std::ostream& operator<<(std::ostream&, LogLevel const&); /** - * @brief Settings class is derived from lazy singleton class and handles options + * @brief Settings class is derived from lazy singleton class and handles options * that are defined through environment variables or developer defined options. * * Getting a value requires a call to get(SettingsOption opt). When get() is called, @@ -112,7 +114,7 @@ namespace rocRoller * * A call to set(SettingsOption opt) sets (or overwrites) the corresponding value in m_values. 
*/ - class Settings : public LazySingleton + class ROCROLLER_DECLSPEC Settings : public LazySingleton { public: using bitFieldType = std::bitset<32>; @@ -284,6 +286,8 @@ namespace rocRoller std::map m_values; std::vector m_setBitOptions; }; + + ROCROLLER_DECLSPEC F8Mode getDefaultF8ModeForCurrentHipDevice(); } #include diff --git a/lib/include/rocRoller/Utilities/Settings_fwd.hpp b/lib/include/rocRoller/Utilities/Settings_fwd.hpp index bc554fd3..948e0318 100644 --- a/lib/include/rocRoller/Utilities/Settings_fwd.hpp +++ b/lib/include/rocRoller/Utilities/Settings_fwd.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include namespace rocRoller @@ -53,9 +55,7 @@ namespace rocRoller std::string toString(F8Mode); - F8Mode getDefaultF8ModeForCurrentHipDevice(); - - const char* getDefaultArchitectureFilePath(); + ROCROLLER_DECLSPEC F8Mode getDefaultF8ModeForCurrentHipDevice(); - class Settings; + class ROCROLLER_DECLSPEC Settings; } diff --git a/lib/include/rocRoller/Utilities/Timer.hpp b/lib/include/rocRoller/Utilities/Timer.hpp index aea0bdf8..ece617ba 100644 --- a/lib/include/rocRoller/Utilities/Timer.hpp +++ b/lib/include/rocRoller/Utilities/Timer.hpp @@ -30,6 +30,8 @@ #pragma once +#include + #include #include #include @@ -55,7 +57,7 @@ namespace rocRoller * Elapsed times are accumulated (atomically) by name. Times are * accumulated by the rocRoller::Timer class below. */ - class TimerPool + class ROCROLLER_DECLSPEC TimerPool { public: TimerPool(TimerPool const&) = delete; @@ -109,7 +111,7 @@ namespace rocRoller * When a timer is stopped (via toc()) the elapsed time is added * to the TimerPool. 
*/ - class Timer + class ROCROLLER_DECLSPEC Timer { public: Timer() = delete; diff --git a/lib/include/rocRoller/Utilities/Utils.hpp b/lib/include/rocRoller/Utilities/Utils.hpp index e5a3ee60..491fab92 100644 --- a/lib/include/rocRoller/Utilities/Utils.hpp +++ b/lib/include/rocRoller/Utilities/Utils.hpp @@ -26,6 +26,8 @@ #pragma once +#include + #include #include #include @@ -56,19 +58,19 @@ namespace rocRoller */ template - constexpr T CeilDivide(T num, T den) + ROCROLLER_DECLSPEC constexpr T CeilDivide(T num, T den) { return (num + (den - 1)) / den; } template - constexpr T RoundUpToMultiple(T val, T den) + ROCROLLER_DECLSPEC constexpr T RoundUpToMultiple(T val, T den) { return CeilDivide(val, den) * den; } template ::value>> - constexpr T IsPrime(T val) + ROCROLLER_DECLSPEC constexpr T IsPrime(T val) { if(val < 2) return false; @@ -84,7 +86,7 @@ namespace rocRoller } template ::value>> - constexpr T NextPrime(T val) + ROCROLLER_DECLSPEC constexpr T NextPrime(T val) { if(val < 2) return 2; @@ -94,7 +96,7 @@ namespace rocRoller } template - std::variant singleVariant(T value) + ROCROLLER_DECLSPEC std::variant singleVariant(T value) { return std::variant(std::move(value)); } @@ -108,7 +110,7 @@ namespace rocRoller * - No two sets in return value have any common elements. * - mergeSets(mergeSets(x)) == mergeSets(x) for any x. 
*/ - std::vector> mergeSets(std::vector> sets); + ROCROLLER_DECLSPEC std::vector> mergeSets(std::vector> sets); inline std::string toString(int x) { @@ -123,7 +125,7 @@ namespace rocRoller } template - void streamJoin(std::ostream& stream, Container const& c, Joiner const& j) + ROCROLLER_DECLSPEC void streamJoin(std::ostream& stream, Container const& c, Joiner const& j) { bool first = true; for(auto const& item : c) @@ -235,7 +237,7 @@ namespace rocRoller return concatenate(vals...); } - class StreamRead + class ROCROLLER_DECLSPEC StreamRead { public: explicit StreamRead(std::string const& value, bool except = true); @@ -256,7 +258,7 @@ namespace rocRoller return stream; } - struct BitFieldGenerator + struct ROCROLLER_DECLSPEC BitFieldGenerator { constexpr static uint32_t maxBitFieldWidth = 32; @@ -294,20 +296,20 @@ namespace rocRoller }; template - Generator iota(T begin, T end, T inc) + ROCROLLER_DECLSPEC Generator iota(T begin, T end, T inc) { for(; begin < end; begin += inc) co_yield begin; } template - Generator iota(T begin, T end) + ROCROLLER_DECLSPEC Generator iota(T begin, T end) { co_yield iota(begin, end, 1); } template - Generator iota(T begin) + ROCROLLER_DECLSPEC Generator iota(T begin) { for(;; ++begin) co_yield begin; @@ -359,7 +361,7 @@ namespace rocRoller // helper for visitor template - struct overloaded : Ts... + struct ROCROLLER_DECLSPEC overloaded : Ts... { // cppcheck-suppress syntaxError using Ts::operator()...; @@ -373,20 +375,20 @@ namespace rocRoller * Converts a string value to an enum by comparing against each toString conversion. 
*/ template - T fromString(std::string const& str); + ROCROLLER_DECLSPEC T fromString(std::string const& str); template - requires(std::default_initializable) std::string name() + requires(std::default_initializable) ROCROLLER_DECLSPEC std::string name() { T obj; return name(obj); } - std::string escapeSymbolName(std::string name); + ROCROLLER_DECLSPEC std::string escapeSymbolName(std::string name); - std::vector readFile(std::string const&); + ROCROLLER_DECLSPEC std::vector readFile(std::string const&); - std::string readMetaDataFromCodeObject(std::string const& fileName); + ROCROLLER_DECLSPEC std::string readMetaDataFromCodeObject(std::string const& fileName); /** * @} diff --git a/lib/include/rocRoller/Utilities/Version.hpp b/lib/include/rocRoller/Utilities/Version.hpp index 335a281a..42a67684 100644 --- a/lib/include/rocRoller/Utilities/Version.hpp +++ b/lib/include/rocRoller/Utilities/Version.hpp @@ -26,12 +26,14 @@ #pragma once +#include + #include namespace rocRoller { namespace Version { - std::string Git(); + ROCROLLER_DECLSPEC std::string Git(); } } diff --git a/lib/include/rocRoller/rocRoller.hpp b/lib/include/rocRoller/rocRoller.hpp new file mode 100644 index 00000000..b3c2a5fc --- /dev/null +++ b/lib/include/rocRoller/rocRoller.hpp @@ -0,0 +1,37 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright 2024-2025 AMD ROCm(TM) Software + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#pragma once + +#ifdef _WIN32 +#ifdef _EXPORTING +#define ROCROLLER_DECLSPEC __declspec(dllexport) +#else +#define ROCROLLER_DECLSPEC __declspec(dllimport) +#endif +#else +#define ROCROLLER_DECLSPEC __attribute__((visibility("default"))) +#endif diff --git a/lib/source/CodeGen/LoadStoreTileGenerator.cpp b/lib/source/CodeGen/LoadStoreTileGenerator.cpp index 11989362..bc01817e 100644 --- a/lib/source/CodeGen/LoadStoreTileGenerator.cpp +++ b/lib/source/CodeGen/LoadStoreTileGenerator.cpp @@ -45,6 +45,7 @@ #include #include #include +#include namespace rocRoller { diff --git a/lib/source/CodeGen/MemoryInstructions.cpp b/lib/source/CodeGen/MemoryInstructions.cpp index 468e54c9..500a4494 100644 --- a/lib/source/CodeGen/MemoryInstructions.cpp +++ b/lib/source/CodeGen/MemoryInstructions.cpp @@ -25,6 +25,7 @@ *******************************************************************************/ #include +#include #include namespace rocRoller diff --git a/lib/source/Expression.cpp b/lib/source/Expression.cpp index cbf5db96..7b467aa2 100644 --- a/lib/source/Expression.cpp +++ b/lib/source/Expression.cpp @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include namespace rocRoller @@ -676,74 +676,6 @@ namespace rocRoller return ExpressionComplexityVisitor().call(expr); } - template - struct ContainsVisitor 
- { - bool operator()(T const& expr) - { - return true; - } - - template - requires(!std::same_as) bool operator()(U const& expr) - { - return call(expr.arg); - } - - template - requires(!std::same_as) bool operator()(U const& expr) - { - return call(expr.lhs) || call(expr.rhs); - } - - template - requires(!std::same_as) bool operator()(U const& expr) - { - return call(expr.lhs) || call(expr.r1hs) || call(expr.r2hs); - } - - template U> - requires(!std::same_as) bool operator()(U const& expr) - { - return call(expr.matA) || call(expr.matB) || call(expr.matC) || call(expr.scaleA) - || call(expr.scaleB); - } - - template - requires(!std::same_as) bool operator()(U const& expr) - { - return false; - } - - bool call(Expression const& expr) - { - return std::visit(*this, expr); - } - - bool call(ExpressionPtr const& expr) - { - if(!expr) - return false; - - return call(*expr); - } - }; - - template - __attribute__((noinline)) bool contains(Expression const& expr) - { - ContainsVisitor v; - return v.call(expr); - } - - template - __attribute__((noinline)) bool contains(ExpressionPtr expr) - { - AssertFatal(expr != nullptr); - - return contains(*expr); - } - /** * Force instantiation of contains() for every type of expression, so * that it can be implemented in the .cpp file. 
diff --git a/lib/source/ExpressionTransformations/FastMultiplication.cpp b/lib/source/ExpressionTransformations/FastMultiplication.cpp index c4d220b7..960c9ed7 100644 --- a/lib/source/ExpressionTransformations/FastMultiplication.cpp +++ b/lib/source/ExpressionTransformations/FastMultiplication.cpp @@ -25,6 +25,7 @@ *******************************************************************************/ #include +#include #include diff --git a/lib/source/ExpressionTransformations/FuseTernary.cpp b/lib/source/ExpressionTransformations/FuseTernary.cpp index de77c67a..c6d7a3c6 100644 --- a/lib/source/ExpressionTransformations/FuseTernary.cpp +++ b/lib/source/ExpressionTransformations/FuseTernary.cpp @@ -25,6 +25,7 @@ *******************************************************************************/ #include +#include template constexpr auto cast_to_unsigned(T val) diff --git a/lib/source/ExpressionTransformations/Identity.cpp b/lib/source/ExpressionTransformations/Identity.cpp index da243fc4..2785b0af 100644 --- a/lib/source/ExpressionTransformations/Identity.cpp +++ b/lib/source/ExpressionTransformations/Identity.cpp @@ -24,6 +24,7 @@ * *******************************************************************************/ +#include #include namespace rocRoller diff --git a/lib/source/ExpressionTransformations/LowerExponential.cpp b/lib/source/ExpressionTransformations/LowerExponential.cpp index 4265d148..44b48450 100644 --- a/lib/source/ExpressionTransformations/LowerExponential.cpp +++ b/lib/source/ExpressionTransformations/LowerExponential.cpp @@ -25,6 +25,7 @@ *******************************************************************************/ #include +#include namespace rocRoller { diff --git a/lib/source/ExpressionTransformations/LowerPRNG.cpp b/lib/source/ExpressionTransformations/LowerPRNG.cpp index 5584e789..9e62ba2d 100644 --- a/lib/source/ExpressionTransformations/LowerPRNG.cpp +++ b/lib/source/ExpressionTransformations/LowerPRNG.cpp @@ -25,6 +25,7 @@ 
*******************************************************************************/ #include +#include namespace rocRoller { diff --git a/lib/source/ExpressionTransformations/RestoreCommandArguments.cpp b/lib/source/ExpressionTransformations/RestoreCommandArguments.cpp index f827a8f4..05bde30e 100644 --- a/lib/source/ExpressionTransformations/RestoreCommandArguments.cpp +++ b/lib/source/ExpressionTransformations/RestoreCommandArguments.cpp @@ -26,6 +26,7 @@ #include #include +#include #include namespace rocRoller diff --git a/lib/source/ExpressionTransformations/Simplify.cpp b/lib/source/ExpressionTransformations/Simplify.cpp index ce4b4ca4..3b58c23f 100644 --- a/lib/source/ExpressionTransformations/Simplify.cpp +++ b/lib/source/ExpressionTransformations/Simplify.cpp @@ -25,6 +25,7 @@ *******************************************************************************/ #include +#include template constexpr auto cast_to_unsigned(T val) diff --git a/lib/source/KernelGraph/Transformations/CleanArguments.cpp b/lib/source/KernelGraph/Transformations/CleanArguments.cpp index f87cd8ee..e06391f6 100644 --- a/lib/source/KernelGraph/Transformations/CleanArguments.cpp +++ b/lib/source/KernelGraph/Transformations/CleanArguments.cpp @@ -35,6 +35,8 @@ #include +#include + namespace rocRoller { namespace KernelGraph diff --git a/lib/source/KernelGraph/Transformations/UnrollLoops.cpp b/lib/source/KernelGraph/Transformations/UnrollLoops.cpp index c95c942a..176c2fe3 100644 --- a/lib/source/KernelGraph/Transformations/UnrollLoops.cpp +++ b/lib/source/KernelGraph/Transformations/UnrollLoops.cpp @@ -32,6 +32,7 @@ #include #include #include + namespace rocRoller { namespace KernelGraph diff --git a/test/catch/SettingsTest.cpp b/test/catch/SettingsTest.cpp index 7353022f..b0afeaed 100644 --- a/test/catch/SettingsTest.cpp +++ b/test/catch/SettingsTest.cpp @@ -50,7 +50,17 @@ namespace SettingsTest { val = ptr; } - m_envVars.emplace_back(setting->name, std::move(val)); + m_envVars[setting->name] = 
val; + } + + // This is not a setting option, but affects the env + if(auto ptr = getenv(Settings::BitfieldName.c_str())) + { + m_envVars[Settings::BitfieldName] = ptr; + } + else + { + m_envVars[Settings::BitfieldName] = std::nullopt; } setenv(Settings::BitfieldName.c_str(), "0xFFFFFFFF", 1); @@ -60,8 +70,25 @@ namespace SettingsTest setenv(Settings::Scheduler.name.c_str(), "invalidScheduler", 1); } + ~EnvSettingsTest() override + { + for(auto entry : m_envVars) + { + if(entry.second.has_value()) + { + setenv(entry.first.c_str(), entry.second.value().c_str(), 1); + } + else + { + unsetenv(entry.first.c_str()); + } + } + + Settings::reset(); + } + private: - std::vector>> m_envVars; + std::map> m_envVars; }; TEST_CASE("Basic settings behavior", "[settings]") @@ -86,8 +113,6 @@ namespace SettingsTest SECTION("Log levels") { - auto settings = Settings::getInstance(); - std::ostringstream out; out << LogLevel::None << std::endl; out << LogLevel::Error << std::endl; @@ -138,6 +163,8 @@ namespace SettingsTest == Scheduling::SchedulerProcedure::Cooperative); CHECK_THROWS_AS(settings->set(Settings::LogConsole, "invalidValue"), FatalError); + + Settings::reset(); } SECTION("Settings should be helpful") @@ -165,7 +192,8 @@ namespace SettingsTest size_t numUnexpectedLogLevels = 0; size_t numIters = 0; -#pragma omp parallel num_threads(numTestThreads) reduction(+:numUnexpectedLogLevels) reduction(+:numIters) +#pragma omp parallel num_threads(numTestThreads) reduction(+ : numUnexpectedLogLevels) \ + reduction(+ : numIters) { size_t iters = 0; int tid = omp_get_thread_num(); @@ -193,6 +221,8 @@ namespace SettingsTest CHECK(numUnexpectedLogLevels == 0); CHECK(numIters >= numTestThreads * minIters); } + + Settings::reset(); } TEST_CASE("Settings with associated envvars", "[settings]") @@ -209,6 +239,8 @@ namespace SettingsTest unsetenv(Settings::BitfieldName.c_str()); auto settings = Settings::getInstance(); CHECK_THROWS_AS(settings->get(Settings::Scheduler), FatalError); + + 
Settings::reset(); } SECTION("Environment variables take precedence") @@ -221,6 +253,8 @@ namespace SettingsTest // bitfield takes precedence over default value CHECK(settings->get(Settings::SaveAssembly)); + + Settings::reset(); } SECTION("Set and get from envvars-backed settings") @@ -251,6 +285,8 @@ namespace SettingsTest settings->set(Settings::BreakOnThrow, false); // Fatal error reading unparseable env var CHECK_THROWS_AS(settings->get(Settings::Scheduler), FatalError); + + Settings::reset(); } } } diff --git a/test/catch/SimpleTest.hpp b/test/catch/SimpleTest.hpp index f2f0b5c4..10e4f92a 100644 --- a/test/catch/SimpleTest.hpp +++ b/test/catch/SimpleTest.hpp @@ -33,7 +33,7 @@ class SimpleTest { public: SimpleTest() = default; - ~SimpleTest() + virtual ~SimpleTest() { rocRoller::Settings::reset(); rocRoller::Component::ComponentFactoryBase::ClearAllCaches(); diff --git a/test/catch/SubDwordExpressionTest.cpp b/test/catch/SubDwordExpressionTest.cpp index b48abdd5..f5f5d576 100644 --- a/test/catch/SubDwordExpressionTest.cpp +++ b/test/catch/SubDwordExpressionTest.cpp @@ -44,35 +44,35 @@ namespace SubDwordExpressionTest SECTION("cannot get bitfield from a literal") { auto literal = Register::Value::Literal(42); - CHECK_THROWS(literal->bitfield(8, 8)); + CHECK_THROWS_AS(literal->bitfield(8, 8), FatalError); } SECTION("bitOffset cannot be greater than the number of bits in a Value") { auto r = std::make_shared( context.get(), Register::Type::Vector, DataType::UInt32, 1); - CHECK_THROWS(r->bitfield(32, 8)); + CHECK_THROWS_AS(r->bitfield(32, 8), FatalError); } SECTION("bitwidth cannot be zero") { auto r = std::make_shared( context.get(), Register::Type::Vector, DataType::UInt32, 1); - CHECK_THROWS(r->bitfield(8, 0)); + CHECK_THROWS_AS(r->bitfield(8, 0), FatalError); } SECTION("bitwidth cannot be greater than the number of bits in a register") { auto r = std::make_shared( context.get(), Register::Type::Vector, DataType::UInt32, 1); - CHECK_THROWS(r->bitfield(8, 32)); 
+ CHECK_THROWS_AS(r->bitfield(8, 32), FatalError); } SECTION("indices must refer to adjacent elements") { auto r = std::make_shared( context.get(), Register::Type::Vector, DataType::UInt8x4, 1); - CHECK_THROWS(r->segment({0, 2})); + CHECK_THROWS_AS(r->segment({0, 2}), FatalError); } SECTION("generate from expression with bitfields") diff --git a/test/unit/MatrixMultiplyTest.cpp b/test/unit/MatrixMultiplyTest.cpp index 8d3e694a..d33761a8 100644 --- a/test/unit/MatrixMultiplyTest.cpp +++ b/test/unit/MatrixMultiplyTest.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include "GPUContextFixture.hpp" #include "SourceMatcher.hpp"