diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 62f2a7e2..83c452fb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,3 +41,4 @@ jobs: ./build/tools/run_tests ./build/tools/benchmodel ./example_models/wavenet.nam ./build/tools/benchmodel ./example_models/lstm.nam + ./build/tools/render ./example_models/wavenet.nam ./example_audio/input.wav ./example_audio/output.wav diff --git a/.gitignore b/.gitignore index b7ee58e6..34ee36ee 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,5 @@ docs/_build/ *.DS_Store + +example_audio/output.wav diff --git a/.gitmodules b/.gitmodules index 11c19841..f49ce6e8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "Dependencies/eigen"] path = Dependencies/eigen url = https://gitlab.com/libeigen/eigen +[submodule "Dependencies/AudioDSPTools"] + path = Dependencies/AudioDSPTools + url = https://github.com/sdatkinson/AudioDSPTools.git diff --git a/Dependencies/AudioDSPTools b/Dependencies/AudioDSPTools new file mode 160000 index 00000000..0827c6c2 --- /dev/null +++ b/Dependencies/AudioDSPTools @@ -0,0 +1 @@ +Subproject commit 0827c6c2fc0deced568536142ea86f189e0b98a1 diff --git a/example_audio/input.wav b/example_audio/input.wav new file mode 100644 index 00000000..fd0302bd Binary files /dev/null and b/example_audio/input.wav differ diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 8118e085..2c3ddabe 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -4,14 +4,42 @@ file(GLOB_RECURSE NAM_SOURCES ../NAM/*.cpp ../NAM/*.c ../NAM*.h) set(TOOLS benchmodel) add_custom_target(tools ALL - DEPENDS ${TOOLS}) + DEPENDS ${TOOLS} render) + +set(AUDIO_DSP_TOOLS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../Dependencies/AudioDSPTools") +set(AUDIO_DSP_TOOLS_WAV_SOURCES "${AUDIO_DSP_TOOLS_DIR}/dsp/wav.cpp") include_directories(tools ..) include_directories(tools ${NAM_DEPS_PATH}/eigen) include_directories(tools ${NAM_DEPS_PATH}/nlohmann) +include_directories(tools ${AUDIO_DSP_TOOLS_DIR}/dsp) add_executable(loadmodel loadmodel.cpp ${NAM_SOURCES}) add_executable(benchmodel benchmodel.cpp ${NAM_SOURCES}) +add_executable(render render.cpp ${NAM_SOURCES} ${AUDIO_DSP_TOOLS_WAV_SOURCES}) +target_compile_features(render PUBLIC cxx_std_20) +# AudioDSPTools wav.cpp has sign-compare issues; don't fail build +set_source_files_properties(${AUDIO_DSP_TOOLS_WAV_SOURCES} PROPERTIES COMPILE_FLAGS "-Wno-error") +set_target_properties(render PROPERTIES + CXX_VISIBILITY_PRESET hidden + INTERPROCEDURAL_OPTIMIZATION TRUE + PREFIX "" +) +if (CMAKE_SYSTEM_NAME STREQUAL "Windows") + target_compile_definitions(render PRIVATE NOMINMAX WIN32_LEAN_AND_MEAN) +endif() +if (MSVC) + target_compile_options(render PRIVATE + "$<$:/W4>" + "$<$:/O2>" + ) +else() + target_compile_options(render PRIVATE + -Wall -Wextra -Wpedantic -Wstrict-aliasing -Wunreachable-code -Weffc++ -Wno-unused-parameter + "$<$:-Og;-ggdb;-Werror>" + "$<$:-Ofast>" + ) +endif() add_executable(run_tests run_tests.cpp test/allocation_tracking.cpp ${NAM_SOURCES}) # Compile run_tests without optimizations to ensure allocation tracking works correctly # Also ensure assertions are enabled (NDEBUG is not defined) so tests actually run diff --git a/tools/render.cpp b/tools/render.cpp new file mode 100644 index 00000000..77836b41 --- /dev/null +++ b/tools/render.cpp @@ -0,0 +1,159 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "NAM/dsp.h" +#include "NAM/get_dsp.h" +#include "wav.h" + +namespace +{ +// Write mono 32-bit float WAV file (IEEE float format 3). +bool SaveWavFloat32(const char* fileName, const float* samples, size_t numSamples, double sampleRate) +{ + std::ofstream out(fileName, std::ios::binary); + if (!out.is_open()) + { + std::cerr << "Error: Failed to open output file " << fileName << "\n"; + return false; + } + + const uint32_t dataSize = static_cast(numSamples * sizeof(float)); + const uint32_t chunkSize = 36 + dataSize; + + // RIFF header + out.write("RIFF", 4); + out.write(reinterpret_cast(&chunkSize), 4); + out.write("WAVE", 4); + + // fmt chunk (16 bytes for PCM/IEEE) + const uint32_t fmtSize = 16; + out.write("fmt ", 4); + out.write(reinterpret_cast(&fmtSize), 4); + const uint16_t audioFormat = 3; // IEEE float + out.write(reinterpret_cast(&audioFormat), 2); + const uint16_t numChannels = 1; + out.write(reinterpret_cast(&numChannels), 2); + const uint32_t sr = static_cast(sampleRate); + out.write(reinterpret_cast(&sr), 4); + const uint32_t byteRate = sr * sizeof(float); + out.write(reinterpret_cast(&byteRate), 4); + const uint16_t blockAlign = sizeof(float); + out.write(reinterpret_cast(&blockAlign), 2); + const uint16_t bitsPerSample = 32; + out.write(reinterpret_cast(&bitsPerSample), 2); + + // data chunk + out.write("data", 4); + out.write(reinterpret_cast(&dataSize), 4); + out.write(reinterpret_cast(samples), dataSize); + + return out.good(); +} + +} // namespace + +int main(int argc, char* argv[]) +{ + if (argc < 3 || argc > 4) + { + std::cerr << "Usage: render [output.wav]\n"; + return 1; + } + + const char* modelPath = argv[1]; + const char* inputPath = argv[2]; + const char* outputPath = (argc >= 4) ? argv[3] : "output.wav"; + + std::cerr << "Loading model [" << modelPath << "]\n"; + auto model = nam::get_dsp(std::filesystem::path(modelPath)); + if (!model) + { + std::cerr << "Failed to load model\n"; + return 1; + } + std::cerr << "Model loaded successfully\n"; + + std::vector inputAudio; + double inputSampleRate = 0.0; + auto loadResult = dsp::wav::Load(inputPath, inputAudio, inputSampleRate); + if (loadResult != dsp::wav::LoadReturnCode::SUCCESS) + { + std::cerr << "Failed to load input WAV: " << dsp::wav::GetMsgForLoadReturnCode(loadResult) << "\n"; + return 1; + } + + const double expectedRate = model->GetExpectedSampleRate(); + if (expectedRate > 0 && std::abs(inputSampleRate - expectedRate) > 0.5) + { + std::cerr << "Error: Input WAV sample rate (" << inputSampleRate + << " Hz) does not match model expected rate (" << expectedRate << " Hz)\n"; + return 1; + } + + const double sampleRate = expectedRate > 0 ? expectedRate : inputSampleRate; + const int bufferSize = 64; + model->Reset(sampleRate, bufferSize); + + const int inChannels = model->NumInputChannels(); + const int outChannels = model->NumOutputChannels(); + + if (inChannels != 1) + { + std::cerr << "Error: render tool currently supports mono input only (model has " << inChannels + << " input channels)\n"; + return 1; + } + + std::vector> inputBuffers(inChannels); + std::vector> outputBuffers(outChannels); + std::vector inputPtrs(inChannels); + std::vector outputPtrs(outChannels); + + for (int ch = 0; ch < inChannels; ch++) + { + inputBuffers[ch].resize(bufferSize, 0.0); + inputPtrs[ch] = inputBuffers[ch].data(); + } + for (int ch = 0; ch < outChannels; ch++) + { + outputBuffers[ch].resize(bufferSize, 0.0); + outputPtrs[ch] = outputBuffers[ch].data(); + } + + std::vector outputAudio; + outputAudio.reserve(static_cast(outChannels) * inputAudio.size()); + + size_t readPos = 0; + const size_t totalSamples = inputAudio.size(); + + while (readPos < totalSamples) + { + const size_t toRead = std::min(static_cast(bufferSize), totalSamples - readPos); + + for (size_t i = 0; i < toRead; i++) + inputBuffers[0][i] = static_cast(inputAudio[readPos + i]); + for (size_t i = toRead; i < static_cast(bufferSize); i++) + inputBuffers[0][i] = 0; + + model->process(inputPtrs.data(), outputPtrs.data(), static_cast(toRead)); + + for (size_t i = 0; i < toRead; i++) + outputAudio.push_back(static_cast(outputBuffers[0][i])); + + readPos += toRead; + } + + if (!SaveWavFloat32(outputPath, outputAudio.data(), outputAudio.size(), sampleRate)) + { + return 1; + } + + std::cerr << "Wrote " << outputAudio.size() << " samples to " << outputPath << "\n"; + return 0; +}