diff --git a/CHANGELOG.md b/CHANGELOG.md index 455e10f1..62d2ba30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## Unreleased + +* Preload Windows backend modules with `LOAD_WITH_ALTERED_SEARCH_PATH` from + the resolved native backend bundle directory before handing them to + llama.cpp, so CUDA backend discovery can resolve colocated CUDA + redistributables without app `PATH` changes. + ## 0.8.2 * Updated the default llama.cpp native runtime pin to diff --git a/lib/src/backends/llama_cpp/llama_cpp_service.dart b/lib/src/backends/llama_cpp/llama_cpp_service.dart index 0014aa07..634241ee 100644 --- a/lib/src/backends/llama_cpp/llama_cpp_service.dart +++ b/lib/src/backends/llama_cpp/llama_cpp_service.dart @@ -46,6 +46,14 @@ typedef _GgmlBackendRegDevGetNative = ggml_backend_dev_t Function(ggml_backend_reg_t, Size); typedef _GgmlBackendRegDevGetDart = ggml_backend_dev_t Function(ggml_backend_reg_t, int); +typedef _LoadLibraryExWNative = + Pointer Function(Pointer, Pointer, Uint32); +typedef _LoadLibraryExWDart = + Pointer Function(Pointer, Pointer, int); +typedef _FreeLibraryNative = Int32 Function(Pointer); +typedef _FreeLibraryDart = int Function(Pointer); +typedef _SetErrorModeNative = Uint32 Function(Uint32); +typedef _SetErrorModeDart = int Function(int); typedef _GgmlBackendDevCountNative = Size Function(); typedef _GgmlBackendDevCountDart = int Function(); typedef _GgmlBackendDevGetNative = ggml_backend_dev_t Function(Size); @@ -382,6 +390,8 @@ class LlamaCppService { defaultValue: false, ); static const int _maxStartupDiagnostics = 32; + static const int _loadWithAlteredSearchPath = 0x00000008; + static const int _semFailCriticalErrors = 0x0001; static const Map _androidCpuVariantPriority = { 'android_armv9.2_2': 0, 'android_armv9.2_1': 1, @@ -398,6 +408,8 @@ class LlamaCppService { final Set _failedBackendModules = {}; final Map _loadedBackendLibraries = {}; + final Map> _preloadedBackendDependencyLibraries = + >{}; final List 
_preloadedCoreLibraries = []; bool _backendLoadAllSymbolUnavailable = false; bool _backendLoadAllFromPathSymbolUnavailable = false; @@ -1835,11 +1847,17 @@ class LlamaCppService { candidates.addAll(fileNameCandidates); } + _preloadWindowsBackendDependencies(backend); + for (final candidate in candidates) { if (path.isAbsolute(candidate) && !File(candidate).existsSync()) { continue; } + final alteredSearchPathHandle = _preloadWindowsBackendModule( + candidate, + backend, + ); final libraryPathPtr = candidate.toNativeUtf8(); try { ggml_backend_reg_t reg; @@ -1869,6 +1887,13 @@ class LlamaCppService { _failedBackendModules.remove(backend); return true; } finally { + if (alteredSearchPathHandle != nullptr) { + _freeWindowsBackendModule( + alteredSearchPathHandle, + candidate, + backend, + ); + } malloc.free(libraryPathPtr); } } @@ -1900,6 +1925,200 @@ class LlamaCppService { return candidates.toList(growable: false); } + Pointer _preloadWindowsBackendModule( + String libraryPath, + String backend, + ) { + final flags = windowsBackendModuleLoadFlags(libraryPath); + if (!Platform.isWindows || flags == 0) { + return nullptr; + } + + try { + final kernel32 = DynamicLibrary.open('kernel32.dll'); + final setErrorMode = kernel32 + .lookupFunction<_SetErrorModeNative, _SetErrorModeDart>( + 'SetErrorMode', + ); + final loadLibraryExW = kernel32 + .lookupFunction<_LoadLibraryExWNative, _LoadLibraryExWDart>( + 'LoadLibraryExW', + ); + // Allocate first so a failure cannot leave the error mode altered. + final libraryPathPtr = libraryPath.toNativeUtf16(); + final oldMode = setErrorMode(_semFailCriticalErrors); + setErrorMode(oldMode | _semFailCriticalErrors); + try { + final handle = loadLibraryExW(libraryPathPtr, nullptr, flags); + if (handle == nullptr) { + _recordStartupDiagnostic( + 
'Failed to preload Windows backend module `$libraryPath` with ' + 'LOAD_WITH_ALTERED_SEARCH_PATH for `$backend`: $error', + ); + return nullptr; + } + } + + void _freeWindowsBackendModule( + Pointer handle, + String libraryPath, + String backend, + ) { + if (!Platform.isWindows || handle == nullptr) { + return; + } + + try { + final kernel32 = DynamicLibrary.open('kernel32.dll'); + final freeLibrary = kernel32 + .lookupFunction<_FreeLibraryNative, _FreeLibraryDart>('FreeLibrary'); + if (freeLibrary(handle) == 0) { + _recordStartupDiagnostic( + 'Failed to release temporary Windows backend module preload for ' + '`$libraryPath` (`$backend`).', + ); + } + } catch (error) { + _recordStartupDiagnostic( + 'Failed to release temporary Windows backend module preload for ' + '`$libraryPath` (`$backend`): $error', + ); + } + } + + void _preloadWindowsBackendDependencies(String backend) { + if (!Platform.isWindows) { + return; + } + + final backendModuleDirectory = _backendModuleDirectory; + if (backendModuleDirectory == null) { + return; + } + + final cacheKey = + '$backend|${path.normalize(backendModuleDirectory).toLowerCase()}'; + if (_preloadedBackendDependencyLibraries.containsKey(cacheKey)) { + return; + } + + final handles = []; + _preloadedBackendDependencyLibraries[cacheKey] = handles; + + for (final dependencyPath in windowsBackendDependencyPaths( + backendModuleDirectory, + backend, + )) { + try { + handles.add(DynamicLibrary.open(dependencyPath)); + } catch (error) { + _recordStartupDiagnostic( + 'Failed to preload Windows backend dependency ' + '`$dependencyPath` for `$backend`: $error', + ); + } + } + } + + /// Returns Windows `LoadLibraryExW` flags for preloading a backend module. + /// + /// llama.cpp currently opens dynamic backend modules with plain + /// `LoadLibraryW`. 
For absolute native-asset bundle paths, an earlier + /// `LOAD_WITH_ALTERED_SEARCH_PATH` load lets Windows resolve transitive DLL + /// imports from the backend module directory before llama.cpp registers it. + static int windowsBackendModuleLoadFlags(String libraryPath) { + if (!path.isAbsolute(libraryPath)) { + return 0; + } + return _loadWithAlteredSearchPath; + } + + /// Returns absolute paths for backend-owned Windows dependency DLLs that + /// can be preloaded before asking llama.cpp to dynamically load [backend]. + /// + /// This is only a best-effort compatibility path. The backend module itself + /// is also preloaded with `LOAD_WITH_ALTERED_SEARCH_PATH`, because Windows + /// may still fail to resolve module-owned transitive imports from the bundle + /// directory after individual dependency DLLs were loaded by absolute path. + static List windowsBackendDependencyPaths( + String directoryPath, + String backend, { + Iterable? fileNames, + }) { + if (backend != 'cuda') { + return const []; + } + + final names = + fileNames?.toList(growable: false) ?? 
+ _listWindowsBackendDependencyFileNames(directoryPath); + final selected = []; + for (final name in names) { + final lower = name.toLowerCase(); + if (!lower.endsWith('.dll')) { + continue; + } + if (lower.startsWith('cudart64_') || + lower.startsWith('cublas64_') || + lower.startsWith('cublaslt64_')) { + selected.add(name); + } + } + + selected.sort((a, b) { + final priorityCompare = _windowsCudaDependencyPriority( + a, + ).compareTo(_windowsCudaDependencyPriority(b)); + if (priorityCompare != 0) { + return priorityCompare; + } + return a.toLowerCase().compareTo(b.toLowerCase()); + }); + + return selected + .map((name) => path.join(directoryPath, name)) + .toList(growable: false); + } + + static List _listWindowsBackendDependencyFileNames( + String directoryPath, + ) { + try { + return Directory(directoryPath) + .listSync() + .whereType() + .map((file) => path.basename(file.path)) + .toList(growable: false); + } catch (_) { + return const []; + } + } + + static int _windowsCudaDependencyPriority(String fileName) { + final lower = fileName.toLowerCase(); + if (lower.startsWith('cudart64_')) { + return 0; + } + if (lower.startsWith('cublaslt64_')) { + return 1; // cublas64 imports cublasLt64, so load cublasLt first. + } + if (lower.startsWith('cublas64_')) { + return 2; // Loaded last: its cublasLt import is already resident. + } + return 100; + } + bool _tryRegisterBackendModuleViaAsset(String backend) { final assetCandidates = _backendAssetUriCandidates(backend); final recordAssetDiagnostics = backend == 'cpu' && Platform.isAndroid; diff --git a/test/unit/backends/llama_cpp/llama_cpp_service_test.dart b/test/unit/backends/llama_cpp/llama_cpp_service_test.dart index decf803c..39ff2f8d 100644 --- a/test/unit/backends/llama_cpp/llama_cpp_service_test.dart +++ b/test/unit/backends/llama_cpp/llama_cpp_service_test.dart @@ -726,6 +726,47 @@ void main() { expect(path.normalize(resolved!), path.normalize(overrideDir.path)); }); + test('uses altered search path for absolute Windows backend modules', () { + expect( + LlamaCppService.windowsBackendModuleLoadFlags( + 
path.join(tempRoot.path, 'ggml-cuda.dll'), + ), + 0x00000008, + ); + expect( + LlamaCppService.windowsBackendModuleLoadFlags('ggml-cuda.dll'), + isZero, + ); + }); + + test('orders CUDA redistributable DLLs for best-effort preloading', () { + final dependencyPaths = LlamaCppService.windowsBackendDependencyPaths( + tempRoot.path, + 'cuda', + fileNames: const [ + 'ggml-cuda.dll', + 'cublasLt64_12.dll', + 'notes.txt', + 'cudart64_12.dll', + 'cublas64_12.dll', + ], + ); + + expect(dependencyPaths, [ + path.join(tempRoot.path, 'cudart64_12.dll'), + path.join(tempRoot.path, 'cublasLt64_12.dll'), + path.join(tempRoot.path, 'cublas64_12.dll'), + ]); + expect( + LlamaCppService.windowsBackendDependencyPaths( + tempRoot.path, + 'vulkan', + fileNames: const ['cudart64_12.dll'], + ), + isEmpty, + ); + }); + test('falls back to hook cache extracted bundle directory', () { final extractedDir = Directory( path.join(