Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## Unreleased

* Preload Windows backend modules from the resolved native backend bundle
directory with `LOAD_WITH_ALTERED_SEARCH_PATH` before handing them to
llama.cpp, so that CUDA backend discovery can resolve colocated CUDA
redistributables without the app having to modify `PATH`.

## 0.8.2

* Updated the default llama.cpp native runtime pin to
Expand Down
219 changes: 219 additions & 0 deletions lib/src/backends/llama_cpp/llama_cpp_service.dart
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ typedef _GgmlBackendRegDevGetNative =
ggml_backend_dev_t Function(ggml_backend_reg_t, Size);
typedef _GgmlBackendRegDevGetDart =
ggml_backend_dev_t Function(ggml_backend_reg_t, int);
/// Native signature used to look up `LoadLibraryExW` in `kernel32.dll`:
/// (UTF-16 file name, reserved file handle, flags) -> module handle.
typedef _LoadLibraryExWNative =
Pointer<Void> Function(Pointer<Utf16>, Pointer<Void>, Uint32);
/// Dart-side counterpart of [_LoadLibraryExWNative].
typedef _LoadLibraryExWDart =
Pointer<Void> Function(Pointer<Utf16>, Pointer<Void>, int);
/// Native signature used to look up `FreeLibrary`; callers in this file treat
/// a zero result as failure.
typedef _FreeLibraryNative = Int32 Function(Pointer<Void>);
/// Dart-side counterpart of [_FreeLibraryNative].
typedef _FreeLibraryDart = int Function(Pointer<Void>);
/// Native signature used to look up `SetErrorMode`; the result is used by
/// callers in this file as the previous error mode.
typedef _SetErrorModeNative = Uint32 Function(Uint32);
/// Dart-side counterpart of [_SetErrorModeNative].
typedef _SetErrorModeDart = int Function(int);
typedef _GgmlBackendDevCountNative = Size Function();
typedef _GgmlBackendDevCountDart = int Function();
typedef _GgmlBackendDevGetNative = ggml_backend_dev_t Function(Size);
Expand Down Expand Up @@ -382,6 +390,8 @@ class LlamaCppService {
defaultValue: false,
);
static const int _maxStartupDiagnostics = 32;
/// Flag value passed to `LoadLibraryExW` to request
/// `LOAD_WITH_ALTERED_SEARCH_PATH` when preloading a backend module.
static const int _loadWithAlteredSearchPath = 0x00000008;
/// Error-mode bit ORed into the current mode via `SetErrorMode` while a
/// backend module is being preloaded (Win32 `SEM_FAILCRITICALERRORS`).
static const int _semFailCriticalErrors = 0x0001;
static const Map<String, int> _androidCpuVariantPriority = <String, int>{
'android_armv9.2_2': 0,
'android_armv9.2_1': 1,
Expand All @@ -398,6 +408,8 @@ class LlamaCppService {
final Set<String> _failedBackendModules = <String>{};
final Map<String, DynamicLibrary> _loadedBackendLibraries =
<String, DynamicLibrary>{};
/// Dependency libraries opened ahead of a backend module, keyed by
/// `'<backend>|<normalized, lower-cased module directory>'`.
///
/// The presence of a key means preloading was already attempted for that
/// backend/directory pair, even if the stored list ended up empty.
final Map<String, List<DynamicLibrary>> _preloadedBackendDependencyLibraries =
<String, List<DynamicLibrary>>{};
final List<DynamicLibrary> _preloadedCoreLibraries = <DynamicLibrary>[];
bool _backendLoadAllSymbolUnavailable = false;
bool _backendLoadAllFromPathSymbolUnavailable = false;
Expand Down Expand Up @@ -1835,11 +1847,17 @@ class LlamaCppService {
candidates.addAll(fileNameCandidates);
}

_preloadWindowsBackendDependencies(backend);

for (final candidate in candidates) {
if (path.isAbsolute(candidate) && !File(candidate).existsSync()) {
continue;
}

final alteredSearchPathHandle = _preloadWindowsBackendModule(
candidate,
backend,
);
final libraryPathPtr = candidate.toNativeUtf8();
try {
ggml_backend_reg_t reg;
Expand Down Expand Up @@ -1869,6 +1887,13 @@ class LlamaCppService {
_failedBackendModules.remove(backend);
return true;
} finally {
if (alteredSearchPathHandle != nullptr) {
_freeWindowsBackendModule(
alteredSearchPathHandle,
candidate,
backend,
);
}
malloc.free(libraryPathPtr);
}
}
Expand Down Expand Up @@ -1900,6 +1925,200 @@ class LlamaCppService {
return candidates.toList(growable: false);
}

/// Loads [libraryPath] through `LoadLibraryExW` using the flags returned by
/// [windowsBackendModuleLoadFlags] and returns the resulting module handle.
///
/// Returns `nullptr` when not running on Windows, when no flags apply to
/// [libraryPath], or when the load fails. Failures are best-effort: they are
/// recorded as startup diagnostics for [backend] and never thrown to the
/// caller. A non-null handle is owned by the caller, which is expected to
/// release it again (see `_freeWindowsBackendModule`).
Pointer<Void> _preloadWindowsBackendModule(
  String libraryPath,
  String backend,
) {
  final flags = windowsBackendModuleLoadFlags(libraryPath);
  if (!Platform.isWindows || flags == 0) {
    return nullptr;
  }

  try {
    final kernel32 = DynamicLibrary.open('kernel32.dll');
    final setErrorMode = kernel32
        .lookupFunction<_SetErrorModeNative, _SetErrorModeDart>(
          'SetErrorMode',
        );
    final loadLibraryExW = kernel32
        .lookupFunction<_LoadLibraryExWNative, _LoadLibraryExWDart>(
          'LoadLibraryExW',
        );

    // The LoadLibraryExW documentation requires backslash separators in the
    // file name, so forward slashes are normalized before the call.
    // Diagnostics keep reporting the path exactly as the caller supplied it.
    final nativePath = libraryPath.replaceAll('/', r'\');

    // Allocate before touching the error mode so that an allocation failure
    // cannot leave the process-wide error mode modified.
    final libraryPathPtr = nativePath.toNativeUtf16();
    try {
      final Pointer<Void> handle;
      // SetErrorMode returns the previous mode; the second call re-applies
      // it with the critical-error bit added for the duration of the load.
      final oldMode = setErrorMode(_semFailCriticalErrors);
      try {
        setErrorMode(oldMode | _semFailCriticalErrors);
        handle = loadLibraryExW(libraryPathPtr, nullptr, flags);
      } finally {
        // Always restore the caller's error mode, whatever happened above.
        setErrorMode(oldMode);
      }

      if (handle == nullptr) {
        _recordStartupDiagnostic(
          'Failed to preload Windows backend module `$libraryPath` with '
          'LOAD_WITH_ALTERED_SEARCH_PATH for `$backend`.',
        );
      }
      return handle;
    } finally {
      malloc.free(libraryPathPtr);
    }
  } catch (error) {
    // Symbol lookup or FFI failures must not abort backend registration.
    _recordStartupDiagnostic(
      'Failed to preload Windows backend module `$libraryPath` with '
      'LOAD_WITH_ALTERED_SEARCH_PATH for `$backend`: $error',
    );
    return nullptr;
  }
}

/// Releases a module [handle] through the Win32 `FreeLibrary` entry point.
///
/// Does nothing off Windows or for a null [handle]. A zero result from
/// `FreeLibrary`, or any error thrown while resolving or calling it, is
/// recorded as a startup diagnostic naming [libraryPath] and [backend]
/// instead of being propagated.
void _freeWindowsBackendModule(
  Pointer<Void> handle,
  String libraryPath,
  String backend,
) {
  final nothingToRelease = !Platform.isWindows || handle == nullptr;
  if (nothingToRelease) return;

  try {
    final freeLibrary = DynamicLibrary.open('kernel32.dll')
        .lookupFunction<_FreeLibraryNative, _FreeLibraryDart>('FreeLibrary');
    final released = freeLibrary(handle) != 0;
    if (released) return;

    _recordStartupDiagnostic(
      'Failed to release temporary Windows backend module preload for '
      '`$libraryPath` (`$backend`).',
    );
  } catch (error) {
    _recordStartupDiagnostic(
      'Failed to release temporary Windows backend module preload for '
      '`$libraryPath` (`$backend`): $error',
    );
  }
}

/// Opens the dependency DLLs reported by [windowsBackendDependencyPaths] for
/// [backend] from the current backend module directory.
///
/// Runs at most once per backend/directory pair: the cache entry is created
/// before any library is opened, so later calls return early even when some
/// or all loads failed. Individual load failures are recorded as startup
/// diagnostics and do not stop the remaining dependencies from being tried.
void _preloadWindowsBackendDependencies(String backend) {
  if (!Platform.isWindows) return;

  final moduleDirectory = _backendModuleDirectory;
  if (moduleDirectory == null) return;

  final normalizedDirectory = path.normalize(moduleDirectory).toLowerCase();
  final cacheKey = '$backend|$normalizedDirectory';
  if (_preloadedBackendDependencyLibraries.containsKey(cacheKey)) return;

  // Register the (still empty) list first; it is filled in place below.
  final openedLibraries =
      _preloadedBackendDependencyLibraries[cacheKey] = <DynamicLibrary>[];

  final dependencyPaths = windowsBackendDependencyPaths(
    moduleDirectory,
    backend,
  );
  for (final dependencyPath in dependencyPaths) {
    try {
      final library = DynamicLibrary.open(dependencyPath);
      openedLibraries.add(library);
    } catch (error) {
      _recordStartupDiagnostic(
        'Failed to preload Windows backend dependency '
        '`$dependencyPath` for `$backend`: $error',
      );
    }
  }
}

/// Selects the `LoadLibraryExW` flags for preloading the backend module at
/// [libraryPath].
///
/// Absolute paths get `LOAD_WITH_ALTERED_SEARCH_PATH`; anything else yields
/// `0`, which callers treat as "do not preload".
///
/// llama.cpp currently opens dynamic backend modules with plain
/// `LoadLibraryW`. Loading an absolute native-asset bundle path beforehand
/// with the altered search path lets Windows resolve the module's transitive
/// DLL imports from the module's own directory before llama.cpp registers it.
static int windowsBackendModuleLoadFlags(String libraryPath) =>
    path.isAbsolute(libraryPath) ? _loadWithAlteredSearchPath : 0;

/// Lists the backend-owned Windows dependency DLLs under [directoryPath] that
/// may be preloaded before llama.cpp dynamically loads [backend].
///
/// Only the `cuda` backend has dependencies; every other backend yields an
/// empty list. Candidates are `.dll` files whose lower-cased name starts with
/// `cudart64_`, `cublas64_` or `cublaslt64_`, ordered by
/// [_windowsCudaDependencyPriority] and then case-insensitively by name. Each
/// result is [directoryPath] joined with the original file name.
///
/// When [fileNames] is given it is used instead of listing [directoryPath].
///
/// This is only a best-effort compatibility path. The backend module itself
/// is also preloaded with `LOAD_WITH_ALTERED_SEARCH_PATH`, because Windows
/// may still fail to resolve module-owned transitive imports from the bundle
/// directory after individual dependency DLLs were loaded by absolute path.
static List<String> windowsBackendDependencyPaths(
  String directoryPath,
  String backend, {
  Iterable<String>? fileNames,
}) {
  if (backend != 'cuda') {
    return const <String>[];
  }

  const cudaPrefixes = <String>['cudart64_', 'cublas64_', 'cublaslt64_'];
  bool isCudaRedistributable(String name) {
    final lowered = name.toLowerCase();
    return lowered.endsWith('.dll') &&
        cudaPrefixes.any((prefix) => lowered.startsWith(prefix));
  }

  final available =
      fileNames?.toList(growable: false) ??
      _listWindowsBackendDependencyFileNames(directoryPath);

  final ordered = available.where(isCudaRedistributable).toList()
    ..sort((left, right) {
      final byPriority = _windowsCudaDependencyPriority(
        left,
      ).compareTo(_windowsCudaDependencyPriority(right));
      return byPriority != 0
          ? byPriority
          : left.toLowerCase().compareTo(right.toLowerCase());
    });

  return List<String>.generate(
    ordered.length,
    (index) => path.join(directoryPath, ordered[index]),
    growable: false,
  );
}

/// Returns the base names of the files found directly in [directoryPath].
///
/// Entries that are not reported as [File] are skipped. Any error raised
/// while listing (for example a missing directory) results in an empty list,
/// since dependency preloading is best-effort.
static List<String> _listWindowsBackendDependencyFileNames(
  String directoryPath,
) {
  try {
    final baseNames = <String>[
      for (final entry in Directory(directoryPath).listSync())
        if (entry is File) path.basename(entry.path),
    ];
    return List<String>.of(baseNames, growable: false);
  } catch (_) {
    return const <String>[];
  }
}

/// Ranks a CUDA redistributable [fileName] for preload ordering; lower values
/// sort first.
///
/// Matching is case-insensitive on the name prefix: `cudart64_` is 0,
/// `cublas64_` is 1, `cublaslt64_` is 2, and every other name is 100.
///
/// NOTE(review): cuBLAS is ranked ahead of cuBLASLt here; if `cublas64_*`
/// imports `cublasLt64_*`, the reverse order may load more reliably. Confirm
/// against the CUDA redistributables before changing, since the unit test
/// pins the current order.
static int _windowsCudaDependencyPriority(String fileName) {
  const priorityByPrefix = <String, int>{
    'cudart64_': 0,
    'cublas64_': 1,
    'cublaslt64_': 2,
  };

  final normalized = fileName.toLowerCase();
  for (final rule in priorityByPrefix.entries) {
    if (normalized.startsWith(rule.key)) {
      return rule.value;
    }
  }
  return 100;
}

bool _tryRegisterBackendModuleViaAsset(String backend) {
final assetCandidates = _backendAssetUriCandidates(backend);
final recordAssetDiagnostics = backend == 'cpu' && Platform.isAndroid;
Expand Down
41 changes: 41 additions & 0 deletions test/unit/backends/llama_cpp/llama_cpp_service_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,47 @@ void main() {
expect(path.normalize(resolved!), path.normalize(overrideDir.path));
});

test('uses altered search path for absolute Windows backend modules', () {
  // An absolute module path opts into LOAD_WITH_ALTERED_SEARCH_PATH (0x8).
  final absoluteModule = path.join(tempRoot.path, 'ggml-cuda.dll');
  final absoluteFlags = LlamaCppService.windowsBackendModuleLoadFlags(
    absoluteModule,
  );
  expect(absoluteFlags, 0x00000008);

  // A bare file name is not absolute, so no preload flags are produced.
  final bareNameFlags = LlamaCppService.windowsBackendModuleLoadFlags(
    'ggml-cuda.dll',
  );
  expect(bareNameFlags, isZero);
});

test('orders CUDA redistributable DLLs for best-effort preloading', () {
  // Deliberately unordered listing that also contains the backend module
  // itself and a non-DLL file, both of which must be filtered out.
  const bundleListing = <String>[
    'ggml-cuda.dll',
    'cublasLt64_12.dll',
    'notes.txt',
    'cudart64_12.dll',
    'cublas64_12.dll',
  ];
  final cudaPaths = LlamaCppService.windowsBackendDependencyPaths(
    tempRoot.path,
    'cuda',
    fileNames: bundleListing,
  );

  final expectedOrder = <String>[
    for (final name in const <String>[
      'cudart64_12.dll',
      'cublas64_12.dll',
      'cublasLt64_12.dll',
    ])
      path.join(tempRoot.path, name),
  ];
  expect(cudaPaths, expectedOrder);

  // Backends other than CUDA never report dependencies, even when CUDA
  // redistributables are present in the listing.
  final vulkanPaths = LlamaCppService.windowsBackendDependencyPaths(
    tempRoot.path,
    'vulkan',
    fileNames: const <String>['cudart64_12.dll'],
  );
  expect(vulkanPaths, isEmpty);
});

test('falls back to hook cache extracted bundle directory', () {
final extractedDir = Directory(
path.join(
Expand Down