-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgpu-limits.ts
More file actions
115 lines (104 loc) · 3.25 KB
/
gpu-limits.ts
File metadata and controls
115 lines (104 loc) · 3.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
// GPU Limits Detection and Configuration
// Detects actual WebGPU limits and recommends optimal model sizes
/**
 * Buffer limits and model-size guidance for one GPU class.
 *
 * Size fields are plain byte counts.
 */
export interface GPUTierConfig {
  /** Coarse hardware class this configuration describes. */
  tier: 'mobile' | 'integrated' | 'discrete';
  /** Largest single buffer size, in bytes. */
  maxBufferSize: number;
  /** Largest storage-buffer binding, in bytes. */
  maxStorageBufferBinding: number;
  /** Largest uniform-buffer binding, in bytes. */
  maxUniformBufferBinding: number;
  /** Suggested upper bound for model weights, in bytes. */
  recommendedModelSize: number;
}
/**
 * Preset limit profiles, one per GPU class.
 *
 * Every size is a byte count, written as a multiple of 2 ** 20 (1 MiB) or
 * 2 ** 10 (1 KiB). The trailing notes on `recommendedModelSize` name the
 * model/quantization the figure was picked for.
 */
export const GPU_TIERS = {
  mobile: {
    maxBufferSize: 256 * 2 ** 20, // 256MB
    maxStorageBufferBinding: 128 * 2 ** 20, // 128MB
    maxUniformBufferBinding: 64 * 2 ** 10, // 64KB
    recommendedModelSize: 350 * 2 ** 20, // 350MB - Q4_K_M 0.5B
    tier: 'mobile' as const,
  },
  integrated: {
    maxBufferSize: 1024 * 2 ** 20, // 1GB
    maxStorageBufferBinding: 512 * 2 ** 20, // 512MB
    maxUniformBufferBinding: 64 * 2 ** 10, // 64KB
    recommendedModelSize: 900 * 2 ** 20, // 900MB - Q4_K_M 1.5B
    tier: 'integrated' as const,
  },
  discrete: {
    maxBufferSize: 4096 * 2 ** 20, // 4GB
    maxStorageBufferBinding: 2048 * 2 ** 20, // 2GB
    maxUniformBufferBinding: 64 * 2 ** 10, // 64KB
    recommendedModelSize: 1900 * 2 ** 20, // 1900MB - Q4_F16 3B
    tier: 'discrete' as const,
  },
};
/**
 * Reads the limits reported by the default WebGPU adapter and classifies the
 * hardware into a tier.
 *
 * Classification uses `maxBufferSize` only:
 *   - at least 2,000,000,000 bytes -> 'discrete'
 *   - at least   800,000,000 bytes -> 'integrated'
 *   - anything smaller             -> 'mobile'
 *
 * The values come from `adapter.limits`; no device is requested here.
 *
 * @returns The adapter's buffer limits, the derived tier, and a recommended
 *   model size equal to 70% of `maxBufferSize` (rounded down). Resolves to
 *   `null` when `navigator.gpu` is missing, when no adapter is returned, or
 *   when anything inside the probe throws (the error is logged as a warning).
 */
export async function probeActualLimits(): Promise<GPUTierConfig | null> {
  // Formats a byte count as whole megabytes for the diagnostic log.
  const asMB = (bytes: number): string => `${Math.round(bytes / 1024 / 1024)}MB`;

  try {
    // Kept inside the try so that a missing `navigator` global is also
    // reported through the catch below instead of escaping to the caller.
    const gpu = navigator.gpu;
    if (!gpu) {
      console.log('❌ WebGPU not available');
      return null;
    }

    const adapter = await gpu.requestAdapter();
    if (!adapter) {
      console.log('❌ WebGPU adapter not available');
      return null;
    }

    const reported = adapter.limits;
    const bufferCap = reported.maxBufferSize;

    console.log('🔍 Detected GPU limits:', {
      maxBufferSize: asMB(bufferCap),
      maxStorageBufferBinding: asMB(reported.maxStorageBufferBindingSize),
      maxComputeWorkgroupSizeX: reported.maxComputeWorkgroupSizeX,
    });

    // Largest threshold first; the first match wins.
    const tier: GPUTierConfig['tier'] =
      bufferCap >= 2_000_000_000
        ? 'discrete' // >= 2GB
        : bufferCap >= 800_000_000
          ? 'integrated' // >= 800MB
          : 'mobile'; // < 800MB

    const profile: GPUTierConfig = {
      maxBufferSize: reported.maxBufferSize,
      maxStorageBufferBinding: reported.maxStorageBufferBindingSize,
      maxUniformBufferBinding: reported.maxUniformBufferBindingSize,
      // Keep 30% headroom below the largest single buffer.
      recommendedModelSize: Math.floor(bufferCap * 0.7),
      tier,
    };

    console.log(`✅ GPU Tier: ${tier.toUpperCase()}`);
    return profile;
  } catch (error) {
    console.warn('⚠️ Failed to probe GPU limits:', error);
    return null;
  }
}
/**
 * Maps a GPU tier to its runtime sizing parameters.
 *
 * Each step up in tier doubles every value:
 * batch 32 / 64 / 128, window 1024 / 2048 / 4096, context 512 / 1024 / 2048.
 *
 * @param tier - Hardware class to look up.
 * @returns A newly created settings object for that tier.
 */
export function getWebGPUConfig(tier: 'mobile' | 'integrated' | 'discrete'): {
  max_batch_size: number;
  max_window_size: number;
  recommended_context: number;
} {
  switch (tier) {
    case 'mobile':
      return { max_batch_size: 32, max_window_size: 1024, recommended_context: 512 };
    case 'integrated':
      return { max_batch_size: 64, max_window_size: 2048, recommended_context: 1024 };
    case 'discrete':
      return { max_batch_size: 128, max_window_size: 4096, recommended_context: 2048 };
  }
}