Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ else()
set(OLLAMA_HAVE_LLAMA_SERVER FALSE)
endif()

# DARS scientific optimization framework options
# Declared here so superbuild + downstream llama/server can consume them
# Every option below defaults to OFF, so none of these features is enabled
# unless it is switched on explicitly at configure time (e.g. -DOLLAMA_DARS=ON).
# NOTE(review): nothing here makes the feature-specific options depend on
# OLLAMA_DARS itself - confirm how the consumers treat that combination.
option(OLLAMA_DARS "Enable DARS scientific optimization framework" OFF)
option(OLLAMA_DARS_DUAL "Enable dual-model cascade" OFF)
option(OLLAMA_DARS_HEBBIAN "Enable Hebbian activation profiling" OFF)
option(OLLAMA_DARS_MERGE "Enable model merge toolkit" OFF)
option(OLLAMA_DARS_UPCYCLE "Enable dense-to-MoE upcycling" OFF)

# RDNA4 gfx1201 native optimizations (clean integration, not a patch)
# This includes cmake/gfx1201.cmake which applies build-level optimizations
# when AMDGPU_TARGETS contains gfx1201.
Expand Down
194 changes: 194 additions & 0 deletions Granite_Benchmark.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
# Benchmark configuration: output locations, GPU layer settings and models.
# "Continue" lets the run carry on after a non-terminating error so a single
# failed step does not abort the whole benchmark.
$ErrorActionPreference = "Continue"

$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"

# Keep the results directory as an ABSOLUTE path. The helper functions write
# files through [System.IO.File], which resolves relative paths against the
# .NET process working directory, not PowerShell's current location; the two
# differ after a `cd` in an interactive session, which would scatter outputs.
$resultsDir = (New-Item -ItemType Directory -Force -Path "granite_benchmark_$timestamp").FullName

# Both paths are relative to the current location, so the script is expected
# to be started from the directory that contains ollama.exe and lib\ollama.
$libRocm = Resolve-Path "lib\ollama\rocm\"
$scriptDir = Get-Location

# Values handed to Start-Ollama one at a time (numeric layer counts plus "FULL").
$layers = @(25, 29, 33, "FULL")
$graniteModels = @(
    "granite-4.1-8b-Q4:latest",
    "granite-4.1-8b-Q6:latest",
    "granite-4.1-3b-Q8:latest"
)

# Plain-text result logs, one per benchmark phase.
$tokenGenFile = Join-Path $resultsDir "token_gen_results.txt"
$codegenFile = Join-Path $resultsDir "codegen_results.txt"

# Force-stops every running "ollama" and "llama-server" process (silently
# ignoring the case where none exists), then waits 3 seconds before returning.
function Clean-Ollama {
    foreach ($processName in @("ollama", "llama-server")) {
        Stop-Process -Name $processName -Force -ErrorAction SilentlyContinue
    }
    Start-Sleep -Seconds 3
}

# Restarts the Ollama server with the benchmark environment.
#   $layerCount - value exported as OLLAMA_NUM_GPU (a number or "FULL").
#                 NOTE(review): "FULL" is passed through verbatim; how the
#                 server interprets it is not visible here - confirm.
# Returns the process object of the launched ".\ollama.exe serve".
function Start-Ollama($layerCount) {
    # Make sure no previous server instance is still running.
    Clean-Ollama

    # Process-scoped environment for the server about to be launched.
    $serverEnv = [ordered]@{
        HSA_OVERRIDE_GFX_VERSION = "12.0.1"
        OLLAMA_FLASH_ATTENTION   = "1"
        OLLAMA_NUM_GPU           = $layerCount
        OLLAMA_DEBUG             = "0"
        OLLAMA_KEEP_ALIVE        = "-1"
        ROCR_VISIBLE_DEVICES     = "0"
        HIP_VISIBLE_DEVICES      = "0"
        GIN_MODE                 = "release"
    }
    foreach ($entry in $serverEnv.GetEnumerator()) {
        Set-Item -Path "Env:$($entry.Key)" -Value ([string]$entry.Value)
    }

    # Prepend the ROCm libraries, the working directory and lib\ollama to PATH.
    $env:PATH = "$libRocm;$scriptDir;$(Resolve-Path 'lib\ollama');$($env:PATH)"

    $launch = @{
        FilePath     = ".\ollama.exe"
        ArgumentList = "serve"
        NoNewWindow  = $true
        PassThru     = $true
    }
    return Start-Process @launch
}

# Polls the local Ollama API (/api/tags) until curl succeeds.
# Makes up to 15 attempts, each with a 2 second curl timeout and a 1 second
# pause after a failure. Returns $true on the first success, otherwise $false.
function Wait-API {
    $attemptsLeft = 15
    while ($attemptsLeft -gt 0) {
        # Response body is irrelevant; only curl's exit code is inspected.
        $null = curl.exe -s -m 2 http://127.0.0.1:11434/api/tags 2>$null
        if ($LASTEXITCODE -eq 0) {
            return $true
        }
        Start-Sleep -Seconds 1
        $attemptsLeft--
    }
    return $false
}

# Sends one non-streaming generate request to the local Ollama API.
#   $model  - model name placed in the request body.
#   $prompt - prompt text placed in the request body.
# Returns the parsed JSON response object, or $null when curl produced no
# output (for example on timeout or connection failure).
function Run-Inference($model, $prompt) {
    $payload = @{ model=$model; prompt=$prompt; stream=$false } | ConvertTo-Json -Compress

    # The body goes through a temp file (UTF-8 without BOM) and curl's "@file"
    # syntax, which avoids command-line quoting problems with the JSON text.
    $tmp = Join-Path $env:TEMP "bench_payload_$(Get-Random).json"
    try {
        [System.IO.File]::WriteAllText($tmp, $payload, (New-Object System.Text.UTF8Encoding($false)))
        $out = curl.exe -s --max-time 120 -X POST http://127.0.0.1:11434/api/generate -H "Content-Type: application/json" -d "@$tmp" 2>$null
    } finally {
        # Always remove the payload file, even if writing it or launching curl throws.
        Remove-Item $tmp -ErrorAction SilentlyContinue
    }

    # Nothing to parse: report "no result" instead of feeding null to the parser.
    if (-not $out) { return $null }
    return $out | ConvertFrom-Json
}

# Writes the generated C# source to "<outDir>\NotepadApp.cs" and compiles it
# with the .NET Framework 4 csc.exe (64-bit location first, then 32-bit).
#   $code   - C# source text to compile.
#   $outDir - directory that receives the source file and the executable.
# Returns a hashtable: ok (compiler exit code was 0), log (compiler output or
# "csc not found"), exe (whether NotepadApp.exe exists afterwards).
function Test-CSharp-Notepad($code, $outDir) {
    $sourcePath = Join-Path $outDir "NotepadApp.cs"
    $binaryPath = Join-Path $outDir "NotepadApp.exe"
    $utf8NoBom = New-Object System.Text.UTF8Encoding($false)
    [System.IO.File]::WriteAllText($sourcePath, $code, $utf8NoBom)

    # First compiler location that actually exists wins.
    $compiler = @(
        "C:\Windows\Microsoft.NET\Framework64\v4.0.30319\csc.exe",
        "C:\Windows\Microsoft.NET\Framework\v4.0.30319\csc.exe"
    ) | Where-Object { Test-Path $_ } | Select-Object -First 1

    if (-not $compiler) {
        return @{ ok=$false; log="csc not found"; exe=$false }
    }

    $compileLog = & $compiler /target:winexe /out:$binaryPath $sourcePath 2>&1 | Out-String
    $succeeded = ($LASTEXITCODE -eq 0)
    return @{ ok=$succeeded; log=$compileLog; exe=(Test-Path $binaryPath) }
}

# Writes the generated Python source to "<outDir>\notepad.py" and checks that
# it parses, using the first working interpreter among python, python3 and py.
#   $code   - Python source text to check.
#   $outDir - directory that receives notepad.py.
# Returns a hashtable: ok (the parse succeeded), log (interpreter output or
# "No Python interpreter found"), ran (same value as ok).
function Test-Python-Syntax($code, $outDir) {
    $pyFile = Join-Path $outDir "notepad.py"
    [System.IO.File]::WriteAllText($pyFile, $code, (New-Object System.Text.UTF8Encoding($false)))

    $pyExe = $null
    foreach ($c in @("python", "python3", "py")) {
        # Skip launchers that are not installed. Invoking a missing command
        # raises an error that leaves $LASTEXITCODE stale and, under a caller's
        # try block, would abort the whole check instead of trying the next one.
        if (-not (Get-Command $c -ErrorAction SilentlyContinue)) { continue }
        $null = & $c --version 2>&1
        if ($LASTEXITCODE -eq 0) { $pyExe = $c; break }
    }

    if (-not $pyExe) { return @{ ok=$false; log="No Python interpreter found"; ran=$false } }

    # The path is passed as an argument rather than spliced into the Python
    # source, so quotes or backslashes in it cannot break the snippet. The file
    # is read as bytes so the parser decodes it as source code (UTF-8 by
    # default) instead of using the console's locale encoding.
    $checker = "import ast, sys; ast.parse(open(sys.argv[1], 'rb').read())"
    $out = & $pyExe -c $checker $pyFile 2>&1 | Out-String
    $ok = ($LASTEXITCODE -eq 0)
    return @{ ok=$ok; log=$out; ran=$ok }
}

# ---- Phase 1: token generation throughput ----
# For every model and every GPU layer setting: restart the server, run one
# prompt, and record eval / prompt-eval rates in the token generation log.
Write-Host "=== Granite Models Benchmark ===" -ForegroundColor Cyan
Write-Host "Models: $($graniteModels -join ', ')" -ForegroundColor Gray
Write-Host "Layers: $($layers -join ', ')" -ForegroundColor Gray

"=== Granite Token Generation ===" | Out-File $tokenGenFile -Encoding ascii
"Started: $(Get-Date)" | Out-File $tokenGenFile -Append -Encoding ascii
"" | Out-File $tokenGenFile -Append -Encoding ascii

$prompt = "Write a Python quicksort with detailed comments explaining each step."

foreach ($model in $graniteModels) {
    Write-Host "`n[MODEL] $model" -ForegroundColor Magenta
    "MODEL: $model" | Out-File $tokenGenFile -Append -Encoding ascii

    foreach ($l in $layers) {
        Write-Host " Layers: $l" -ForegroundColor Yellow
        $proc = Start-Ollama $l
        # Give the freshly started server a head start before polling it.
        Start-Sleep -Seconds 6

        if (-not (Wait-API)) {
            Write-Host " [ERROR] API not ready" -ForegroundColor Red
            " Layers $l : API_TIMEOUT" | Out-File $tokenGenFile -Append -Encoding ascii
            Stop-Process -Id $proc.Id -Force -ErrorAction SilentlyContinue
            continue
        }

        try {
            $r = Run-Inference $model $prompt
            if ($r.eval_count -gt 0) {
                # Durations are divided by 1e9 to get seconds. Guard against a
                # zero or missing duration, as the code generation phase does,
                # so the log never records an infinite rate.
                $rate = if ($r.eval_duration -gt 0) { [math]::Round($r.eval_count / ($r.eval_duration / 1e9), 2) } else { 0 }
                $promptRate = if ($r.prompt_eval_duration -gt 0) { [math]::Round($r.prompt_eval_count / ($r.prompt_eval_duration / 1e9), 2) } else { 0 }
                Write-Host " [OK] Eval=$rate tok/s | Prompt=$promptRate tok/s | Tokens=$($r.eval_count)" -ForegroundColor Green
                " Layers $l : Eval=$rate tok/s | Prompt=$promptRate tok/s | Tokens=$($r.eval_count)" | Out-File $tokenGenFile -Append -Encoding ascii
            } else {
                $err = if ($r.error) { $r.error } else { "NO_OUTPUT" }
                Write-Host " [FAIL] $err" -ForegroundColor Red
                " Layers $l : FAILED - $err" | Out-File $tokenGenFile -Append -Encoding ascii
            }
        } catch {
            Write-Host " [EXCEPTION] $_" -ForegroundColor Red
            # Keep the reason in the results file too, not only on the console.
            " Layers $l : EXCEPTION - $($_.Exception.Message)" | Out-File $tokenGenFile -Append -Encoding ascii
        }

        Stop-Process -Id $proc.Id -Force -ErrorAction SilentlyContinue
        "" | Out-File $tokenGenFile -Append -Encoding ascii
    }
}

# ---- Phase 2: code generation ----
# With the server started in "FULL" mode, ask every model for a C# and a
# Python Notepad application, then compile / syntax-check what comes back.
Write-Host "`n=== Code Generation Test ===" -ForegroundColor Green

$csharpPrompt = "Write a complete C# Windows Forms Notepad application in a SINGLE file. Requirements: main form with multiline TextBox filling window; menu bar with File (New, Open, Save, Save As, Exit), Edit (Cut, Copy, Paste, Select All), Help (About); Open loads .txt files; Save/Save As save to file; title bar shows filename and asterisk if unsaved; word wrap toggle in Format menu. Output ONLY raw C# code, no markdown fences, no explanations."

$pythonPrompt = "Write a complete Python tkinter Notepad application in a SINGLE file. Requirements: main window with Text widget; menu bar with File (New, Open, Save, Save As, Exit), Edit (Cut, Copy, Paste, Select All), Help (About); Open loads .txt files; Save/Save As save to file; title bar shows filename and asterisk if unsaved; word wrap toggle. Output ONLY raw Python code, no markdown fences, no explanations."

"=== Granite Code Generation ===" | Out-File $codegenFile -Encoding ascii
"Started: $(Get-Date)" | Out-File $codegenFile -Append -Encoding ascii
"" | Out-File $codegenFile -Append -Encoding ascii

Clean-Ollama
$proc = Start-Ollama "FULL"
Start-Sleep -Seconds 6

if (Wait-API) {
    foreach ($model in $graniteModels) {
        Write-Host "`n --- $model ---" -ForegroundColor Cyan

        # One output folder per model; characters outside [a-zA-Z0-9-] become "_".
        $outDir = Join-Path $resultsDir ($model -replace "[^a-zA-Z0-9\-]","_")
        New-Item -ItemType Directory -Force -Path $outDir | Out-Null

        "MODEL: $model" | Out-File $codegenFile -Append -Encoding ascii

        Write-Host " [C#] Generating..." -ForegroundColor DarkGray
        try {
            $csResp = Run-Inference $model $csharpPrompt
            $csResult = if ($csResp.response) { Test-CSharp-Notepad $csResp.response $outDir } else { @{ ok=$false; log="NO_RESPONSE"; exe=$false } }
            $csRate = if ($csResp.eval_duration -gt 0) { [math]::Round($csResp.eval_count / ($csResp.eval_duration / 1e9), 2) } else { 0 }
            $csStatus = if ($csResult.ok) { "PASS" } else { "FAIL" }
            Write-Host " C#: $csStatus | Rate=$csRate tok/s | exe=$(if($csResult.exe){'YES'}else{'NO'})" -ForegroundColor $(if($csResult.ok){"Green"}else{"Red"})
            " C# : $csStatus | Rate=$csRate tok/s | exe=$(if($csResult.exe){'YES'}else{'NO'})" | Out-File $codegenFile -Append -Encoding ascii
            # On failure keep at most the first 300 characters of the compiler log.
            if (-not $csResult.ok) { " Log: $($csResult.log.Substring(0, [Math]::Min(300, $csResult.log.Length)))" | Out-File $codegenFile -Append -Encoding ascii }
        } catch {
            # Surface the reason instead of recording a bare "ERROR".
            Write-Host " [EXCEPTION] $_" -ForegroundColor Red
            " C# : ERROR - $($_.Exception.Message)" | Out-File $codegenFile -Append -Encoding ascii
        }

        Write-Host " [Python] Generating..." -ForegroundColor DarkGray
        try {
            $pyResp = Run-Inference $model $pythonPrompt
            $pyResult = if ($pyResp.response) { Test-Python-Syntax $pyResp.response $outDir } else { @{ ok=$false; log="NO_RESPONSE"; ran=$false } }
            $pyRate = if ($pyResp.eval_duration -gt 0) { [math]::Round($pyResp.eval_count / ($pyResp.eval_duration / 1e9), 2) } else { 0 }
            $pyStatus = if ($pyResult.ok) { "PASS" } else { "FAIL" }
            Write-Host " Python: $pyStatus | Rate=$pyRate tok/s" -ForegroundColor $(if($pyResult.ok){"Green"}else{"Red"})
            " Python: $pyStatus | Rate=$pyRate tok/s" | Out-File $codegenFile -Append -Encoding ascii
            if (-not $pyResult.ok) { " Log: $($pyResult.log.Substring(0, [Math]::Min(300, $pyResult.log.Length)))" | Out-File $codegenFile -Append -Encoding ascii }
        } catch {
            Write-Host " [EXCEPTION] $_" -ForegroundColor Red
            " Python: ERROR - $($_.Exception.Message)" | Out-File $codegenFile -Append -Encoding ascii
        }
        "" | Out-File $codegenFile -Append -Encoding ascii
    }
} else {
    # Without this branch a server that never came up left the code generation
    # log empty and gave no hint on the console about why.
    Write-Host " [ERROR] API not ready" -ForegroundColor Red
    "API_TIMEOUT" | Out-File $codegenFile -Append -Encoding ascii
}
Stop-Process -Id $proc.Id -Force -ErrorAction SilentlyContinue

Write-Host "`n=== BENCHMARK COMPLETE ===" -ForegroundColor Green
Write-Host "Results in: $resultsDir" -ForegroundColor Cyan
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,19 @@ These are **stable, reproducible** numbers on a reference AMD Radeon RX 9070 XT
| Gemma-4 12B | IQ3_XXS | **~51 tok/s** | ~5.5 GB |
| Starcoder2 15B | Q4_K_M | **~48 tok/s** | ~11 GB |
| Devstral 24B | IQ4_XS | **~43 tok/s** | ~13 GB |
| Granite 4.1 8B Q4 | Q4_K_M | **~80 tok/s** | ~5 GB |
| Granite 4.1 8B Q6 | Q6_K | **~66 tok/s** | ~6.5 GB |
| Granite 4.1 3B Q8 | Q8_0 | **~109 tok/s** | ~2 GB |

### Granite Multi-Layer Benchmark Results (RX 9070 XT)

| Model | 25 GPU layers | 29 GPU layers | 33 GPU layers | Full GPU |
|---|---|---|---|---|
| Granite 4.1 8B Q4 | 79.53 tok/s | 81.04 tok/s | 79.59 tok/s | **80.74 tok/s** |
| Granite 4.1 8B Q6 | 65.22 tok/s | 66.81 tok/s | 66.61 tok/s | **66.54 tok/s** |
| Granite 4.1 3B Q8 | 108.76 tok/s | 107.57 tok/s | 109.11 tok/s | **109.33 tok/s** |

All Granite models tested used ~5-6 GB of VRAM (safely under the 15.8 GB available).

*Note: Devstral scores measured at < 1K context length (4096 window). Performance will naturally decrease as the 256K context fills up due to KV cache pressure.*

Expand Down
Loading
Loading