Maxritz · Maxritz · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -64,6 +64,14 @@ else()
     set(OLLAMA_HAVE_LLAMA_SERVER FALSE)
 endif()
 
+# DARS scientific optimization framework options
+# Declared here so superbuild + downstream llama/server can consume them
+option(OLLAMA_DARS "Enable DARS scientific optimization framework" OFF)
+option(OLLAMA_DARS_DUAL "Enable dual-model cascade" OFF)
+option(OLLAMA_DARS_HEBBIAN "Enable Hebbian activation profiling" OFF)
+option(OLLAMA_DARS_MERGE "Enable model merge toolkit" OFF)
+option(OLLAMA_DARS_UPCYCLE "Enable dense-to-MoE upcycling" OFF)
+
 # RDNA4 gfx1201 native optimizations (clean integration, not a patch)
 # This includes cmake/gfx1201.cmake which applies build-level optimizations
 # when AMDGPU_TARGETS contains gfx1201.

diff --git a/Granite_Benchmark.ps1 b/Granite_Benchmark.ps1
@@ -0,0 +1,194 @@
+$ErrorActionPreference = "Continue"
+
+$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
+$resultsDir = "granite_benchmark_$timestamp"
+New-Item -ItemType Directory -Force -Path $resultsDir | Out-Null
+
+$libRocm = Resolve-Path "lib\ollama\rocm\"
+$scriptDir = Get-Location
+
+$layers = @(25, 29, 33, "FULL")
+$graniteModels = @(
+    "granite-4.1-8b-Q4:latest",
+    "granite-4.1-8b-Q6:latest",
+    "granite-4.1-3b-Q8:latest"
+)
+
+$tokenGenFile = Join-Path $resultsDir "token_gen_results.txt"
+$codegenFile = Join-Path $resultsDir "codegen_results.txt"
+
+function Clean-Ollama {
+    Stop-Process -Name "ollama" -Force -ErrorAction SilentlyContinue
+    Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
+    Start-Sleep -Seconds 3
+}
+
+function Start-Ollama($layerCount) {
+    Clean-Ollama
+    $env:HSA_OVERRIDE_GFX_VERSION = "12.0.1"
+    $env:OLLAMA_FLASH_ATTENTION = "1"
+    $env:OLLAMA_NUM_GPU = $layerCount
+    $env:OLLAMA_DEBUG = "0"
+    $env:OLLAMA_KEEP_ALIVE = "-1"
+    $env:ROCR_VISIBLE_DEVICES = "0"
+    $env:HIP_VISIBLE_DEVICES = "0"
+    $env:GIN_MODE = "release"
+    [System.Environment]::SetEnvironmentVariable("PATH", "$libRocm;$scriptDir;$(Resolve-Path 'lib\ollama');$($env:PATH)", "Process")
+    return Start-Process -FilePath ".\ollama.exe" -ArgumentList "serve" -NoNewWindow -PassThru
+}
+
+function Wait-API {  # Polls /api/tags up to 15 times (1 s apart); returns $true on the first successful answer, else $false.
+    for ($i=0; $i -lt 15; $i++) {
+        $null = curl.exe -s -f -m 2 http://127.0.0.1:11434/api/tags 2>$null  # -f: an HTTP error status must not count as "ready"
+        if ($LASTEXITCODE -eq 0) { return $true }
+        Start-Sleep -Seconds 1
+    }
+    return $false
+}
+
+function Run-Inference($model, $prompt) {
+    $payload = @{ model=$model; prompt=$prompt; stream=$false } | ConvertTo-Json -Compress
+    $tmp = Join-Path $env:TEMP "bench_payload_$(Get-Random).json"
+    [System.IO.File]::WriteAllText($tmp, $payload, (New-Object System.Text.UTF8Encoding($false)))
+    $out = curl.exe -s --max-time 120 -X POST http://127.0.0.1:11434/api/generate -H "Content-Type: application/json" -d "@$tmp" 2>$null
+    Remove-Item $tmp -ErrorAction SilentlyContinue
+    return $out | ConvertFrom-Json
+}
+
+function Test-CSharp-Notepad($code, $outDir) {  # Writes $code to NotepadApp.cs in $outDir and compiles it with csc; returns @{ ok; log; exe }.
+    $codeFile = Join-Path (Convert-Path -LiteralPath $outDir) "NotepadApp.cs"  # absolute: .NET file APIs ignore Set-Location for relative paths
+    $exePath = Join-Path (Convert-Path -LiteralPath $outDir) "NotepadApp.exe"
+    [System.IO.File]::WriteAllText($codeFile, $code, (New-Object System.Text.UTF8Encoding($false)))
+
+    $csc = "C:\Windows\Microsoft.NET\Framework64\v4.0.30319\csc.exe"
+    if (-not (Test-Path $csc)) { $csc = "C:\Windows\Microsoft.NET\Framework\v4.0.30319\csc.exe" }
+
+    if (Test-Path $csc) {
+        $out = & $csc /target:winexe /out:$exePath $codeFile 2>&1 | Out-String
+        $ok = ($LASTEXITCODE -eq 0)  # ok reflects the compiler exit code; exe reports whether the binary exists
+        return @{ ok=$ok; log=$out; exe=(Test-Path $exePath) }
+    }
+    return @{ ok=$false; log="csc not found"; exe=$false }
+}
+
+function Test-Python-Syntax($code, $outDir) {  # Writes $code to notepad.py in $outDir and syntax-checks it via ast.parse; returns @{ ok; log; ran }.
+    $pyFile = Join-Path (Convert-Path -LiteralPath $outDir) "notepad.py"  # absolute: .NET file APIs ignore Set-Location for relative paths
+    [System.IO.File]::WriteAllText($pyFile, $code, (New-Object System.Text.UTF8Encoding($false)))
+
+    $pyExe = $null
+    $candidates = @("python", "python3", "py")
+    foreach ($c in $candidates) {
+        if (-not (Get-Command $c -ErrorAction SilentlyContinue)) { continue }  # invoking a missing command throws and would abort the probe
+        $null = & $c --version 2>&1; if ($LASTEXITCODE -eq 0) { $pyExe = $c; break }
+    }
+
+    if (-not $pyExe) { return @{ ok=$false; log="No Python interpreter found"; ran=$false } }
+
+    $out = & $pyExe -c "import ast, sys; ast.parse(open(sys.argv[1], encoding='utf-8').read())" $pyFile 2>&1 | Out-String  # read as UTF-8, matching the write above; path passed as argv so quotes in it cannot break the snippet
+    $ok = ($LASTEXITCODE -eq 0)
+    return @{ ok=$ok; log=$out; ran=$ok }
+}
+
+Write-Host "=== Granite Models Benchmark ===" -ForegroundColor Cyan
+Write-Host "Models: $($graniteModels -join ', ')" -ForegroundColor Gray
+Write-Host "Layers: $($layers -join ', ')" -ForegroundColor Gray
+
+"=== Granite Token Generation ===" | Out-File $tokenGenFile -Encoding ascii
+"Started: $(Get-Date)" | Out-File $tokenGenFile -Append -Encoding ascii
+"" | Out-File $tokenGenFile -Append -Encoding ascii
+
+$prompt = "Write a Python quicksort with detailed comments explaining each step."
+
+foreach ($model in $graniteModels) {  # Token-generation pass: a fresh server is started and stopped for every (model, layer setting) pair
+    Write-Host "`n[MODEL] $model" -ForegroundColor Magenta
+    "MODEL: $model" | Out-File $tokenGenFile -Append -Encoding ascii
+
+    foreach ($l in $layers) {
+        Write-Host "  Layers: $l" -ForegroundColor Yellow
+        $proc = Start-Ollama $l
+        Start-Sleep -Seconds 6
+
+        if (-not (Wait-API)) {
+            Write-Host "    [ERROR] API not ready" -ForegroundColor Red
+            "  Layers $l : API_TIMEOUT" | Out-File $tokenGenFile -Append -Encoding ascii
+            Stop-Process -Id $proc.Id -Force -ErrorAction SilentlyContinue
+            continue
+        }
+
+        try {
+            $r = Run-Inference $model $prompt
+            if ($r.eval_count -gt 0) {
+                $rate = if ($r.eval_duration -gt 0) { [math]::Round($r.eval_count / ($r.eval_duration / 1e9), 2) } else { 0 }  # guard: a zero/missing duration would raise divide-by-zero
+                $promptRate = if ($r.prompt_eval_duration -gt 0) { [math]::Round($r.prompt_eval_count / ($r.prompt_eval_duration / 1e9), 2) } else { 0 }  # same guard, so a valid eval result is not discarded
+                Write-Host "    [OK] Eval=$rate tok/s | Prompt=$promptRate tok/s | Tokens=$($r.eval_count)" -ForegroundColor Green
+                "  Layers $l : Eval=$rate tok/s | Prompt=$promptRate tok/s | Tokens=$($r.eval_count)" | Out-File $tokenGenFile -Append -Encoding ascii
+            } else {
+                $err = if ($r.error) { $r.error } else { "NO_OUTPUT" }
+                Write-Host "    [FAIL] $err" -ForegroundColor Red
+                "  Layers $l : FAILED - $err" | Out-File $tokenGenFile -Append -Encoding ascii
+            }
+        } catch {
+            Write-Host "    [EXCEPTION] $_" -ForegroundColor Red
+            "  Layers $l : EXCEPTION" | Out-File $tokenGenFile -Append -Encoding ascii
+        }
+
+        Stop-Process -Id $proc.Id -Force -ErrorAction SilentlyContinue
+        "" | Out-File $tokenGenFile -Append -Encoding ascii
+    }
+}
+
+Write-Host "`n=== Code Generation Test ===" -ForegroundColor Green
+
+$csharpPrompt = "Write a complete C# Windows Forms Notepad application in a SINGLE file. Requirements: main form with multiline TextBox filling window; menu bar with File (New, Open, Save, Save As, Exit), Edit (Cut, Copy, Paste, Select All), Help (About); Open loads .txt files; Save/Save As save to file; title bar shows filename and asterisk if unsaved; word wrap toggle in Format menu. Output ONLY raw C# code, no markdown fences, no explanations."
+
+$pythonPrompt = "Write a complete Python tkinter Notepad application in a SINGLE file. Requirements: main window with Text widget; menu bar with File (New, Open, Save, Save As, Exit), Edit (Cut, Copy, Paste, Select All), Help (About); Open loads .txt files; Save/Save As save to file; title bar shows filename and asterisk if unsaved; word wrap toggle. Output ONLY raw Python code, no markdown fences, no explanations."
+
+"=== Granite Code Generation ===" | Out-File $codegenFile -Encoding ascii
+"Started: $(Get-Date)" | Out-File $codegenFile -Append -Encoding ascii
+"" | Out-File $codegenFile -Append -Encoding ascii
+
+Clean-Ollama
+$proc = Start-Ollama "FULL"
+Start-Sleep -Seconds 6
+
+if (Wait-API) {  # Code-generation pass: each model produces a C# and a Python notepad, which are then compiled / syntax-checked
+    foreach ($model in $graniteModels) {
+        Write-Host "`n  --- $model ---" -ForegroundColor Cyan
+
+        $outDir = Join-Path $resultsDir ($model -replace "[^a-zA-Z0-9\-]","_")  # model name sanitised into a directory name
+        New-Item -ItemType Directory -Force -Path $outDir | Out-Null
+
+        "MODEL: $model" | Out-File $codegenFile -Append -Encoding ascii
+
+        Write-Host "    [C#] Generating..." -ForegroundColor DarkGray
+        try {
+            $csResp = Run-Inference $model $csharpPrompt
+            $csResult = if ($csResp.response) { Test-CSharp-Notepad $csResp.response $outDir } else { @{ ok=$false; log="NO_RESPONSE"; exe=$false } }
+            $csRate = if ($csResp.eval_duration -gt 0) { [math]::Round($csResp.eval_count / ($csResp.eval_duration / 1e9), 2) } else { 0 }
+            $csStatus = if ($csResult.ok) { "PASS" } else { "FAIL" }
+            Write-Host "      C#: $csStatus | Rate=$csRate tok/s | exe=$(if($csResult.exe){'YES'}else{'NO'})" -ForegroundColor $(if($csResult.ok){"Green"}else{"Red"})
+            "    C# : $csStatus | Rate=$csRate tok/s | exe=$(if($csResult.exe){'YES'}else{'NO'})" | Out-File $codegenFile -Append -Encoding ascii
+            if (-not $csResult.ok) { "      Log: $($csResult.log.Substring(0, [Math]::Min(300, $csResult.log.Length)))" | Out-File $codegenFile -Append -Encoding ascii }
+        } catch {
+            Write-Host "      C#: ERROR - $_" -ForegroundColor Red; "    C# : ERROR - $_" | Out-File $codegenFile -Append -Encoding ascii  # keep the exception text instead of dropping it
+        }
+
+        Write-Host "    [Python] Generating..." -ForegroundColor DarkGray
+        try {
+            $pyResp = Run-Inference $model $pythonPrompt
+            $pyResult = if ($pyResp.response) { Test-Python-Syntax $pyResp.response $outDir } else { @{ ok=$false; log="NO_RESPONSE"; ran=$false } }
+            $pyRate = if ($pyResp.eval_duration -gt 0) { [math]::Round($pyResp.eval_count / ($pyResp.eval_duration / 1e9), 2) } else { 0 }
+            $pyStatus = if ($pyResult.ok) { "PASS" } else { "FAIL" }
+            Write-Host "      Python: $pyStatus | Rate=$pyRate tok/s" -ForegroundColor $(if($pyResult.ok){"Green"}else{"Red"})
+            "    Python: $pyStatus | Rate=$pyRate tok/s" | Out-File $codegenFile -Append -Encoding ascii
+            if (-not $pyResult.ok) { "      Log: $($pyResult.log.Substring(0, [Math]::Min(300, $pyResult.log.Length)))" | Out-File $codegenFile -Append -Encoding ascii }
+        } catch {
+            Write-Host "      Python: ERROR - $_" -ForegroundColor Red; "    Python: ERROR - $_" | Out-File $codegenFile -Append -Encoding ascii  # keep the exception text instead of dropping it
+        }
+        "" | Out-File $codegenFile -Append -Encoding ascii
+    }
+} else { Write-Host "    [ERROR] API not ready" -ForegroundColor Red; "API_TIMEOUT" | Out-File $codegenFile -Append -Encoding ascii }  # record the skip instead of ending silently
+Stop-Process -Id $proc.Id -Force -ErrorAction SilentlyContinue
+
+Write-Host "`n=== BENCHMARK COMPLETE ===" -ForegroundColor Green
+Write-Host "Results in: $resultsDir" -ForegroundColor Cyan
diff --git a/README.md b/README.md
@@ -196,6 +196,19 @@ These are **stable, reproducible** numbers on a reference AMD Radeon RX 9070 XT
 | Gemma-4 12B | IQ3_XXS | **~51 tok/s** | ~5.5 GB |
 | Starcoder2 15B | Q4_K_M | **~48 tok/s** | ~11 GB |
 | Devstral 24B | IQ4_XS | **~43 tok/s** | ~13 GB |
+| Granite 4.1 8B Q4 | Q4_K_M | **~80 tok/s** | ~5 GB |
+| Granite 4.1 8B Q6 | Q6_K | **~66 tok/s** | ~6.5 GB |
+| Granite 4.1 3B Q8 | Q8_0 | **~109 tok/s** | ~2 GB |
+
+### Granite Multi-Layer Benchmark Results (RX 9070 XT)
+
+| Model | Layer 25 | Layer 29 | Layer 33 | Full GPU |
+|---|---|---|---|---|
+| Granite 4.1 8B Q4 | 79.53 tok/s | 81.04 tok/s | 79.59 tok/s | **80.74 tok/s** |
+| Granite 4.1 8B Q6 | 65.22 tok/s | 66.81 tok/s | 66.61 tok/s | **66.54 tok/s** |
+| Granite 4.1 3B Q8 | 108.76 tok/s | 107.57 tok/s | 109.11 tok/s | **109.33 tok/s** |
+
+All Granite models tested used ~5-6 GB of VRAM (safely under the 15.8 GB available).
 
 *Note: Devstral scores measured at < 1K context length (4096 window). Performance will naturally decrease as the 256K context fills up due to KV cache pressure.*