rh-ai-quickstart · johnson2500 · Jun 26, 2026
diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml
@@ -5,6 +5,7 @@ on:
     branches:
       - dev
       - main
+
   push:
     branches:
       - dev
@@ -335,18 +336,24 @@ jobs:
           # Override MaaS configuration from environment variables
           # Note: All MaaS settings (endpoint, model ID, API key) passed via --set
           # Some values may appear as *** in logs due to GitHub Actions secret masking
+          # The llama-stack subchart renders a wait-for-models init container whenever a MaaS model
+          # is registered and ignores initContainers/skipModelWait in values, so patch it out after install.
           helm install rag deploy/helm/rag \
             --namespace rag-e2e \
             --values tests/e2e/values-e2e.yaml \
             --set global.models.${MAAS_MODEL_ID}.url="${MAAS_ENDPOINT}" \
             --set global.models.${MAAS_MODEL_ID}.id="${MAAS_MODEL_ID}" \
             --set global.models.${MAAS_MODEL_ID}.enabled=true \
             --set global.models.${MAAS_MODEL_ID}.apiToken="${MAAS_API_KEY}" \
-            --set-json llama-stack.initContainers='[]' \
+            --wait=false \
             --skip-crds \
             --timeout 20m \
             --debug
 
+          kubectl patch deployment llamastack -n rag-e2e --type=merge \
+            -p='{"spec":{"template":{"spec":{"initContainers":[]}}}}'  # merge patch clears initContainers; unlike JSON-patch "replace" it does not fail when the field is absent
+          kubectl rollout restart deployment/llamastack -n rag-e2e
+
       - name: Wait for core services to be ready
         run: |
           echo "========================================="
@@ -459,7 +466,7 @@ jobs:
           echo "=== RAG UI logs ==="
           kubectl logs -l app.kubernetes.io/name=rag -n rag-e2e --tail=200 || echo "No RAG UI logs available"
 
-          echo "=== Llama Stack logs (CRITICAL - Check model registration) ==="
+          echo "=== Llama Stack logs ==="
           kubectl logs -l app.kubernetes.io/name=llamastack -n rag-e2e --tail=300 || echo "No Llama Stack logs available"
 
           echo "=== Llama Stack pod details ==="
@@ -618,18 +625,24 @@ jobs:
         run: |
           kubectl create namespace rag-e2e-ui || true
 
+          # The llama-stack subchart renders a wait-for-models init container whenever a MaaS model
+          # is registered and ignores initContainers/skipModelWait in values, so patch it out after install.
           helm install rag deploy/helm/rag \
             --namespace rag-e2e-ui \
             --values tests/e2e/values-e2e.yaml \
             --set global.models.${MAAS_MODEL_ID}.url="${MAAS_ENDPOINT}" \
             --set global.models.${MAAS_MODEL_ID}.id="${MAAS_MODEL_ID}" \
             --set global.models.${MAAS_MODEL_ID}.enabled=true \
             --set global.models.${MAAS_MODEL_ID}.apiToken="${MAAS_API_KEY}" \
-            --set-json llama-stack.initContainers='[]' \
+            --wait=false \
             --skip-crds \
             --timeout 20m \
             --debug
 
+          kubectl patch deployment llamastack -n rag-e2e-ui --type=merge \
+            -p='{"spec":{"template":{"spec":{"initContainers":[]}}}}'  # merge patch clears initContainers; unlike JSON-patch "replace" it does not fail when the field is absent
+          kubectl rollout restart deployment/llamastack -n rag-e2e-ui
+
       - name: Wait for services to be ready
         run: |
           kubectl wait --for=condition=available --timeout=600s \

diff --git a/tests/e2e/values-e2e.yaml b/tests/e2e/values-e2e.yaml
@@ -47,15 +47,9 @@ volumeMounts:
 
 # Configure models to use Red Hat MaaS
 # All MaaS configuration (url, id, enabled, apiToken) will be injected via helm --set in GitHub Actions
-# This allows flexible configuration from workflow environment variables
+# The subchart ignores initContainers/skipModelWait set in this file, so the workflow patches out the wait-for-models init container after install
 global:
   models: {}
-    # Example structure (populated by workflow):
-    # llama-3-2-3b:
-    #   url: "https://maas-endpoint/v1"
-    #   id: "llama-3-2-3b"
-    #   enabled: true
-    #   apiToken: "secret-key"
 
 # PostgreSQL + PGVector configuration
 pgvector:
@@ -68,31 +62,67 @@ pgvector:
     port: "5432"
   resources:
     requests:
+      memory: "256Mi"
+      cpu: "250m"
+    limits:
       memory: "512Mi"
       cpu: "500m"
-    limits:
-      memory: "1Gi"
-      cpu: "1"
-
-# Llama Stack configuration for MaaS
+  # Default chart PVC is 5Gi — too heavy for Kind on GHA runners
+  volumeClaimTemplates:
+    - metadata:
+        name: pg-data
+      spec:
+        accessModes: ["ReadWriteOnce"]
+        resources:
+          requests:
+            storage: 1Gi
+
+# Llama Stack configuration for MaaS (ogxai/distribution-starter via llama-stack 0.8.x)
 llama-stack:
   enabled: true
   secrets:
     TAVILY_SEARCH_API_KEY: ""
-
+
+  initContainers: []  # NOTE: ignored by the subchart per the workflow comments; CI patches the Deployment after install
+  skipModelWait: true  # NOTE: likewise ignored by the subchart for now
+
+  # OGX starter needs more headroom than the old llamastack image in Kind CI
   resources:
     requests:
-      memory: "512Mi"
+      memory: "1Gi"
       cpu: "500m"
     limits:
-      memory: "1Gi"
+      memory: "2Gi"
       cpu: "1"
-
-  # Skip waiting for models since we're using external MaaS
-  # Override init containers to prevent waiting for local models
-  initContainers: []
-  # Don't wait for models - they're external via MaaS
-  skipModelWait: true
+
+  readinessProbe:
+    httpGet:
+      path: /v1/health
+      port: 8321
+    initialDelaySeconds: 60
+    periodSeconds: 15
+    timeoutSeconds: 10
+    failureThreshold: 10
+
+  livenessProbe:
+    httpGet:
+      path: /v1/health
+      port: 8321
+    initialDelaySeconds: 120
+    periodSeconds: 30
+    timeoutSeconds: 10
+    failureThreshold: 10
+
+  # Avoid llama-stack-data PVC (2Gi RWO) — use emptyDir in Kind CI
+  volumes:
+    - configMap:
+        defaultMode: 420
+        name: run-config
+      name: run-config-volume
+    - emptyDir: {}
+      name: dot-llama
+    - emptyDir: {}
+      name: cache
 
   fileProcessors:
     enabled: true

diff --git a/tests/integration/llamastack/values-e2e.yaml b/tests/integration/llamastack/values-e2e.yaml
@@ -48,15 +48,9 @@ volumeMounts:
 
 # Configure models to use Red Hat MaaS
 # All MaaS configuration (url, id, enabled, apiToken) will be injected via helm --set in GitHub Actions
-# This allows flexible configuration from workflow environment variables
+# The subchart ignores initContainers/skipModelWait set in this file, so the workflow patches out the wait-for-models init container after install
 global:
   models: {}
-    # Example structure (populated by workflow):
-    # llama-3-2-3b:
-    #   url: "https://maas-endpoint/v1"
-    #   id: "llama-3-2-3b"
-    #   enabled: true
-    #   apiToken: "secret-key"
 
 # PostgreSQL + PGVector configuration
 pgvector:
@@ -69,31 +63,64 @@ pgvector:
     port: "5432"
   resources:
     requests:
+      memory: "256Mi"
+      cpu: "250m"
+    limits:
       memory: "512Mi"
       cpu: "500m"
-    limits:
-      memory: "1Gi"
-      cpu: "1"
-
-# Llama Stack configuration for MaaS
+  volumeClaimTemplates:
+    - metadata:
+        name: pg-data
+      spec:
+        accessModes: ["ReadWriteOnce"]
+        resources:
+          requests:
+            storage: 1Gi
+
+# Llama Stack configuration for MaaS (ogxai/distribution-starter via llama-stack 0.8.x)
 llama-stack:
   enabled: true
   secrets:
     TAVILY_SEARCH_API_KEY: ""
-
+
+  initContainers: []  # NOTE: ignored by the subchart per the comment above `global`; the workflow patches the Deployment after install
+  skipModelWait: true  # NOTE: likewise ignored by the subchart for now
+
   resources:
     requests:
-      memory: "512Mi"
+      memory: "1Gi"
       cpu: "500m"
     limits:
-      memory: "1Gi"
+      memory: "2Gi"
       cpu: "1"
-
-  # Skip waiting for models since we're using external MaaS
-  # Override init containers to prevent waiting for local models
-  initContainers: []
-  # Don't wait for models - they're external via MaaS
-  skipModelWait: true
+
+  readinessProbe:
+    httpGet:
+      path: /v1/health
+      port: 8321
+    initialDelaySeconds: 60
+    periodSeconds: 15
+    timeoutSeconds: 10
+    failureThreshold: 10
+
+  livenessProbe:
+    httpGet:
+      path: /v1/health
+      port: 8321
+    initialDelaySeconds: 120
+    periodSeconds: 30
+    timeoutSeconds: 10
+    failureThreshold: 10
+
+  volumes:
+    - configMap:
+        defaultMode: 420
+        name: run-config
+      name: run-config-volume
+    - emptyDir: {}
+      name: dot-llama
+    - emptyDir: {}
+      name: cache
 
   fileProcessors:
     enabled: true