Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions .github/workflows/e2e-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
branches:
- dev
- main

push:
branches:
- dev
Expand Down Expand Up @@ -335,18 +336,24 @@ jobs:
# Override MaaS configuration from environment variables
# Note: All MaaS settings (endpoint, model ID, API key) passed via --set
# Some values may appear as *** in logs due to GitHub Actions secret masking
# wait-for-models init is rendered when a MaaS model is registered; the subchart
# ignores initContainers/skipModelWait in values. Patch it out after install.
helm install rag deploy/helm/rag \
--namespace rag-e2e \
--values tests/e2e/values-e2e.yaml \
--set global.models.${MAAS_MODEL_ID}.url="${MAAS_ENDPOINT}" \
--set global.models.${MAAS_MODEL_ID}.id="${MAAS_MODEL_ID}" \
--set global.models.${MAAS_MODEL_ID}.enabled=true \
--set global.models.${MAAS_MODEL_ID}.apiToken="${MAAS_API_KEY}" \
--set-json llama-stack.initContainers='[]' \
--wait=false \
--skip-crds \
--timeout 20m \
--debug

kubectl patch deployment llamastack -n rag-e2e --type=json \
-p='[{"op":"replace","path":"/spec/template/spec/initContainers","value":[]}]'
kubectl rollout restart deployment/llamastack -n rag-e2e

- name: Wait for core services to be ready
run: |
echo "========================================="
Expand Down Expand Up @@ -459,7 +466,7 @@ jobs:
echo "=== RAG UI logs ==="
kubectl logs -l app.kubernetes.io/name=rag -n rag-e2e --tail=200 || echo "No RAG UI logs available"

echo "=== Llama Stack logs (CRITICAL - Check model registration) ==="
echo "=== Llama Stack logs ==="
kubectl logs -l app.kubernetes.io/name=llamastack -n rag-e2e --tail=300 || echo "No Llama Stack logs available"

echo "=== Llama Stack pod details ==="
Expand Down Expand Up @@ -618,18 +625,24 @@ jobs:
run: |
kubectl create namespace rag-e2e-ui || true

# wait-for-models init is rendered when a MaaS model is registered; the subchart
# ignores initContainers/skipModelWait in values. Patch it out after install.
helm install rag deploy/helm/rag \
--namespace rag-e2e-ui \
--values tests/e2e/values-e2e.yaml \
--set global.models.${MAAS_MODEL_ID}.url="${MAAS_ENDPOINT}" \
--set global.models.${MAAS_MODEL_ID}.id="${MAAS_MODEL_ID}" \
--set global.models.${MAAS_MODEL_ID}.enabled=true \
--set global.models.${MAAS_MODEL_ID}.apiToken="${MAAS_API_KEY}" \
--set-json llama-stack.initContainers='[]' \
--wait=false \
--skip-crds \
--timeout 20m \
--debug

kubectl patch deployment llamastack -n rag-e2e-ui --type=json \
-p='[{"op":"replace","path":"/spec/template/spec/initContainers","value":[]}]'
kubectl rollout restart deployment/llamastack -n rag-e2e-ui

- name: Wait for services to be ready
run: |
kubectl wait --for=condition=available --timeout=600s \
Expand Down
72 changes: 51 additions & 21 deletions tests/e2e/values-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,9 @@ volumeMounts:

# Configure models to use Red Hat MaaS
# All MaaS configuration (url, id, enabled, apiToken) will be injected via helm --set in GitHub Actions
# This allows flexible configuration from workflow environment variables
# The workflow patches out wait-for-models init after install (subchart ignores skipModelWait here)
global:
models: {}
# Example structure (populated by workflow):
# llama-3-2-3b:
# url: "https://maas-endpoint/v1"
# id: "llama-3-2-3b"
# enabled: true
# apiToken: "secret-key"

# PostgreSQL + PGVector configuration
pgvector:
Expand All @@ -68,31 +62,67 @@ pgvector:
port: "5432"
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
limits:
memory: "1Gi"
cpu: "1"

# Llama Stack configuration for MaaS
# Default chart PVC is 5Gi — too heavy for Kind on GHA runners
volumeClaimTemplates:
- metadata:
name: pg-data
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi

# Llama Stack configuration for MaaS (ogxai/distribution-starter via llama-stack 0.8.x)
llama-stack:
enabled: true
secrets:
TAVILY_SEARCH_API_KEY: ""


initContainers: []
skipModelWait: true

# OGX starter needs more headroom than the old llamastack image in Kind CI
resources:
requests:
memory: "512Mi"
memory: "1Gi"
cpu: "500m"
limits:
memory: "1Gi"
memory: "2Gi"
cpu: "1"

# Skip waiting for models since we're using external MaaS
# Override init containers to prevent waiting for local models
initContainers: []
# Don't wait for models - they're external via MaaS
skipModelWait: true

readinessProbe:
httpGet:
path: /v1/health
port: 8321
initialDelaySeconds: 60
periodSeconds: 15
timeoutSeconds: 10
failureThreshold: 10

livenessProbe:
httpGet:
path: /v1/health
port: 8321
initialDelaySeconds: 120
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 10

# Avoid llama-stack-data PVC (2Gi RWO) — use emptyDir in Kind CI
volumes:
- configMap:
defaultMode: 420
name: run-config
name: run-config-volume
- emptyDir: {}
name: dot-llama
- emptyDir: {}
name: cache

fileProcessors:
enabled: true
Expand Down
69 changes: 48 additions & 21 deletions tests/integration/llamastack/values-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,9 @@ volumeMounts:

# Configure models to use Red Hat MaaS
# All MaaS configuration (url, id, enabled, apiToken) will be injected via helm --set in GitHub Actions
# This allows flexible configuration from workflow environment variables
# The workflow patches out wait-for-models init after install (subchart ignores skipModelWait here)
global:
models: {}
# Example structure (populated by workflow):
# llama-3-2-3b:
# url: "https://maas-endpoint/v1"
# id: "llama-3-2-3b"
# enabled: true
# apiToken: "secret-key"

# PostgreSQL + PGVector configuration
pgvector:
Expand All @@ -69,31 +63,64 @@ pgvector:
port: "5432"
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
limits:
memory: "1Gi"
cpu: "1"

# Llama Stack configuration for MaaS
volumeClaimTemplates:
- metadata:
name: pg-data
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi

# Llama Stack configuration for MaaS (ogxai/distribution-starter via llama-stack 0.8.x)
llama-stack:
enabled: true
secrets:
TAVILY_SEARCH_API_KEY: ""


initContainers: []
skipModelWait: true

resources:
requests:
memory: "512Mi"
memory: "1Gi"
cpu: "500m"
limits:
memory: "1Gi"
memory: "2Gi"
cpu: "1"

# Skip waiting for models since we're using external MaaS
# Override init containers to prevent waiting for local models
initContainers: []
# Don't wait for models - they're external via MaaS
skipModelWait: true

readinessProbe:
httpGet:
path: /v1/health
port: 8321
initialDelaySeconds: 60
periodSeconds: 15
timeoutSeconds: 10
failureThreshold: 10

livenessProbe:
httpGet:
path: /v1/health
port: 8321
initialDelaySeconds: 120
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 10

volumes:
- configMap:
defaultMode: 420
name: run-config
name: run-config-volume
- emptyDir: {}
name: dot-llama
- emptyDir: {}
name: cache

fileProcessors:
enabled: true
Expand Down
Loading