From a9950e1d2ee567e31be9defa8bd26742267a0800 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Fri, 26 Jun 2026 14:23:49 -0400 Subject: [PATCH] fix(e2e): fix e2e tests --- .github/workflows/e2e-tests.yaml | 19 +++++- tests/e2e/values-e2e.yaml | 72 ++++++++++++++------ tests/integration/llamastack/values-e2e.yaml | 69 +++++++++++++------ 3 files changed, 115 insertions(+), 45 deletions(-) diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml index 40ef75e..b5a742d 100644 --- a/.github/workflows/e2e-tests.yaml +++ b/.github/workflows/e2e-tests.yaml @@ -5,6 +5,7 @@ on: branches: - dev - main + push: branches: - dev @@ -335,6 +336,8 @@ jobs: # Override MaaS configuration from environment variables # Note: All MaaS settings (endpoint, model ID, API key) passed via --set # Some values may appear as *** in logs due to GitHub Actions secret masking + # wait-for-models init is rendered when a MaaS model is registered; the subchart + # ignores initContainers/skipModelWait in values. Patch it out after install. helm install rag deploy/helm/rag \ --namespace rag-e2e \ --values tests/e2e/values-e2e.yaml \ @@ -342,11 +345,15 @@ jobs: --set global.models.${MAAS_MODEL_ID}.id="${MAAS_MODEL_ID}" \ --set global.models.${MAAS_MODEL_ID}.enabled=true \ --set global.models.${MAAS_MODEL_ID}.apiToken="${MAAS_API_KEY}" \ - --set-json llama-stack.initContainers='[]' \ + --wait=false \ --skip-crds \ --timeout 20m \ --debug + kubectl patch deployment llamastack -n rag-e2e --type=json \ + -p='[{"op":"replace","path":"/spec/template/spec/initContainers","value":[]}]' + kubectl rollout restart deployment/llamastack -n rag-e2e + - name: Wait for core services to be ready run: | echo "=========================================" @@ -459,7 +466,7 @@ jobs: echo "=== RAG UI logs ===" kubectl logs -l app.kubernetes.io/name=rag -n rag-e2e --tail=200 || echo "No RAG UI logs available" - echo "=== Llama Stack logs (CRITICAL - Check model registration) ===" + echo "=== Llama Stack logs ===" kubectl logs -l app.kubernetes.io/name=llamastack -n rag-e2e --tail=300 || echo "No Llama Stack logs available" echo "=== Llama Stack pod details ===" @@ -618,6 +625,8 @@ jobs: run: | kubectl create namespace rag-e2e-ui || true + # wait-for-models init is rendered when a MaaS model is registered; the subchart + # ignores initContainers/skipModelWait in values. Patch it out after install. helm install rag deploy/helm/rag \ --namespace rag-e2e-ui \ --values tests/e2e/values-e2e.yaml \ @@ -625,11 +634,15 @@ jobs: --set global.models.${MAAS_MODEL_ID}.id="${MAAS_MODEL_ID}" \ --set global.models.${MAAS_MODEL_ID}.enabled=true \ --set global.models.${MAAS_MODEL_ID}.apiToken="${MAAS_API_KEY}" \ - --set-json llama-stack.initContainers='[]' \ + --wait=false \ --skip-crds \ --timeout 20m \ --debug + kubectl patch deployment llamastack -n rag-e2e-ui --type=json \ + -p='[{"op":"replace","path":"/spec/template/spec/initContainers","value":[]}]' + kubectl rollout restart deployment/llamastack -n rag-e2e-ui + - name: Wait for services to be ready run: | kubectl wait --for=condition=available --timeout=600s \ diff --git a/tests/e2e/values-e2e.yaml b/tests/e2e/values-e2e.yaml index 2d3ff98..f848bec 100644 --- a/tests/e2e/values-e2e.yaml +++ b/tests/e2e/values-e2e.yaml @@ -47,15 +47,9 @@ volumeMounts: # Configure models to use Red Hat MaaS # All MaaS configuration (url, id, enabled, apiToken) will be injected via helm --set in GitHub Actions -# This allows flexible configuration from workflow environment variables +# The workflow patches out wait-for-models init after install (subchart ignores skipModelWait here) global: models: {} - # Example structure (populated by workflow): - # llama-3-2-3b: - # url: "https://maas-endpoint/v1" - # id: "llama-3-2-3b" - # enabled: true - # apiToken: "secret-key" # PostgreSQL + PGVector configuration pgvector: @@ -68,31 +62,67 @@ pgvector: port: "5432" resources: requests: + memory: "256Mi" + cpu: "250m" + limits: memory: "512Mi" cpu: "500m" - limits: - memory: "1Gi" - cpu: "1" - -# Llama Stack configuration for MaaS + # Default chart PVC is 5Gi — too heavy for Kind on GHA runners + volumeClaimTemplates: + - metadata: + name: pg-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 1Gi + +# Llama Stack configuration for MaaS (ogxai/distribution-starter via llama-stack 0.8.x) llama-stack: enabled: true secrets: TAVILY_SEARCH_API_KEY: "" - + + initContainers: [] + skipModelWait: true + + # OGX starter needs more headroom than the old llamastack image in Kind CI resources: requests: - memory: "512Mi" + memory: "1Gi" cpu: "500m" limits: - memory: "1Gi" + memory: "2Gi" cpu: "1" - - # Skip waiting for models since we're using external MaaS - # Override init containers to prevent waiting for local models - initContainers: [] - # Don't wait for models - they're external via MaaS - skipModelWait: true + + readinessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 60 + periodSeconds: 15 + timeoutSeconds: 10 + failureThreshold: 10 + + livenessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 120 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 10 + + # Avoid llama-stack-data PVC (2Gi RWO) — use emptyDir in Kind CI + volumes: + - configMap: + defaultMode: 420 + name: run-config + name: run-config-volume + - emptyDir: {} + name: dot-llama + - emptyDir: {} + name: cache fileProcessors: enabled: true diff --git a/tests/integration/llamastack/values-e2e.yaml b/tests/integration/llamastack/values-e2e.yaml index 62e5308..9b55ee4 100644 --- a/tests/integration/llamastack/values-e2e.yaml +++ b/tests/integration/llamastack/values-e2e.yaml @@ -48,15 +48,9 @@ volumeMounts: # Configure models to use Red Hat MaaS # All MaaS configuration (url, id, enabled, apiToken) will be injected via helm --set in GitHub Actions -# This allows flexible configuration from workflow environment variables +# The workflow patches out wait-for-models init after install (subchart ignores skipModelWait here) global: models: {} - # Example structure (populated by workflow): - # llama-3-2-3b: - # url: "https://maas-endpoint/v1" - # id: "llama-3-2-3b" - # enabled: true - # apiToken: "secret-key" # PostgreSQL + PGVector configuration pgvector: @@ -69,31 +63,64 @@ pgvector: port: "5432" resources: requests: + memory: "256Mi" + cpu: "250m" + limits: memory: "512Mi" cpu: "500m" - limits: - memory: "1Gi" - cpu: "1" - -# Llama Stack configuration for MaaS + volumeClaimTemplates: + - metadata: + name: pg-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 1Gi + +# Llama Stack configuration for MaaS (ogxai/distribution-starter via llama-stack 0.8.x) llama-stack: enabled: true secrets: TAVILY_SEARCH_API_KEY: "" - + + initContainers: [] + skipModelWait: true + resources: requests: - memory: "512Mi" + memory: "1Gi" cpu: "500m" limits: - memory: "1Gi" + memory: "2Gi" cpu: "1" - - # Skip waiting for models since we're using external MaaS - # Override init containers to prevent waiting for local models - initContainers: [] - # Don't wait for models - they're external via MaaS - skipModelWait: true + + readinessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 60 + periodSeconds: 15 + timeoutSeconds: 10 + failureThreshold: 10 + + livenessProbe: + httpGet: + path: /v1/health + port: 8321 + initialDelaySeconds: 120 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 10 + + volumes: + - configMap: + defaultMode: 420 + name: run-config + name: run-config-volume + - emptyDir: {} + name: dot-llama + - emptyDir: {} + name: cache fileProcessors: enabled: true