Add a commented-out Xeon example model configuration to
deploy/helm/rag-values.yaml.example and bump the versions of the six
dependencies declared in deploy/helm/rag/Chart.yaml.

NOTE(review): the line breaks and indentation inside the hunks below were
reconstructed from the hunk headers' line counts; confirm the whitespace
against the target files before applying this patch.

diff --git a/deploy/helm/rag-values.yaml.example b/deploy/helm/rag-values.yaml.example
index 0c0b3d9..6484539 100644
--- a/deploy/helm/rag-values.yaml.example
+++ b/deploy/helm/rag-values.yaml.example
@@ -169,6 +169,17 @@ global:
     #       operator: Exists
     #       effect: NoSchedule
 
+    # Example Xeon configurations:
+    # llama-3-2-3b-instruct:
+    #   id: meta-llama/Llama-3.2-3B-Instruct
+    #   enabled: true
+    #   device: "xeon"
+    #   args:
+    #     - --max-model-len
+    #     - "14336"
+    #     - --max-num-seqs
+    #     - "32"
+
 # MCP servers configuration
 mcp-servers: {}
 
diff --git a/deploy/helm/rag/Chart.yaml b/deploy/helm/rag/Chart.yaml
index a441cf7..c357774 100644
--- a/deploy/helm/rag/Chart.yaml
+++ b/deploy/helm/rag/Chart.yaml
@@ -7,26 +7,26 @@ appVersion: "0.2.45"
 
 dependencies:
   - name: pgvector
-    version: 0.5.5
+    version: 0.5.6
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: pgvector.enabled
   - name: llm-service
-    version: 0.5.9
+    version: 0.5.10
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: llm-service.enabled
   - name: configure-pipeline
-    version: 0.5.8
+    version: 0.5.9
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: configure-pipeline.enabled
   - name: ingestion-pipeline
-    version: 0.7.4
+    version: 0.7.5
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: ingestion-pipeline.enabled
   - name: llama-stack
-    version: 0.8.6
+    version: 0.8.7
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: llama-stack.enabled
   - name: mcp-servers
-    version: 0.5.15
+    version: 0.5.18
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: mcp-servers.enabled