langflow-ai · phact · Jun 2, 2026 · Jun 2, 2026 · coderabbitai · Jun 2, 2026
diff --git a/.env.example b/.env.example
@@ -119,6 +119,11 @@ OPENSEARCH_USERNAME=admin
 # Change this if you want to use a different index name or avoid conflicts
 OPENSEARCH_INDEX_NAME=documents
 
+# OpenSearch index layout for newly created OpenRAG indices
+# Shard count cannot be changed on an existing index without reindexing.
+OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS=1
+OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS=0
+
 # IBM AMS Authentication (IBM Watsonx Data embedded mode)
 # Set IBM_AUTH_ENABLED=true to authenticate via the ibm-openrag-session cookie
 # instead of Google OAuth. The raw IBM JWT is also passed directly to OpenSearch.

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -95,6 +95,8 @@ services:
       - IBM_COS_HMAC_SECRET_ACCESS_KEY=${IBM_COS_HMAC_SECRET_ACCESS_KEY}
       - IBM_COS_AUTH_ENDPOINT=${IBM_COS_AUTH_ENDPOINT}
       - OPENSEARCH_INDEX_NAME=${OPENSEARCH_INDEX_NAME:-documents}
+      - OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS=${OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS:-1}
+      - OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS=${OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS:-0}
       - LANGFLOW_KEY=${LANGFLOW_KEY}
       - SEGMENT_WRITE_KEY=${SEGMENT_WRITE_KEY:-}
       - ENVIRONMENT=${ENVIRONMENT:-production}
@@ -195,6 +197,8 @@ services:
       - OPENSEARCH_PORT=${LANGFLOW_OPENSEARCH_PORT:-${OPENSEARCH_PORT:-9200}}
       - OPENSEARCH_URL=https://${LANGFLOW_OPENSEARCH_HOST:-${OPENSEARCH_HOST:-opensearch}}:${LANGFLOW_OPENSEARCH_PORT:-${OPENSEARCH_PORT:-9200}}
       - OPENSEARCH_INDEX_NAME=${OPENSEARCH_INDEX_NAME:-documents}
+      - OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS=${OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS:-1}
+      - OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS=${OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS:-0}
       - DOCLING_SERVE_URL=${DOCLING_SERVE_URL:-http://host.docker.internal:5001}
       - DOCLING_TASK_ID=None
       - FILENAME=None

diff --git a/docs/docs/reference/configuration.mdx b/docs/docs/reference/configuration.mdx
@@ -152,6 +152,8 @@ Configure OpenSearch database authentication.
 | `OPENSEARCH_HOST` | `localhost` | OpenSearch service host. |
 | `OPENSEARCH_PORT` | `9200` | OpenSearch service port. |
 | `OPENSEARCH_USERNAME` | `admin` | OpenSearch administrator username. |
+| `OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS` | `1` | Primary shard count for newly created OpenRAG indices. Existing indices must be reindexed to change shard count. |
+| `OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS` | `0` | Replica shard count for OpenRAG indices. OpenRAG reconciles existing managed indices to this value at startup. |
 | `LANGFLOW_OPENSEARCH_HOST` | Not set | By default, OpenRAG passes the `OPENSEARCH_HOST` value to Langflow. Use the `LANGFLOW_OPENSEARCH_*` variables to set a different OpenSearch endpoint for Langflow specifically. OpenRAG itself still uses the `OPENSEARCH_HOST` value. |
 | `LANGFLOW_OPENSEARCH_PORT` | Not set | By default, OpenRAG passes the `OPENSEARCH_PORT` value to Langflow. Use the `LANGFLOW_OPENSEARCH_*` variables to set a different OpenSearch endpoint for Langflow specifically. OpenRAG itself still uses the `OPENSEARCH_PORT` value. |
 

diff --git a/flows/components/opensearch_multimodal.py b/flows/components/opensearch_multimodal.py
@@ -2,6 +2,7 @@
 
 import copy
 import json
+import os
 import uuid
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any
@@ -29,6 +30,18 @@
 REQUEST_TIMEOUT = 60
 MAX_RETRIES = 5
 
+
+def _get_min_env_int(key: str, default: int, minimum: int) -> int:
+    """Read an integer env var, clamped to a minimum valid value.
+
+    Args:
+        key: Name of the environment variable to read.
+        default: Value used when the variable is unset or cannot be parsed
+            as an integer (for example an empty string).
+        minimum: Lower bound applied to the result.
+
+    Returns:
+        The parsed value (or ``default``), raised to ``minimum`` if smaller.
+    """
+    try:
+        # os.getenv returns ``default`` itself when the variable is unset.
+        value = int(os.getenv(key, default))
+    except (TypeError, ValueError):
+        # Unparseable setting: fall back to the default.
+        value = default
+    return max(value, minimum)
+
+
+OPENSEARCH_NUMBER_OF_SHARDS = _get_min_env_int("OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS", 1, 1)
+OPENSEARCH_NUMBER_OF_REPLICAS = _get_min_env_int("OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS", 0, 0)
+
 # watsonx.ai surfaces rate-limit state via these (mostly non-standard) response
 # headers. The IBM SDK acts on the x-requests-limit-* family directly; we log
 # them on a failed embedding call to aid plan/region tuning.
@@ -53,7 +66,9 @@ def _log_watsonx_rate_limit_headers(error: Exception) -> None:
         if not headers:
             return
         status = getattr(response, "status_code", "unknown")
-        observed = {h: headers.get(h) for h in _WATSONX_RATE_LIMIT_HEADERS if headers.get(h) is not None}
+        observed = {
+            h: headers.get(h) for h in _WATSONX_RATE_LIMIT_HEADERS if headers.get(h) is not None
+        }
         if str(status) == "429" or observed:
             logger.warning(f"watsonx rate-limit response (status={status}): {observed}")
     except Exception as log_error:  # never let diagnostics mask the real error
@@ -371,7 +386,7 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
                 "Valid JSON Web Token for authentication. "
                 "Will be sent in the Authorization header (with optional 'Bearer ' prefix)."
             ),
-            required=False
+            required=False,
         ),
         StrInput(
             name="jwt_header",
@@ -536,10 +551,8 @@ def raw_search(self, query: str | dict | None = None) -> Data:
 
             # Apply score_threshold / scoreThreshold as min_score if not already set
             if "min_score" not in query_body:
-
                 score_threshold = self._resolve_score_threshold(filter_obj)
                 if score_threshold is not None:
-
                     query_body["min_score"] = score_threshold
 
         client = self.build_client()
@@ -664,7 +677,11 @@ def _default_text_mapping(
             Dictionary containing OpenSearch index mapping configuration
         """
         return {
-            "settings": {"index": {"knn": True, "knn.algo_param.ef_search": ef_search}},
+            "settings": {
+                "index": {"knn": True, "knn.algo_param.ef_search": ef_search},
+                "number_of_shards": OPENSEARCH_NUMBER_OF_SHARDS,
+                "number_of_replicas": OPENSEARCH_NUMBER_OF_REPLICAS,
+            },
             "mappings": {
                 "properties": {
                     vector_field: {
@@ -1446,7 +1463,6 @@ def _add_documents_to_vector_store(self, client: OpenSearch) -> None:
         logger.debug(f"Is IBM/watsonx embedding: {is_ibm}")
 
         if is_ibm:
-
             # Hand the full batch to the SDK and let it batch/throttle/retry.
             # Retry attempts and base backoff are tunable via the SDK's own
             # WATSONX_MAX_RETRIES / WATSONX_DELAY_TIME environment variables.
@@ -1722,7 +1738,6 @@ def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:
                 context_clauses.append({"terms": {field: values}})
         return context_clauses
 
-
     def _parse_filter_expression(self) -> dict | None:
         """Parse and validate optional filter_expression JSON.
 
@@ -1777,8 +1792,9 @@ def _resolve_score_threshold(self, filter_obj: dict | None) -> float | None:
             return None
         return float(score_threshold)
 
-    def _detect_available_models(self, client: OpenSearch, filter_clauses: list[dict] | None = None) -> list[str]:
-
+    def _detect_available_models(
+        self, client: OpenSearch, filter_clauses: list[dict] | None = None
+    ) -> list[str]:
         """Detect which embedding models have documents in the index.
 
         Uses aggregation to find all unique embedding_model values, optionally
@@ -2401,7 +2417,6 @@ def search(self, query: str | None = None) -> list[dict[str, Any]]:
         ]
 
     def search_documents(self) -> Table:
-
         """Search documents and return results as a Table.
 
         This is the main interface method that performs the multi-model search using the

diff --git a/flows/ingestion_flow.json b/flows/ingestion_flow.json
diff --git a/flows/openrag_agent.json b/flows/openrag_agent.json
diff --git a/flows/openrag_nudges.json b/flows/openrag_nudges.json
diff --git a/flows/openrag_url_mcp.json b/flows/openrag_url_mcp.json
diff --git a/kubernetes/helm/openrag/templates/backend/backend-dotenv.yaml b/kubernetes/helm/openrag/templates/backend/backend-dotenv.yaml
@@ -83,6 +83,8 @@ stringData:
     {{- if .Values.global.opensearch.indexName }}
     OPENSEARCH_INDEX_NAME={{ .Values.global.opensearch.indexName | quote }}
     {{- end }}
+    OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS={{ .Values.global.opensearch.numberOfShards | quote }}
+    OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS={{ .Values.global.opensearch.numberOfReplicas | quote }}
     LANGFLOW_OPENSEARCH_HOST={{ include "openrag.langflow.opensearch.host" . | quote }}
     LANGFLOW_OPENSEARCH_PORT={{ include "openrag.langflow.opensearch.port" . | quote }}
 

diff --git a/kubernetes/helm/openrag/templates/langflow/langflow-dotenv.yaml b/kubernetes/helm/openrag/templates/langflow/langflow-dotenv.yaml
@@ -113,6 +113,8 @@ stringData:
     OPENSEARCH_PORT={{ include "openrag.langflow.opensearch.port" . | quote }}
     OPENSEARCH_URL={{ include "openrag.langflow.opensearch.url" . | quote }}
     OPENSEARCH_INDEX_NAME={{ .Values.global.opensearch.indexName | quote }}
+    OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS={{ .Values.global.opensearch.numberOfShards | quote }}
+    OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS={{ .Values.global.opensearch.numberOfReplicas | quote }}
     {{- if .Values.global.opensearch.password }}
     OPENSEARCH_PASSWORD={{ .Values.global.opensearch.password | quote }}
     {{- end }}

diff --git a/kubernetes/helm/openrag/values.yaml b/kubernetes/helm/openrag/values.yaml
@@ -28,6 +28,8 @@ global:
     username: "admin"                     # OpenSearch username
     password: ""                          # OpenSearch password (stored in secret)
     indexName: "documents"                # OpenSearch index name
+    numberOfShards: 1                     # Primary shards for newly created OpenRAG indices
+    numberOfReplicas: 0                   # Replica shards for OpenRAG indices
 
   docling:
     host: docling-serve.docling.svc.cluster.local

diff --git a/kubernetes/operator/README.md b/kubernetes/operator/README.md
@@ -289,6 +289,8 @@ spec:
       size: 10Gi
   opensearch:
     host: opensearch-coordinating.opensearch.svc.cluster.local
+    numberOfShards: 1
+    numberOfReplicas: 0
     credentialsSecret: opensearch-credentials   # keys: username, password
   # docling:                        # optional
   #   host: docling-serve.docling.svc.cluster.local

diff --git a/kubernetes/operator/api/v1alpha1/openrag_types.go b/kubernetes/operator/api/v1alpha1/openrag_types.go
@@ -319,6 +319,18 @@ type OpenSearchSpec struct {
 	// +kubebuilder:default="documents"
 	IndexName string `json:"indexName,omitempty"`
 
+	// NumberOfShards configures primary shards for newly-created OpenRAG indices.
+	// +optional
+	// +kubebuilder:default=1
+	// +kubebuilder:validation:Minimum=1
+	NumberOfShards int32 `json:"numberOfShards,omitempty"`
+
+	// NumberOfReplicas configures replica shards for OpenRAG indices.
+	// +optional
+	// +kubebuilder:default=0
+	// +kubebuilder:validation:Minimum=0
+	NumberOfReplicas int32 `json:"numberOfReplicas,omitempty"`
+
 	// CredentialsSecret is the name of a Secret with keys "username" and "password".
 	// +optional
 	CredentialsSecret string `json:"credentialsSecret,omitempty"`

diff --git a/kubernetes/operator/config/crd/bases/openr.ag_openrags.yaml b/kubernetes/operator/config/crd/bases/openr.ag_openrags.yaml
@@ -13276,6 +13276,20 @@ spec:
                     default: documents
                     description: IndexName used for document storage.
                     type: string
+                  numberOfReplicas:
+                    default: 0
+                    description: NumberOfReplicas configures replica shards for
+                      OpenRAG indices.
+                    format: int32
+                    minimum: 0
+                    type: integer
+                  numberOfShards:
+                    default: 1
+                    description: NumberOfShards configures primary shards for newly-created
+                      OpenRAG indices.
+                    format: int32
+                    minimum: 1
+                    type: integer
                   port:
                     default: 9200
                     format: int32

diff --git a/kubernetes/operator/config/samples/kind-cluster-openrag-cr.yaml b/kubernetes/operator/config/samples/kind-cluster-openrag-cr.yaml
@@ -54,6 +54,8 @@ spec:
   #   port: 9200
   #   scheme: https
   #   indexName: documents
+  #   numberOfShards: 1
+  #   numberOfReplicas: 0
   #   credentialsSecret: opensearch-credentials   # keys: username, password
 
   # Operator-managed Docling components — optional document processing

diff --git a/kubernetes/operator/config/samples/openrag_v1alpha1_openrag.yaml b/kubernetes/operator/config/samples/openrag_v1alpha1_openrag.yaml
@@ -69,6 +69,8 @@ spec:
   #   port: 9200
   #   scheme: https
   #   indexName: documents
+  #   numberOfShards: 1
+  #   numberOfReplicas: 0
   #   credentialsSecret: opensearch-credentials   # keys: username, password
 
   # Operator-managed Docling components — optional document processing

diff --git a/kubernetes/operator/internal/controller/env.go b/kubernetes/operator/internal/controller/env.go
@@ -75,9 +75,11 @@ func NewEnvVarManager() *EnvVarManager {
 			"SELECTED_EMBEDDING_MODEL":  "",
 
 			// OpenSearch defaults (for variables in LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT)
-			"OPENSEARCH_PASSWORD":   "None",
-			"OPENSEARCH_URL":        "None",
-			"OPENSEARCH_INDEX_NAME": "None",
+			"OPENSEARCH_PASSWORD":                   "None",
+			"OPENSEARCH_URL":                        "None",
+			"OPENSEARCH_INDEX_NAME":                 "None",
+			"OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS":   "1",
+			"OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS": "0",
 
 			// Docling defaults (for variables in LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT)
 			"DOCLING_SERVE_URL": "None",
@@ -114,7 +116,9 @@ func NewEnvVarManager() *EnvVarManager {
 			"OPENRAG_VERSION":           "latest",
 
 			// OpenSearch configuration
-			"OPENSEARCH_DATA_PATH": "",
+			"OPENSEARCH_DATA_PATH":                  "",
+			"OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS":   "1",
+			"OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS": "0",
 
 			// Logging configuration
 			"LOG_LEVEL":    "DEBUG",

diff --git a/kubernetes/operator/internal/controller/openrag_controller.go b/kubernetes/operator/internal/controller/openrag_controller.go
@@ -306,6 +306,12 @@ func (r *OpenRAGReconciler) buildBackendEnv(ctx context.Context, o *openragv1alp
 		if os.IndexName != "" {
 			envVars["OPENSEARCH_INDEX_NAME"] = os.IndexName
 		}
+		if os.NumberOfShards > 0 {
+			envVars["OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS"] = fmt.Sprintf("%d", os.NumberOfShards)
+		}
+		if os.NumberOfReplicas >= 0 {
+			envVars["OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS"] = fmt.Sprintf("%d", os.NumberOfReplicas)
+		}
 
 		// Read OpenSearch credentials from user-provided secret
 		if os.CredentialsSecret != "" {
@@ -480,6 +486,12 @@ func (r *OpenRAGReconciler) buildLangflowEnv(ctx context.Context, o *openragv1al
 		if os.IndexName != "" {
 			envVars["OPENSEARCH_INDEX_NAME"] = os.IndexName
 		}
+		if os.NumberOfShards > 0 {
+			envVars["OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS"] = fmt.Sprintf("%d", os.NumberOfShards)
+		}
+		if os.NumberOfReplicas >= 0 {
+			envVars["OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS"] = fmt.Sprintf("%d", os.NumberOfReplicas)
+		}
 	}
 
 	// WatsonX configuration from CR spec

diff --git a/kubernetes/operator/internal/controller/openrag_controller_test.go b/kubernetes/operator/internal/controller/openrag_controller_test.go
@@ -1359,6 +1359,28 @@ func TestEnvHash_ChangesWhenEnvChanges(t *testing.T) {
 	assert.NotEqual(t, hash1, hash2, "Hash should change when env vars change")
 }
 
+// TestBuildEnv_IncludesOpenSearchIndexSettings checks that NumberOfShards and
+// NumberOfReplicas set on the CR's OpenSearch spec appear in both the backend
+// and the Langflow env content.
+func TestBuildEnv_IncludesOpenSearchIndexSettings(t *testing.T) {
+	s := newScheme(t)
+	cr := minimalCR("test-openrag", "test-ns")
+	// Use values that differ from the defaults (1 shard, 0 replicas) so the
+	// assertions below cannot be satisfied by a default.
+	cr.Spec.OpenSearch = &openragv1alpha1.OpenSearchSpec{
+		Host:             "opensearch.example.com",
+		NumberOfShards:   3,
+		NumberOfReplicas: 2,
+	}
+
+	r, _ := reconciler(s, cr)
+
+	backendEnvContent, err := r.buildBackendEnv(context.Background(), cr, "test-ns")
+	require.NoError(t, err)
+	langflowEnvContent, err := r.buildLangflowEnv(context.Background(), cr, "test-ns")
+	require.NoError(t, err)
+
+	// Both rendered env contents must carry the same index settings.
+	for _, envContent := range []string{backendEnvContent, langflowEnvContent} {
+		assert.Equal(t, "3", parseEnvValue(envContent, "OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS"))
+		assert.Equal(t, "2", parseEnvValue(envContent, "OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS"))
+	}
+}
+
 func TestDeployment_ContainsEnvHashAnnotation(t *testing.T) {
 	// Test that backend deployment has env hash annotation
 	s := newScheme(t)

diff --git a/src/config/settings.py b/src/config/settings.py
@@ -447,16 +447,24 @@ def is_no_auth_mode():
 # actual frontend origin that is carried in the OAuth state parameter.
 OAUTH_BROKER_URL = os.getenv("OAUTH_BROKER_URL")
 
+
+def _get_min_env_int(key: str, default: int, minimum: int) -> int:
+    """Read an integer env var, clamped to a minimum valid value.
+
+    Delegates the lookup to ``get_env_int(key, default)`` and returns the
+    larger of that result and ``minimum``.
+
+    NOTE(review): handling of unset or malformed values is whatever
+    ``get_env_int`` does; it is defined elsewhere in this module — confirm.
+    """
+    return max(get_env_int(key, default), minimum)
+
+
 # OpenSearch configuration
 VECTOR_DIM = 1536
 KNN_EF_CONSTRUCTION = 100
 KNN_M = 16
+OPENSEARCH_NUMBER_OF_SHARDS = _get_min_env_int("OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS", 1, 1)
+OPENSEARCH_NUMBER_OF_REPLICAS = _get_min_env_int("OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS", 0, 0)
 
 INDEX_BODY = {
     "settings": {
         "index": {"knn": True},
-        "number_of_shards": 1,
-        "number_of_replicas": 0,
+        "number_of_shards": OPENSEARCH_NUMBER_OF_SHARDS,
+        "number_of_replicas": OPENSEARCH_NUMBER_OF_REPLICAS,
     },
     "mappings": {
         "properties": {
@@ -492,7 +500,10 @@ def is_no_auth_mode():
 DLS_PRINCIPAL_INDEX_NAME = "openrag_dls_principals"
 DLS_PRINCIPAL_INDEX_BODY: dict[str, Any] = {
     "settings": {
-        "index": {"number_of_replicas": 0, "number_of_shards": 1},
+        "index": {
+            "number_of_replicas": OPENSEARCH_NUMBER_OF_REPLICAS,
+            "number_of_shards": OPENSEARCH_NUMBER_OF_SHARDS,
+        },
     },
     "mappings": {
         "properties": {
@@ -511,8 +522,8 @@ def is_no_auth_mode():
 API_KEYS_INDEX_NAME = "api_keys"
 API_KEYS_INDEX_BODY = {
     "settings": {
-        "number_of_shards": 1,
-        "number_of_replicas": 0,
+        "number_of_shards": OPENSEARCH_NUMBER_OF_SHARDS,
+        "number_of_replicas": OPENSEARCH_NUMBER_OF_REPLICAS,
     },
     "mappings": {
         "properties": {