diff --git a/braintrust/README.md b/braintrust/README.md index bf3b4bb..babe3bf 100644 --- a/braintrust/README.md +++ b/braintrust/README.md @@ -158,8 +158,20 @@ This Helm chart includes comprehensive automated unit tests. ## Breaking Changes +### Version 2 + With version 2 of this helm, the Brainstore pods are split into Readers and Writers improving performance and the ability to independently scale for more read operations or write operations. For existing customers that have deployed our Helm or via other means on Kubernetes, please update your override values file or deployment to match this change. This will result in no data loss, but will be a brief downtime as the existing Brainstore Pods are removed and new Brainstore Pods for Reading and Writing are launched. +### Version 3 + +Version 3 was a breaking change only for Azure customers: it introduced the Azure Container Storage CSI driver. + +### Version 4 + +This version of the Helm chart prepares for version 2.0.0 of the Braintrust self-hosted data plane. Starting with 1.1.32, Brainstore needs to reach the API; previously, Brainstore did not talk to the API. In this Helm chart, that traffic goes over the internal Kubernetes endpoint. If you have additional security restrictions or limit traffic between services, this traffic must be allowed before upgrading to 2.0.0 of the data plane. + +We are also increasing the default sizing of our deployments; please ensure you have the node pool capacity for these increased defaults. + ## Example Values Files Example values files for different cloud providers and configurations are located in the `examples/` folder. 
diff --git a/braintrust/examples/google-autopilot/values.yaml b/braintrust/examples/google-autopilot/values.yaml index 6a0fe45..3865408 100644 --- a/braintrust/examples/google-autopilot/values.yaml +++ b/braintrust/examples/google-autopilot/values.yaml @@ -24,7 +24,7 @@ api: service: networking.gke.io/load-balancer-type: "Internal" replicas: 4 - # Uncomment the following section to use usee a different image or tag from the version in the Helm release + # Uncomment the following section to use a different image or tag from the version in the Helm release #image: #repository: public.ecr.aws/braintrust/standalone-api #tag: "" @@ -75,11 +75,11 @@ brainstore: cpu: "16" memory: "32Gi" limits: - cpu: "20" - memory: "40Gi" + cpu: "16" + memory: "32Gi" cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" - objectStoreCacheFileSize: "100Gi" + objectStoreCacheFileSize: "1000Gi" verbose: true volume: size: "200Gi" @@ -99,11 +99,11 @@ brainstore: cpu: "32" memory: "64Gi" limits: - cpu: "40" - memory: "80Gi" + cpu: "32" + memory: "64Gi" cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" - objectStoreCacheFileSize: "100Gi" + objectStoreCacheFileSize: "1000Gi" verbose: true volume: size: "200Gi" diff --git a/braintrust/examples/google-standard/values.yaml b/braintrust/examples/google-standard/values.yaml new file mode 100644 index 0000000..2717bda --- /dev/null +++ b/braintrust/examples/google-standard/values.yaml @@ -0,0 +1,143 @@ +# Sample values for GKE Standard deployment +# +# GKE Standard requires manual node pool configuration: +# - Create a dedicated node pool with local NVMe SSDs for Brainstore workloads +# - Recommended machine types: c4-standard-32 or higher with local SSDs +# - Configure local SSDs: Use 4x375GB local SSDs (1500GB total) or more +# - Total local SSD capacity should exceed the volume.size configured below + +# Global configs +global: + orgName: "" + namespace: "braintrust" + +# Cloud provider configuration +cloud: "google" 
+ +# Google Cloud specific configuration for Standard +google: + mode: "standard" + +objectStorage: + google: + brainstoreBucket: "" + apiBucket: "" + +api: + name: "braintrust-api" + annotations: + service: + networking.gke.io/load-balancer-type: "Internal" + replicas: 4 + # Uncomment the following section to use a different image or tag from the version in the Helm release + #image: + #repository: public.ecr.aws/braintrust/standalone-api + #tag: "" + service: + type: LoadBalancer + port: 8000 + portName: http + serviceAccount: + name: "braintrust-api" + googleServiceAccount: "" + # Enables native GCS authentication via workload identity. Defaults to false (S3-compatible access); setting it to true requires v1.1.31 or later of the dataplane. + enableGcsAuth: false + nodeSelector: + cloud.google.com/gke-nodepool: "api" + resources: + requests: + cpu: "4" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" + extraEnvVars: + # For S3-compatible GCS Storage, set the AWS_REGION environment variable to the region of your GCS bucket + - name: AWS_REGION + value: "us-central1" + +# Brainstore configuration (split into reader and writer) +brainstore: + serviceAccount: + name: "brainstore" + googleServiceAccount: "" + # Uncomment the following section to use a different image or tag from the version in the Helm release + #image: + #repository: public.ecr.aws/braintrust/brainstore + #tag: "" + # New deployments should use objectStorage as the locks backend. Existing deployments should remain on redis at this time. 
+ locksBackend: "objectStorage" + + # Brainstore Reader configuration + reader: + name: "brainstore-reader" + replicas: 2 + service: + name: "" + type: ClusterIP + port: 4000 + portName: http + nodeSelector: + cloud.google.com/gke-nodepool: "brainstore" # Target your node pool + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + cpu: "16" + memory: "32Gi" + affinity: # Prevent readers and writers from sharing nodes + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - brainstore-reader + - brainstore-writer + topologyKey: kubernetes.io/hostname + cacheDir: "/mnt/tmp/brainstore" + objectStoreCacheMemoryLimit: "1Gi" + objectStoreCacheFileSize: "1000Gi" + verbose: true + volume: + size: "200Gi" + extraEnvVars: [] + + # Brainstore Writer configuration + writer: + name: "brainstore-writer" + replicas: 1 + service: + name: "" + type: ClusterIP + port: 4000 + portName: http + nodeSelector: + cloud.google.com/gke-nodepool: "brainstore" + resources: + requests: + cpu: "32" + memory: "64Gi" + limits: + cpu: "32" + memory: "64Gi" + affinity: # Prevent readers and writers from sharing nodes + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - brainstore-reader + - brainstore-writer + topologyKey: kubernetes.io/hostname + cacheDir: "/mnt/tmp/brainstore" + objectStoreCacheMemoryLimit: "1Gi" + objectStoreCacheFileSize: "1000Gi" + verbose: true + volume: + size: "200Gi" + extraEnvVars: [] diff --git a/braintrust/templates/brainstore-reader-deployment.yaml b/braintrust/templates/brainstore-reader-deployment.yaml index 65b2a9c..7e905c9 100644 --- a/braintrust/templates/brainstore-reader-deployment.yaml +++ b/braintrust/templates/brainstore-reader-deployment.yaml @@ -107,6 +107,13 @@ spec: secretKeyRef: name: braintrust-secrets key: REDIS_URL + - name: 
BRAINSTORE_XACT_MANAGER_URI + valueFrom: + secretKeyRef: + name: braintrust-secrets + key: REDIS_URL + - name: BRAINSTORE_AI_PROXY_URL + value: "http://{{ .Values.api.service.name | default .Values.api.name }}:{{ .Values.api.service.port }}" {{- if eq .Values.brainstore.locksBackend "redis" }} - name: BRAINSTORE_LOCKS_URI valueFrom: diff --git a/braintrust/templates/brainstore-writer-deployment.yaml b/braintrust/templates/brainstore-writer-deployment.yaml index aadf86b..5866ea3 100644 --- a/braintrust/templates/brainstore-writer-deployment.yaml +++ b/braintrust/templates/brainstore-writer-deployment.yaml @@ -107,6 +107,13 @@ spec: secretKeyRef: name: braintrust-secrets key: REDIS_URL + - name: BRAINSTORE_XACT_MANAGER_URI + valueFrom: + secretKeyRef: + name: braintrust-secrets + key: REDIS_URL + - name: BRAINSTORE_AI_PROXY_URL + value: "http://{{ .Values.api.service.name | default .Values.api.name }}:{{ .Values.api.service.port }}" {{- if eq .Values.brainstore.locksBackend "redis" }} - name: BRAINSTORE_LOCKS_URI valueFrom: diff --git a/braintrust/tests/brainstore-reader_test.yaml b/braintrust/tests/brainstore-reader_test.yaml index 4beacd1..9bab62d 100644 --- a/braintrust/tests/brainstore-reader_test.yaml +++ b/braintrust/tests/brainstore-reader_test.yaml @@ -203,3 +203,30 @@ tests: path: spec.template.spec.containers[0].livenessProbe - isNull: path: spec.template.spec.containers[0].readinessProbe + + - it: should include BRAINSTORE_XACT_MANAGER_URI environment variable + values: + - __fixtures__/base-values.yaml + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: BRAINSTORE_XACT_MANAGER_URI + valueFrom: + secretKeyRef: + name: braintrust-secrets + key: REDIS_URL + + - it: should include BRAINSTORE_AI_PROXY_URL environment variable + values: + - __fixtures__/base-values.yaml + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + 
name: BRAINSTORE_AI_PROXY_URL + value: "http://braintrust-api:8000" diff --git a/braintrust/tests/brainstore-writer_test.yaml b/braintrust/tests/brainstore-writer_test.yaml index d6af6d7..d6cff0d 100644 --- a/braintrust/tests/brainstore-writer_test.yaml +++ b/braintrust/tests/brainstore-writer_test.yaml @@ -203,3 +203,30 @@ tests: path: spec.template.spec.containers[0].livenessProbe - isNull: path: spec.template.spec.containers[0].readinessProbe + + - it: should include BRAINSTORE_XACT_MANAGER_URI environment variable + values: + - __fixtures__/base-values.yaml + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: BRAINSTORE_XACT_MANAGER_URI + valueFrom: + secretKeyRef: + name: braintrust-secrets + key: REDIS_URL + + - it: should include BRAINSTORE_AI_PROXY_URL environment variable + values: + - __fixtures__/base-values.yaml + release: + namespace: "braintrust" + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: BRAINSTORE_AI_PROXY_URL + value: "http://braintrust-api:8000" diff --git a/braintrust/values.yaml b/braintrust/values.yaml index dd839d8..6dc8dd1 100644 --- a/braintrust/values.yaml +++ b/braintrust/values.yaml @@ -78,7 +78,7 @@ api: service: {} pod: {} serviceaccount: {} - replicas: 2 + replicas: 4 image: repository: public.ecr.aws/braintrust/standalone-api tag: v1.1.31 @@ -204,14 +204,14 @@ brainstore: portName: http resources: requests: - cpu: "4" - memory: "8Gi" + cpu: "16" + memory: "32Gi" limits: - cpu: "8" - memory: "16Gi" + cpu: "16" + memory: "32Gi" cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" - objectStoreCacheFileSize: "50Gi" + objectStoreCacheFileSize: "1000Gi" verbose: true # Optional: Volume configuration for cache storage # When not set, uses default emptyDir: {} (backward compatible) @@ -241,14 +241,14 @@ brainstore: portName: http resources: requests: - cpu: "8" - memory: "16Gi" + cpu: "32" + memory: "64Gi" 
limits: - cpu: "16" - memory: "32Gi" + cpu: "32" + memory: "64Gi" cacheDir: "/mnt/tmp/brainstore" objectStoreCacheMemoryLimit: "1Gi" - objectStoreCacheFileSize: "50Gi" + objectStoreCacheFileSize: "1000Gi" verbose: true # Optional: Volume configuration for cache storage # When not set, uses default emptyDir: {} (backward compatible)