stellar · cjonas9 · May 9, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -2,4 +2,4 @@ target/
 storage/
 .soroban/
 .cargo/
-.cargo-husky/
+.cargo-husky/
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
@@ -11,9 +11,13 @@ concurrency:
 
 jobs:
   run-system-test:
+    # Temporary branch used to iterate on the EC2-backed load-test workflow.
+    # The reusable system-test workflow is currently rejected for this PR path,
+    # so skip e2e here instead of failing every push to apply-load.
+    if: github.event_name != 'pull_request' || github.head_ref != 'apply-load'
     # set the git ref of stellar/system-test after the '@'
     # to specify which version of the workflow to call
-    uses: stellar/system-test/.github/workflows/test-workflow.yml@master
+    uses: stellar/system-test/.github/workflows/test-workflow.yml@789121c0914150a02122581f32ee62c4e42e1c84
     with:
       stellar-rpc-repo: "${{ github.repository }}"
       stellar-rpc-ref: "${{ github.ref }}"

diff --git a/.github/workflows/load-test.yml b/.github/workflows/load-test.yml
@@ -0,0 +1,213 @@
+name: Load test (ephemeral)
+# Launches a c5.2xlarge in Horizon (203618453975), polls it via SSM, posts
+# results to the PR, terminates. Box bootstrap lives in run-load-test.sh;
+# runner-side polling in runner/orchestrate.go.
+
+on:
+  push:
+    branches: [apply-load]
+
+permissions:
+  id-token: write          # for OIDC AssumeRole into the GHA role
+  contents: read
+  pull-requests: write
+
+jobs:
+  load-test:
+    name: Launch + await ephemeral load-test box
+    runs-on: ubuntu-latest
+    timeout-minutes: 225   # 210min results wait + buffer for boot/SSM/poll latency and cleanup (role lasts 240min)
+    env:
+      AWS_REGION: us-east-1
+      INSTANCE_TYPE: c5.2xlarge
+      ROOT_VOLUME_GB: 500
+      BOOTSTRAP_VOLUME_IOPS: 3000
+      # 3000 IOPS is the gp3 floor; 125 MiB/s alone would need only 500.
+      BOOTSTRAP_VOLUME_THROUGHPUT: 125
+      INSTANCE_PROFILE: stellar-rpc-ci-load-test
+      TEST_TAG_KEY: test
+      TEST_TAG_VAL: stellar-rpc-ci-load-test
+      SSM_REGISTRATION_TIMEOUT: 240   # SSM agent registers ~30-90s after boot
+      RESULTS_TIMEOUT: 12600          # 210 min wait for /tmp/done: ~55m bootstrap+build + ~90m benchmark, under the 170m go-test budget.
+      POLL_INTERVAL: 30
+      DEBUG_LOG_LINES: 40
+      DEBUG_LOG_EVERY_POLLS: 5
+      LOAD_TEST_DIR: cmd/stellar-rpc/internal/integrationtest/infrastructure/load-test
+
+    steps:
+      - name: Resolve target context
+        id: target
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          PR_NUMBER=$(gh pr list \
+            --repo "${{ github.repository }}" \
+            --state open \
+            --base main \
+            --head "${{ github.ref_name }}" \
+            --json number \
+            --jq '.[0].number // ""' 2>/dev/null || true)
+
+          RUN_LABEL="${PR_NUMBER:+pr$PR_NUMBER}"
+          {
+            echo "pr_number=$PR_NUMBER"
+            echo "pr_tag_value=${PR_NUMBER:-none}"
+            echo "run_label=${RUN_LABEL:-${{ github.ref_name }}}"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Checkout target ref
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.sha }}
+
+      # The runner-side half is `go run ... runner orchestrate`.
+      - uses: ./.github/actions/setup-go
+
+      - name: Configure AWS via OIDC
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_GHA_ROLE_ARN }}
+          aws-region: ${{ env.AWS_REGION }}
+          role-duration-seconds: 14400
+
+      - name: Resolve latest Ubuntu 22.04 AMI
+        id: ami
+        run: |
+          AMI=$(aws ec2 describe-images \
+            --owners 099720109477 \
+            --filters \
+              "Name=name,Values=ubuntu/images/hvm-ssd*/ubuntu-jammy-22.04-amd64-server-*" \
+              "Name=architecture,Values=x86_64" \
+              "Name=state,Values=available" \
+            --query 'sort_by(Images, &CreationDate)[-1].ImageId' \
+            --output text)
+          echo "ami=$AMI" >> "$GITHUB_OUTPUT"
+
+      - name: Render user-data
+        # The script ships verbatim; parameters travel in a two-line preamble
+        # so the bytes that run on the box match the bytes in git.
+        run: |
+          {
+            echo '#!/usr/bin/env bash'
+            echo 'export TARGET_SHA=${{ github.sha }} RUN_ID=${{ github.run_id }}'
+            cat "$LOAD_TEST_DIR/run-load-test.sh"
+          } > /tmp/user-data.sh
+
+      - name: Launch EC2 instance
+        id: launch
+        run: |
+          COMMON_TAGS="{Key=$TEST_TAG_KEY,Value=$TEST_TAG_VAL},
+            {Key=pr,Value=${{ steps.target.outputs.pr_tag_value }}},
+            {Key=ref,Value=${{ github.ref_name }}},
+            {Key=sha,Value=${{ github.sha }}},
+            {Key=run-id,Value=${{ github.run_id }}}"
+          RUN_INSTANCES_JSON=$(aws ec2 run-instances \
+            --image-id "${{ steps.ami.outputs.ami }}" \
+            --instance-type "$INSTANCE_TYPE" \
+            --iam-instance-profile "Name=$INSTANCE_PROFILE" \
+            --user-data file:///tmp/user-data.sh \
+            --block-device-mappings "[{
+              \"DeviceName\":\"/dev/sda1\",
+              \"Ebs\":{\"VolumeSize\":$ROOT_VOLUME_GB,\"VolumeType\":\"gp3\",\"Iops\":$BOOTSTRAP_VOLUME_IOPS,\"Throughput\":$BOOTSTRAP_VOLUME_THROUGHPUT,\"DeleteOnTermination\":true}
+            }]" \
+            --tag-specifications \
+              "ResourceType=instance,Tags=[
+                {Key=Name,Value=load-test-${{ steps.target.outputs.run_label }}},
+                $COMMON_TAGS
+              ]" \
+              "ResourceType=volume,Tags=[
+                {Key=Name,Value=load-test-${{ steps.target.outputs.run_label }}-root},
+                $COMMON_TAGS
+              ]" \
+            --count 1 \
+            --output json)
+
+          INSTANCE_ID=$(printf '%s' "$RUN_INSTANCES_JSON" | jq -r '.Instances[0].InstanceId')
+          echo "instance_id=$INSTANCE_ID" >> "$GITHUB_OUTPUT"
+
+      - name: Acknowledge launch in PR
+        if: steps.target.outputs.pr_number != ''
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          if ! gh pr comment ${{ steps.target.outputs.pr_number }} \
+            --repo ${{ github.repository }} \
+            --body "⏳ Load test launching on \`${{ steps.launch.outputs.instance_id }}\` (commit \`${{ github.sha }}\`).
+          Workflow run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          Posting results when the run finishes."; then
+            echo "::warning::Failed to post launch comment to PR #${{ steps.target.outputs.pr_number }}"
+          fi
+
+      - name: Wait for SSM agent to register
+        env:
+          INSTANCE_ID: ${{ steps.launch.outputs.instance_id }}
+        run: |
+          DEADLINE=$(( $(date +%s) + SSM_REGISTRATION_TIMEOUT ))
+          while [ $(date +%s) -lt $DEADLINE ]; do
+            PING=$(aws ssm describe-instance-information \
+                     --filters "Key=InstanceIds,Values=$INSTANCE_ID" \
+                     --query 'InstanceInformationList[0].PingStatus' \
+                     --output text 2>/dev/null || echo "")
+            echo "[$(date -u +%FT%TZ)] ssm ping=$PING"
+            if [ "$PING" = "Online" ]; then
+              exit 0
+            fi
+            sleep 10
+          done
+          echo "::error::SSM agent never registered for $INSTANCE_ID — verify AmazonSSMManagedInstanceCore is attached to the stellar-rpc-ci-load-test role"
+          exit 1
+
+      - name: Poll for results
+        id: results
+        env:
+          INSTANCE_ID: ${{ steps.launch.outputs.instance_id }}
+        run: go run "./$LOAD_TEST_DIR/runner" orchestrate
+
+      - name: Write results summary
+        if: always()
+        run: |
+          if [ -f /tmp/results.md ]; then
+            cat /tmp/results.md >> "$GITHUB_STEP_SUMMARY"
+          elif [ -f /tmp/timeout-comment.md ]; then
+            cat /tmp/timeout-comment.md >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Post results to PR
+        if: always() && steps.target.outputs.pr_number != ''   # always(): still post the timeout/failure comment when an earlier step failed
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          if [ "${{ steps.results.outputs.found }}" = "true" ]; then
+            BODY=/tmp/results.md
+          else
+            BODY=/tmp/timeout-comment.md
+          fi
+          if [ ! -s "$BODY" ]; then
+            echo "::warning::No body to post to PR #${{ steps.target.outputs.pr_number }} ($BODY missing or empty)"
+            exit 0
+          fi
+          if ! gh pr comment ${{ steps.target.outputs.pr_number }} \
+            --repo ${{ github.repository }} \
+            --body-file "$BODY"; then
+            echo "::warning::Failed to post comment to PR #${{ steps.target.outputs.pr_number }}"
+          fi
+
+      - name: Fail workflow on timeout or load-test failure
+        if: always()
+        run: |
+          if [ "${{ steps.results.outputs.found }}" != "true" ]; then
+            echo "Load test timed out before producing instance results"
+            exit 1
+          fi
+
+          if [ "${{ steps.results.outputs.passed }}" != "true" ]; then
+            echo "Instance reported a failing verdict"
+            cat /tmp/results.md 2>/dev/null || true
+            exit 1
+          fi
+
+      - name: Terminate instance
+        if: always() && steps.launch.outputs.instance_id != ''   # best-effort cleanup: never fails the job, but a leak is reported below
+        run: |
+          aws ec2 terminate-instances \
+            --instance-ids ${{ steps.launch.outputs.instance_id }} || echo "::error::Failed to terminate ${{ steps.launch.outputs.instance_id }}; terminate it manually to avoid a leaked instance"
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,11 @@ captive-core/
 .soroban/
 !test.toml
 *.sqlite*
+
+# Generated load-test ledger corpora (hundreds of MB; canonical copies live in
+# s3://stellar-rpc-ci-load-test/ledgers/).
+cmd/stellar-rpc/internal/integrationtest/infrastructure/testdata/*.xdr.zstd
+cmd/stellar-rpc/internal/integrationtest/infrastructure/load-test/testdata/*.xdr.zstd
+
+# Compiled refresh tool (build artifact; rebuild with `go build` in refresh/).
+cmd/stellar-rpc/internal/integrationtest/infrastructure/load-test/refresh/refresh-tool
diff --git a/README.md b/README.md
@@ -26,7 +26,7 @@ Integration tests:
 
 ```bash
 STELLAR_RPC_INTEGRATION_TESTS_ENABLED=true \
-STELLAR_RPC_INTEGRATION_TESTS_CORE_MAX_SUPPORTED_PROTOCOL=23 \
+STELLAR_RPC_INTEGRATION_TESTS_CORE_MAX_SUPPORTED_PROTOCOL=25 \
 STELLAR_RPC_INTEGRATION_TESTS_CAPTIVE_CORE_BIN=$(which stellar-core) \
     go test -v -failfast ./cmd/stellar-rpc/internal/integrationtest/...
 ```

diff --git a/cmd/stellar-rpc/internal/config/main.go b/cmd/stellar-rpc/internal/config/main.go
@@ -68,28 +68,57 @@ type Config struct {
 	RequestBacklogSimulateTransactionQueueLimit    uint
 	RequestBacklogGetFeeStatsTransactionQueueLimit uint
 	RequestExecutionWarningThreshold               time.Duration
-	MaxRequestExecutionDuration                    time.Duration
-	MaxGetHealthExecutionDuration                  time.Duration
-	MaxGetEventsExecutionDuration                  time.Duration
-	MaxGetNetworkExecutionDuration                 time.Duration
-	MaxGetVersionInfoExecutionDuration             time.Duration
-	MaxGetLatestLedgerExecutionDuration            time.Duration
-	MaxGetLedgerEntriesExecutionDuration           time.Duration
-	MaxGetTransactionExecutionDuration             time.Duration
-	MaxGetTransactionsExecutionDuration            time.Duration
-	MaxGetLedgersExecutionDuration                 time.Duration
-	MaxSendTransactionExecutionDuration            time.Duration
-	MaxSimulateTransactionExecutionDuration        time.Duration
-	MaxGetFeeStatsExecutionDuration                time.Duration
-	ServeLedgersFromDatastore                      bool
-	BufferedStorageBackendConfig                   ledgerbackend.BufferedStorageBackendConfig
-	DataStoreConfig                                datastore.DataStoreConfig
+
+	MaxRequestExecutionDuration             time.Duration
+	MaxGetHealthExecutionDuration           time.Duration
+	MaxGetEventsExecutionDuration           time.Duration
+	MaxGetNetworkExecutionDuration          time.Duration
+	MaxGetVersionInfoExecutionDuration      time.Duration
+	MaxGetLatestLedgerExecutionDuration     time.Duration
+	MaxGetLedgerEntriesExecutionDuration    time.Duration
+	MaxGetTransactionExecutionDuration      time.Duration
+	MaxGetTransactionsExecutionDuration     time.Duration
+	MaxGetLedgersExecutionDuration          time.Duration
+	MaxSendTransactionExecutionDuration     time.Duration
+	MaxSimulateTransactionExecutionDuration time.Duration
+	MaxGetFeeStatsExecutionDuration         time.Duration
+
+	ServeLedgersFromDatastore    bool
+	BufferedStorageBackendConfig ledgerbackend.BufferedStorageBackendConfig
+	DataStoreConfig              datastore.DataStoreConfig
+
+	LoadTest LoadTestConfig
 
 	// We memoize these, so they bind to pflags correctly
 	optionsCache *Options
 	flagset      *pflag.FlagSet
 }
 
+// LoadTestConfig groups the load_test_config options for ingesting from pre-generated
+// synthetic ledger bundles. If Files is empty, normal captive-core ingestion runs.
+type LoadTestConfig struct {
+	// Files lists .xdr.zstd bundles of LedgerCloseMeta records produced by
+	// stellar-core's apply-load, replayed in order.
+	Files []string `toml:"files"`
+	// Frequency paces ingestion: one synthetic ledger is replayed per interval.
+	// Zero means "use DefaultLoadTestFrequency" (2s).
+	Frequency time.Duration `toml:"frequency"`
+	// MaxLedgersPerFile optionally caps how many ledgers are replayed from each
+	// file in Files. Zero (the zero value) replays every ledger in every file.
+	MaxLedgersPerFile uint32 `toml:"max_ledgers_per_file"`
+}
+
+// Enabled reports whether at least one bundle is listed in Files, i.e. whether
+// the daemon should ingest from synthetic ledger bundles instead of captive core.
+func (cfg LoadTestConfig) Enabled() bool {
+	return len(cfg.Files) > 0
+}
+
+// DefaultLoadTestFrequency is the replay interval (one synthetic ledger every
+// 2s) used when LoadTestConfig.Frequency is zero. It is applied at the daemon's
+// use-site, not at config-load time, so it survives the TOML-only configuration path.
+const DefaultLoadTestFrequency = 2 * time.Second
+
 func (cfg *Config) ExtendedUserAgent(extension string) string {
 	if cfg.HistoryArchiveUserAgent == "" {
 		return extension

diff --git a/cmd/stellar-rpc/internal/config/options.go b/cmd/stellar-rpc/internal/config/options.go
@@ -617,11 +617,7 @@ func (cfg *Config) options() Options {
 				return unmarshalTOMLTree(i, option.ConfigKey, "buffered_storage_backend_config")
 			},
 			MarshalTOML: func(_ *Option) (any, error) {
-				tomlBytes, err := toml.Marshal(defaultBufferedStorageBackendConfig())
-				if err != nil {
-					return nil, fmt.Errorf("failed to marshal buffered_storage_backend_config: %w", err)
-				}
-				return toml.LoadBytes(tomlBytes)
+				return marshalTOMLTree(defaultBufferedStorageBackendConfig(), "buffered_storage_backend_config")
 			},
 		},
 		{
@@ -632,11 +628,20 @@ func (cfg *Config) options() Options {
 				return unmarshalTOMLTree(i, option.ConfigKey, "datastore_config")
 			},
 			MarshalTOML: func(_ *Option) (any, error) {
-				tomlBytes, err := toml.Marshal(defaultDataStoreConfig())
-				if err != nil {
-					return nil, fmt.Errorf("failed to marshal datastore_config: %w", err)
-				}
-				return toml.LoadBytes(tomlBytes)
+				return marshalTOMLTree(defaultDataStoreConfig(), "datastore_config")
+			},
+		},
+		{
+			TomlKey:   "load_test_config",
+			ConfigKey: &cfg.LoadTest,
+			Usage: "Load testing configuration: replay pre-generated .xdr.zstd ledger bundles " +
+				"through ingestion. Subkeys: files (list of bundle paths), frequency (duration; " +
+				"defaults to 2s), max_ledgers_per_file (0 = all). WARNING: destructive to your database.",
+			CustomSetValue: func(option *Option, i any) error {
+				return unmarshalTOMLTree(i, option.ConfigKey, "load_test_config")
+			},
+			MarshalTOML: func(_ *Option) (any, error) {
+				return marshalTOMLTree(defaultLoadTestConfig(), "load_test_config")
 			},
 		},
 	}
@@ -664,6 +669,22 @@ func defaultDataStoreConfig() datastore.DataStoreConfig {
 	}
 }
 
+func defaultLoadTestConfig() LoadTestConfig { // defaults rendered by load_test_config's MarshalTOML hook
+	return LoadTestConfig{
+		Frequency: DefaultLoadTestFrequency, // 2s; Files stays empty, so load testing is off by default
+	}
+}
+
+// marshalTOMLTree marshals sub-config v to TOML and re-parses the bytes into the tree
+// an option's MarshalTOML hook must return; configName only labels the marshal error.
+func marshalTOMLTree(v any, configName string) (any, error) {
+	tomlBytes, err := toml.Marshal(v)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal %s: %w", configName, err)
+	}
+	return toml.LoadBytes(tomlBytes)
+}
+
 func unmarshalTOMLTree(tree any, out any, configName string) error {
 	t, ok := tree.(*toml.Tree)
 	if !ok {