From b54daef64f1e33bed957bdb4f19926a114610677 Mon Sep 17 00:00:00 2001 From: Calvin Smith Date: Mon, 15 Jun 2026 11:23:52 -0600 Subject: [PATCH 1/4] docs(modal): add MODAL_ALWAYS_ON scale-to-zero option Update the Modal backend page with configurable always-on vs scale-to-zero mode. Default behavior unchanged (always warm). Set MODAL_ALWAYS_ON=0 at deploy time to scale to zero when idle (~$25/month vs ~$102/month, with ~10-30s cold starts). - Updated deploy.py code block with MODAL_ALWAYS_ON env var - Rewrote Cost section with comparison table for both modes - Added scale-to-zero deploy command in step 3 Co-authored-by: openhands --- .../agent-canvas/backend-setup/modal.mdx | 57 +++++++++++++++---- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/openhands/usage/agent-canvas/backend-setup/modal.mdx b/openhands/usage/agent-canvas/backend-setup/modal.mdx index 87c8f5d3..9b2a367a 100644 --- a/openhands/usage/agent-canvas/backend-setup/modal.mdx +++ b/openhands/usage/agent-canvas/backend-setup/modal.mdx @@ -76,6 +76,7 @@ Usage: modal run deploy.py """ +import os import subprocess import modal @@ -93,6 +94,13 @@ SCALEDOWN_WINDOW = 600 # seconds before an idle container is eligible for shutd CONTAINER_CPU = 2.0 CONTAINER_MEMORY_MB = 4096 # 4 GB +# Always-on mode (default): keeps one container warm at all times for zero +# cold-start latency. Costs ~$102/month (2 vCPU / 4 GB, 24/7). +# Set MODAL_ALWAYS_ON=0 to scale to zero when idle. You only pay while +# actively coding, but the first request after idle has a ~10-30s cold start. +ALWAYS_ON = os.environ.get("MODAL_ALWAYS_ON", "1").lower() in ("1", "true") +MIN_CONTAINERS = 1 if ALWAYS_ON else 0 + # --- Modal App --- app = modal.App("openhands-agent-server") @@ -138,10 +146,10 @@ agent_server_image = ( memory=CONTAINER_MEMORY_MB, scaledown_window=SCALEDOWN_WINDOW, timeout=3600, - # Pin to exactly 1 container, always warm. 
The agent-server is stateful - # (SQLite DB, tmux sessions, in-memory conversation state). Multiple - # containers would diverge. min_containers=1 eliminates cold starts. - min_containers=1, + # The agent-server is stateful (SQLite DB, tmux sessions, in-memory + # conversation state) — multiple containers would diverge. + # min_containers is controlled by MODAL_ALWAYS_ON (default: 1, always warm). + min_containers=MIN_CONTAINERS, max_containers=1, ) @modal.concurrent(max_inputs=10) @@ -160,13 +168,19 @@ class AgentServer: @app.local_entrypoint() def main(): + mode = "always-on" if ALWAYS_ON else "scale-to-zero" print("OpenHands Agent Server — Modal deployment") print(f" Image: ghcr.io/openhands/agent-server:{AGENT_SERVER_IMAGE_TAG}") print(f" Volume: openhands-data → {VOLUME_MOUNT}") + print(f" Mode: {mode} (min_containers={MIN_CONTAINERS})") print(f" Scaledown: {SCALEDOWN_WINDOW}s") print() print("To deploy:") print(" modal deploy deploy.py") + if ALWAYS_ON: + print() + print(" # Or, to scale to zero when idle (saves cost, adds cold starts):") + print(" MODAL_ALWAYS_ON=0 modal deploy deploy.py") print() print("After deploying, add the backend in Agent Canvas:") print(" 1. Open Agent Canvas") @@ -189,7 +203,13 @@ Modal builds the container image on first deploy (takes a few minutes), then pri https://openhands-agent-server--agentserver-serve.modal.run ``` -The agent server runs on 2 vCPU / 4 GB RAM with a persistent volume for conversations and settings. The container is always warm (`min_containers=1`) so there's no cold-start latency. +The agent server runs on 2 vCPU / 4 GB RAM with a persistent volume for conversations and settings. By default, the container is always warm (`min_containers=1`) so there's no cold-start latency. To scale to zero when idle instead (lower cost, but ~10-30s cold start on first request): + +```bash +MODAL_ALWAYS_ON=0 modal deploy deploy.py +``` + +See [Cost](#cost) for a comparison of the two modes. ## 4. 
Connect Agent Canvas @@ -218,15 +238,28 @@ Settings are stored server-side on the Modal volume (encrypted with `OH_SECRET_K ## Cost -The deployment keeps one container running at all times (`min_containers=1`) to eliminate cold-start latency. Modal charges per-second: +Modal charges per-second for CPU and memory. The `MODAL_ALWAYS_ON` setting controls whether the container stays warm between requests: + +| | Always-on (default) | Scale-to-zero (`MODAL_ALWAYS_ON=0`) | +|---|---|---| +| **Cold starts** | None | ~10-30s after idle period | +| **Idle behavior** | Container stays warm 24/7 | Scales down after 10 min idle | +| **Best for** | Daily driver, fast iteration | Occasional use, cost-sensitive | +| **Monthly cost** | ~$102 (24/7) | Pay only for active hours | + +Hourly rate breakdown (2 vCPU / 4 GB): + +| Resource | Rate | +|----------|------| +| 2 vCPU (1 physical core) | ~$0.096/hr | +| 4 GB RAM | ~$0.046/hr | +| **Total** | **~$0.14/hr** | + +**Always-on** costs ~$3.40/day (~$102/month). Modal's $30/month free credit covers about 9 days. -| Resource | Rate | Daily Cost | Monthly Cost | -|----------|------|------------|--------------| -| 2 vCPU (1 physical core) | ~$0.096/hr | ~$2.30 | ~$69 | -| 4 GB RAM | ~$0.046/hr | ~$1.10 | ~$33 | -| **Total** | **~$0.14/hr** | **~$3.40** | **~$102** | +**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly ~$1.12/day (~$25/month). The first request after an idle period takes ~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions. -The $30/month free credit on Modal's starter tier covers about 9 days of continuous usage. To reduce costs, stop the deployment when not in use (`modal app stop openhands-agent-server`). Your data on the Modal volume persists. +To stop the deployment entirely and avoid all charges: `modal app stop openhands-agent-server`. Your data on the Modal volume persists. 
## Limitations From f6121a58188d0ad571038d8235087ab30cf32185 Mon Sep 17 00:00:00 2001 From: Calvin Smith Date: Mon, 15 Jun 2026 11:27:45 -0600 Subject: [PATCH 2/4] docs(modal): add upgrading, troubleshooting, and tuning tips - Update 'When to Use It' to mention scale-to-zero option - Add scaledown_window tuning tip for scale-to-zero users - Add Upgrading section (how to bump the image tag) - Add Troubleshooting section (logs, app list, redeploy) Co-authored-by: openhands --- .../agent-canvas/backend-setup/modal.mdx | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/openhands/usage/agent-canvas/backend-setup/modal.mdx b/openhands/usage/agent-canvas/backend-setup/modal.mdx index 9b2a367a..2c4254ad 100644 --- a/openhands/usage/agent-canvas/backend-setup/modal.mdx +++ b/openhands/usage/agent-canvas/backend-setup/modal.mdx @@ -15,7 +15,7 @@ A Modal backend is a good fit when you want to: - Offload agent execution to the cloud without managing your own VM or Docker host - Take advantage of Modal's per-second billing and free-tier credits -- Get a persistent, always-warm backend with minimal setup +- Get a persistent, always-warm backend with minimal setup — or scale to zero when idle to reduce costs ## Prerequisites @@ -261,6 +261,10 @@ Hourly rate breakdown (2 vCPU / 4 GB): To stop the deployment entirely and avoid all charges: `modal app stop openhands-agent-server`. Your data on the Modal volume persists. +<Tip> + If you're using scale-to-zero and find the container scaling down too quickly between interactions, increase `SCALEDOWN_WINDOW` in `deploy.py`. The default is 600 seconds (10 minutes); setting it to 1800 (30 minutes) keeps the container warm during longer breaks without paying for overnight idle time. +</Tip> + ## Limitations - **No Docker-in-Docker.** Modal containers don't support nested Docker. The agent executes code directly on the container filesystem (same model as running `npx @openhands/agent-canvas` locally).
Tools that require Docker won't work. @@ -288,6 +292,40 @@ echo "New API Key: $API_KEY" Then update the API key in Agent Canvas — click the backend switcher → **Manage Backends** → edit the Modal backend → paste the new key. +## Upgrading + +To update to a newer agent-server version, change `AGENT_SERVER_IMAGE_TAG` in `deploy.py` to the desired tag (e.g. `1.25.0-python`) and redeploy: + +```bash +modal deploy deploy.py +``` + +Modal rebuilds the container image with the new version. Your data on the Modal volume (conversations, settings, LLM credentials) is preserved. + +Available tags are listed at [`ghcr.io/openhands/agent-server`](https://github.com/OpenHands/OpenHands/pkgs/container/agent-server). Use the `-python` variant. + +## Troubleshooting + +Check the server logs: + +```bash +modal app logs openhands-agent-server +``` + +List running apps to confirm the deployment is active: + +```bash +modal app list +``` + +If the container is crashing or unresponsive, redeploy to force a fresh start: + +```bash +modal deploy deploy.py +``` + +Your data on the Modal volume persists across redeploys. + ## Tearing Down To stop the deployment and stop incurring costs: From 2d53b01a01a9f3ed9527aaba85ecc995b61ff3f9 Mon Sep 17 00:00:00 2001 From: Calvin Smith Date: Mon, 15 Jun 2026 11:40:04 -0600 Subject: [PATCH 3/4] docs(modal): escape dollar signs to fix MDX math rendering Bare $ in prose and table cells was being parsed as inline math delimiters, causing text like '/day' to render as italicized math. 
Co-authored-by: openhands --- .../usage/agent-canvas/backend-setup/modal.mdx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/openhands/usage/agent-canvas/backend-setup/modal.mdx b/openhands/usage/agent-canvas/backend-setup/modal.mdx index 2c4254ad..0026a217 100644 --- a/openhands/usage/agent-canvas/backend-setup/modal.mdx +++ b/openhands/usage/agent-canvas/backend-setup/modal.mdx @@ -19,7 +19,7 @@ A Modal backend is a good fit when you want to: ## Prerequisites -- A [Modal account](https://modal.com/signup) (free tier includes $30/month credit) +- A [Modal account](https://modal.com/signup) (free tier includes \$30/month credit) - Python 3.12+ - Agent Canvas running locally — see [Setup](/openhands/usage/agent-canvas/setup) - An LLM API key (OpenAI, Anthropic, etc.) @@ -245,19 +245,19 @@ Modal charges per-second for CPU and memory. The `MODAL_ALWAYS_ON` setting contr | **Cold starts** | None | ~10-30s after idle period | | **Idle behavior** | Container stays warm 24/7 | Scales down after 10 min idle | | **Best for** | Daily driver, fast iteration | Occasional use, cost-sensitive | -| **Monthly cost** | ~$102 (24/7) | Pay only for active hours | +| **Monthly cost** | ~\$102 (24/7) | Pay only for active hours | Hourly rate breakdown (2 vCPU / 4 GB): | Resource | Rate | |----------|------| -| 2 vCPU (1 physical core) | ~$0.096/hr | -| 4 GB RAM | ~$0.046/hr | -| **Total** | **~$0.14/hr** | +| 2 vCPU (1 physical core) | ~\$0.096/hr | +| 4 GB RAM | ~\$0.046/hr | +| **Total** | **~\$0.14/hr** | -**Always-on** costs ~$3.40/day (~$102/month). Modal's $30/month free credit covers about 9 days. +**Always-on** costs ~\$3.40/day (~\$102/month). Modal's \$30/month free credit covers about 9 days. -**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly ~$1.12/day (~$25/month). 
The first request after an idle period takes ~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions. +**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly ~\$1.12/day (~\$25/month). The first request after an idle period takes ~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions. To stop the deployment entirely and avoid all charges: `modal app stop openhands-agent-server`. Your data on the Modal volume persists. From ad48a266abcdba5baf4e4ca4e8ba992d57750aad Mon Sep 17 00:00:00 2001 From: Calvin Smith Date: Mon, 15 Jun 2026 11:42:22 -0600 Subject: [PATCH 4/4] docs(modal): escape tildes to fix MDX strikethrough rendering Bare ~ characters were being paired as strikethrough markers. Co-authored-by: openhands --- .../usage/agent-canvas/backend-setup/modal.mdx | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/openhands/usage/agent-canvas/backend-setup/modal.mdx b/openhands/usage/agent-canvas/backend-setup/modal.mdx index 0026a217..9236f1e2 100644 --- a/openhands/usage/agent-canvas/backend-setup/modal.mdx +++ b/openhands/usage/agent-canvas/backend-setup/modal.mdx @@ -31,7 +31,7 @@ pip install modal modal setup ``` -`modal setup` opens a browser to authenticate. Your credentials are saved to `~/.modal.toml`. +`modal setup` opens a browser to authenticate. Your credentials are saved to `~/.modal.toml`. ## 2. Create a Modal Secret @@ -203,7 +203,7 @@ Modal builds the container image on first deploy (takes a few minutes), then pri https://openhands-agent-server--agentserver-serve.modal.run ``` -The agent server runs on 2 vCPU / 4 GB RAM with a persistent volume for conversations and settings. By default, the container is always warm (`min_containers=1`) so there's no cold-start latency.
To scale to zero when idle instead (lower cost, but ~10-30s cold start on first request): +The agent server runs on 2 vCPU / 4 GB RAM with a persistent volume for conversations and settings. By default, the container is always warm (`min_containers=1`) so there's no cold-start latency. To scale to zero when idle instead (lower cost, but \~10-30s cold start on first request): ```bash MODAL_ALWAYS_ON=0 modal deploy deploy.py @@ -242,22 +242,22 @@ Modal charges per-second for CPU and memory. The `MODAL_ALWAYS_ON` setting contr | | Always-on (default) | Scale-to-zero (`MODAL_ALWAYS_ON=0`) | |---|---|---| -| **Cold starts** | None | ~10-30s after idle period | +| **Cold starts** | None | \~10-30s after idle period | | **Idle behavior** | Container stays warm 24/7 | Scales down after 10 min idle | | **Best for** | Daily driver, fast iteration | Occasional use, cost-sensitive | -| **Monthly cost** | ~\$102 (24/7) | Pay only for active hours | +| **Monthly cost** | \~\$102 (24/7) | Pay only for active hours | Hourly rate breakdown (2 vCPU / 4 GB): | Resource | Rate | |----------|------| -| 2 vCPU (1 physical core) | ~\$0.096/hr | -| 4 GB RAM | ~\$0.046/hr | -| **Total** | **~\$0.14/hr** | +| 2 vCPU (1 physical core) | \~\$0.096/hr | +| 4 GB RAM | \~\$0.046/hr | +| **Total** | **\~\$0.14/hr** | -**Always-on** costs ~\$3.40/day (~\$102/month). Modal's \$30/month free credit covers about 9 days. +**Always-on** costs \~\$3.40/day (\~\$102/month). Modal's \$30/month free credit covers about 9 days. -**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly ~\$1.12/day (~\$25/month). The first request after an idle period takes ~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions. +**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly \~\$1.12/day (\~\$25/month). 
The first request after an idle period takes \~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions. To stop the deployment entirely and avoid all charges: `modal app stop openhands-agent-server`. Your data on the Modal volume persists.