From b54daef64f1e33bed957bdb4f19926a114610677 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Mon, 15 Jun 2026 11:23:52 -0600
Subject: [PATCH 1/4] docs(modal): add MODAL_ALWAYS_ON scale-to-zero option

Update the Modal backend page with configurable always-on vs
scale-to-zero mode. Default behavior unchanged (always warm).
Set MODAL_ALWAYS_ON=0 at deploy time to scale to zero when idle
(~$25/month vs ~$102/month, with ~10-30s cold starts).

- Updated deploy.py code block with MODAL_ALWAYS_ON env var
- Rewrote Cost section with comparison table for both modes
- Added scale-to-zero deploy command in step 3

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .../agent-canvas/backend-setup/modal.mdx      | 57 +++++++++++++++----
 1 file changed, 45 insertions(+), 12 deletions(-)

diff --git a/openhands/usage/agent-canvas/backend-setup/modal.mdx b/openhands/usage/agent-canvas/backend-setup/modal.mdx
index 87c8f5d3..9b2a367a 100644
--- a/openhands/usage/agent-canvas/backend-setup/modal.mdx
+++ b/openhands/usage/agent-canvas/backend-setup/modal.mdx
@@ -76,6 +76,7 @@ Usage:
   modal run deploy.py
 """
 
+import os
 import subprocess
 
 import modal
@@ -93,6 +94,13 @@ SCALEDOWN_WINDOW = 600  # seconds before an idle container is eligible for shutd
 CONTAINER_CPU = 2.0
 CONTAINER_MEMORY_MB = 4096  # 4 GB
 
+# Always-on mode (default): keeps one container warm at all times for zero
+# cold-start latency. Costs ~$102/month (2 vCPU / 4 GB, 24/7).
+# Set MODAL_ALWAYS_ON=0 to scale to zero when idle. You only pay while
+# actively coding, but the first request after idle has a ~10-30s cold start.
+ALWAYS_ON = os.environ.get("MODAL_ALWAYS_ON", "1").lower() in ("1", "true")
+MIN_CONTAINERS = 1 if ALWAYS_ON else 0
+
 # --- Modal App ---
 
 app = modal.App("openhands-agent-server")
@@ -138,10 +146,10 @@ agent_server_image = (
     memory=CONTAINER_MEMORY_MB,
     scaledown_window=SCALEDOWN_WINDOW,
     timeout=3600,
-    # Pin to exactly 1 container, always warm. The agent-server is stateful
-    # (SQLite DB, tmux sessions, in-memory conversation state). Multiple
-    # containers would diverge. min_containers=1 eliminates cold starts.
-    min_containers=1,
+    # The agent-server is stateful (SQLite DB, tmux sessions, in-memory
+    # conversation state) — multiple containers would diverge.
+    # min_containers is controlled by MODAL_ALWAYS_ON (default: 1, always warm).
+    min_containers=MIN_CONTAINERS,
     max_containers=1,
 )
 @modal.concurrent(max_inputs=10)
@@ -160,13 +168,19 @@ class AgentServer:
 
 @app.local_entrypoint()
 def main():
+    mode = "always-on" if ALWAYS_ON else "scale-to-zero"
     print("OpenHands Agent Server — Modal deployment")
     print(f"  Image: ghcr.io/openhands/agent-server:{AGENT_SERVER_IMAGE_TAG}")
     print(f"  Volume: openhands-data → {VOLUME_MOUNT}")
+    print(f"  Mode: {mode} (min_containers={MIN_CONTAINERS})")
     print(f"  Scaledown: {SCALEDOWN_WINDOW}s")
     print()
     print("To deploy:")
     print("  modal deploy deploy.py")
+    if ALWAYS_ON:
+        print()
+        print("  # Or, to scale to zero when idle (saves cost, adds cold starts):")
+        print("  MODAL_ALWAYS_ON=0 modal deploy deploy.py")
     print()
     print("After deploying, add the backend in Agent Canvas:")
     print("  1. Open Agent Canvas")
@@ -189,7 +203,13 @@ Modal builds the container image on first deploy (takes a few minutes), then pri
 https://openhands-agent-server--agentserver-serve.modal.run
 ```
 
-The agent server runs on 2 vCPU / 4 GB RAM with a persistent volume for conversations and settings. The container is always warm (`min_containers=1`) so there's no cold-start latency.
+The agent server runs on 2 vCPU / 4 GB RAM with a persistent volume for conversations and settings. By default, the container is always warm (`min_containers=1`) so there's no cold-start latency. To scale to zero when idle instead (lower cost, but ~10-30s cold start on first request):
+
+```bash
+MODAL_ALWAYS_ON=0 modal deploy deploy.py
+```
+
+See [Cost](#cost) for a comparison of the two modes.
 
 ## 4. Connect Agent Canvas
 
@@ -218,15 +238,28 @@ Settings are stored server-side on the Modal volume (encrypted with `OH_SECRET_K
 
 ## Cost
 
-The deployment keeps one container running at all times (`min_containers=1`) to eliminate cold-start latency. Modal charges per-second:
+Modal charges per-second for CPU and memory. The `MODAL_ALWAYS_ON` setting controls whether the container stays warm between requests:
+
+| | Always-on (default) | Scale-to-zero (`MODAL_ALWAYS_ON=0`) |
+|---|---|---|
+| **Cold starts** | None | ~10-30s after idle period |
+| **Idle behavior** | Container stays warm 24/7 | Scales down after 10 min idle |
+| **Best for** | Daily driver, fast iteration | Occasional use, cost-sensitive |
+| **Monthly cost** | ~$102 (24/7) | Pay only for active hours |
+
+Hourly rate breakdown (2 vCPU / 4 GB):
+
+| Resource | Rate |
+|----------|------|
+| 2 vCPU (1 physical core) | ~$0.096/hr |
+| 4 GB RAM | ~$0.046/hr |
+| **Total** | **~$0.14/hr** |
+
+**Always-on** costs ~$3.40/day (~$102/month). Modal's $30/month free credit covers about 9 days.
 
-| Resource | Rate | Daily Cost | Monthly Cost |
-|----------|------|------------|--------------|
-| 2 vCPU (1 physical core) | ~$0.096/hr | ~$2.30 | ~$69 |
-| 4 GB RAM | ~$0.046/hr | ~$1.10 | ~$33 |
-| **Total** | **~$0.14/hr** | **~$3.40** | **~$102** |
+**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly ~$1.12/day (~$25/month). The first request after an idle period takes ~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions.
 
-The $30/month free credit on Modal's starter tier covers about 9 days of continuous usage. To reduce costs, stop the deployment when not in use (`modal app stop openhands-agent-server`). Your data on the Modal volume persists.
+To stop the deployment entirely and avoid all charges: `modal app stop openhands-agent-server`. Your data on the Modal volume persists.
 
 ## Limitations
 

From f6121a58188d0ad571038d8235087ab30cf32185 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Mon, 15 Jun 2026 11:27:45 -0600
Subject: [PATCH 2/4] docs(modal): add upgrading, troubleshooting, and tuning
 tips

- Update 'When to Use It' to mention scale-to-zero option
- Add scaledown_window tuning tip for scale-to-zero users
- Add Upgrading section (how to bump the image tag)
- Add Troubleshooting section (logs, app list, redeploy)

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .../agent-canvas/backend-setup/modal.mdx      | 40 ++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/openhands/usage/agent-canvas/backend-setup/modal.mdx b/openhands/usage/agent-canvas/backend-setup/modal.mdx
index 9b2a367a..2c4254ad 100644
--- a/openhands/usage/agent-canvas/backend-setup/modal.mdx
+++ b/openhands/usage/agent-canvas/backend-setup/modal.mdx
@@ -15,7 +15,7 @@ A Modal backend is a good fit when you want to:
 
 - Offload agent execution to the cloud without managing your own VM or Docker host
 - Take advantage of Modal's per-second billing and free-tier credits
-- Get a persistent, always-warm backend with minimal setup
+- Get a persistent, always-warm backend with minimal setup — or scale to zero when idle to reduce costs
 
 ## Prerequisites
 
@@ -261,6 +261,10 @@ Hourly rate breakdown (2 vCPU / 4 GB):
 
 To stop the deployment entirely and avoid all charges: `modal app stop openhands-agent-server`. Your data on the Modal volume persists.
 
+<Tip>
+  If you're using scale-to-zero and find the container scaling down too quickly between interactions, increase `SCALEDOWN_WINDOW` in `deploy.py`. The default is 600 seconds (10 minutes); setting it to 1200 (20 minutes, the maximum Modal accepts for `scaledown_window`) keeps the container warm during longer breaks without paying for overnight idle time.
+</Tip>
+
 ## Limitations
 
 - **No Docker-in-Docker.** Modal containers don't support nested Docker. The agent executes code directly on the container filesystem (same model as running `npx @openhands/agent-canvas` locally). Tools that require Docker won't work.
@@ -288,6 +292,40 @@ echo "New API Key: $API_KEY"
 
 Then update the API key in Agent Canvas — click the backend switcher → **Manage Backends** → edit the Modal backend → paste the new key.
 
+## Upgrading
+
+To update to a newer agent-server version, change `AGENT_SERVER_IMAGE_TAG` in `deploy.py` to the desired tag (e.g. `1.25.0-python`) and redeploy:
+
+```bash
+modal deploy deploy.py
+```
+
+Modal rebuilds the container image with the new version. Your data on the Modal volume (conversations, settings, LLM credentials) is preserved.
+
+Available tags are listed at [`ghcr.io/openhands/agent-server`](https://github.com/OpenHands/OpenHands/pkgs/container/agent-server). Use the `-python` variant.
+
+## Troubleshooting
+
+Check the server logs:
+
+```bash
+modal app logs openhands-agent-server
+```
+
+List running apps to confirm the deployment is active:
+
+```bash
+modal app list
+```
+
+If the container is crashing or unresponsive, redeploy to force a fresh start:
+
+```bash
+modal deploy deploy.py
+```
+
+Your data on the Modal volume persists across redeploys.
+
 ## Tearing Down
 
 To stop the deployment and stop incurring costs:

From 2d53b01a01a9f3ed9527aaba85ecc995b61ff3f9 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Mon, 15 Jun 2026 11:40:04 -0600
Subject: [PATCH 3/4] docs(modal): escape dollar signs to fix MDX math
 rendering

Bare $ in prose and table cells was being parsed as inline math
delimiters, causing text like '/day' to render as italicized math.

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .../usage/agent-canvas/backend-setup/modal.mdx     | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/openhands/usage/agent-canvas/backend-setup/modal.mdx b/openhands/usage/agent-canvas/backend-setup/modal.mdx
index 2c4254ad..0026a217 100644
--- a/openhands/usage/agent-canvas/backend-setup/modal.mdx
+++ b/openhands/usage/agent-canvas/backend-setup/modal.mdx
@@ -19,7 +19,7 @@ A Modal backend is a good fit when you want to:
 
 ## Prerequisites
 
-- A [Modal account](https://modal.com/signup) (free tier includes $30/month credit)
+- A [Modal account](https://modal.com/signup) (free tier includes \$30/month credit)
 - Python 3.12+
 - Agent Canvas running locally — see [Setup](/openhands/usage/agent-canvas/setup)
 - An LLM API key (OpenAI, Anthropic, etc.)
@@ -245,19 +245,19 @@ Modal charges per-second for CPU and memory. The `MODAL_ALWAYS_ON` setting contr
 | **Cold starts** | None | ~10-30s after idle period |
 | **Idle behavior** | Container stays warm 24/7 | Scales down after 10 min idle |
 | **Best for** | Daily driver, fast iteration | Occasional use, cost-sensitive |
-| **Monthly cost** | ~$102 (24/7) | Pay only for active hours |
+| **Monthly cost** | ~\$102 (24/7) | Pay only for active hours |
 
 Hourly rate breakdown (2 vCPU / 4 GB):
 
 | Resource | Rate |
 |----------|------|
-| 2 vCPU (1 physical core) | ~$0.096/hr |
-| 4 GB RAM | ~$0.046/hr |
-| **Total** | **~$0.14/hr** |
+| 2 vCPU (1 physical core) | ~\$0.096/hr |
+| 4 GB RAM | ~\$0.046/hr |
+| **Total** | **~\$0.14/hr** |
 
-**Always-on** costs ~$3.40/day (~$102/month). Modal's $30/month free credit covers about 9 days.
+**Always-on** costs ~\$3.40/day (~\$102/month). Modal's \$30/month free credit covers about 9 days.
 
-**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly ~$1.12/day (~$25/month). The first request after an idle period takes ~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions.
+**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly ~\$1.12/day (~\$25/month). The first request after an idle period takes ~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions.
 
 To stop the deployment entirely and avoid all charges: `modal app stop openhands-agent-server`. Your data on the Modal volume persists.
 

From ad48a266abcdba5baf4e4ca4e8ba992d57750aad Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Mon, 15 Jun 2026 11:42:22 -0600
Subject: [PATCH 4/4] docs(modal): escape tildes to fix MDX strikethrough
 rendering

Bare ~ characters were being paired as strikethrough markers.

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .../usage/agent-canvas/backend-setup/modal.mdx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/openhands/usage/agent-canvas/backend-setup/modal.mdx b/openhands/usage/agent-canvas/backend-setup/modal.mdx
index 0026a217..9236f1e2 100644
--- a/openhands/usage/agent-canvas/backend-setup/modal.mdx
+++ b/openhands/usage/agent-canvas/backend-setup/modal.mdx
@@ -31,7 +31,7 @@ pip install modal
 modal setup
 ```
 
-`modal setup` opens a browser to authenticate. Your credentials are saved to `~/.modal.toml`.
+`modal setup` opens a browser to authenticate. Your credentials are saved to `~/.modal.toml`.
 
 ## 2. Create a Modal Secret
 
@@ -203,7 +203,7 @@ Modal builds the container image on first deploy (takes a few minutes), then pri
 https://openhands-agent-server--agentserver-serve.modal.run
 ```
 
-The agent server runs on 2 vCPU / 4 GB RAM with a persistent volume for conversations and settings. By default, the container is always warm (`min_containers=1`) so there's no cold-start latency. To scale to zero when idle instead (lower cost, but ~10-30s cold start on first request):
+The agent server runs on 2 vCPU / 4 GB RAM with a persistent volume for conversations and settings. By default, the container is always warm (`min_containers=1`) so there's no cold-start latency. To scale to zero when idle instead (lower cost, but a \~10-30s cold start on the first request after an idle period), set `MODAL_ALWAYS_ON=0` when deploying:
 
 ```bash
 MODAL_ALWAYS_ON=0 modal deploy deploy.py
@@ -242,22 +242,22 @@ Modal charges per-second for CPU and memory. The `MODAL_ALWAYS_ON` setting contr
 
 | | Always-on (default) | Scale-to-zero (`MODAL_ALWAYS_ON=0`) |
 |---|---|---|
-| **Cold starts** | None | ~10-30s after idle period |
+| **Cold starts** | None | \~10-30s after idle period |
 | **Idle behavior** | Container stays warm 24/7 | Scales down after 10 min idle |
 | **Best for** | Daily driver, fast iteration | Occasional use, cost-sensitive |
-| **Monthly cost** | ~\$102 (24/7) | Pay only for active hours |
+| **Monthly cost** | \~\$102 (24/7) | Pay only for active hours |
 
 Hourly rate breakdown (2 vCPU / 4 GB):
 
 | Resource | Rate |
 |----------|------|
-| 2 vCPU (1 physical core) | ~\$0.096/hr |
-| 4 GB RAM | ~\$0.046/hr |
-| **Total** | **~\$0.14/hr** |
+| 2 vCPU (1 physical core) | \~\$0.096/hr |
+| 4 GB RAM | \~\$0.046/hr |
+| **Total** | **\~\$0.14/hr** |
 
-**Always-on** costs ~\$3.40/day (~\$102/month). Modal's \$30/month free credit covers about 9 days.
+**Always-on** costs \~\$3.40/day (\~\$102/month). Modal's \$30/month free credit covers about 9 days of always-on usage.
 
-**Scale-to-zero** costs only for the hours the container is running. At 8 hours/day on workdays, that's roughly ~\$1.12/day (~\$25/month). The first request after an idle period takes ~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions.
+**Scale-to-zero** is billed only for the hours the container is running. At 8 hours/day on workdays (about 22 days/month), that's roughly \$1.12 per workday (\~\$25/month). The first request after an idle period takes \~10-30s while the container cold-starts; after that, the `scaledown_window` (10 min) keeps it warm between interactions.
 
 To stop the deployment entirely and avoid all charges: `modal app stop openhands-agent-server`. Your data on the Modal volume persists.