From 3250a85489be2c8f5f72017fe56b47a9d23580ee Mon Sep 17 00:00:00 2001
From: Giselle van Dongen <giselle@restate.dev>
Date: Wed, 15 Apr 2026 13:37:37 +0200
Subject: [PATCH 1/4] first attempt to improve error handling docs for ai
 agents

---
 docs/ai/patterns/error-handling.mdx | 87 +++++++++++++++--------------
 1 file changed, 45 insertions(+), 42 deletions(-)
diff --git a/docs/ai/patterns/error-handling.mdx b/docs/ai/patterns/error-handling.mdx
index 62289a43..cef9068f 100644
--- a/docs/ai/patterns/error-handling.mdx
+++ b/docs/ai/patterns/error-handling.mdx
@@ -31,11 +31,13 @@ LLM API calls fail transiently (rate limits, network issues, provider outages).
 <GlobalTabs className={"hidden-tabs"}>
 <GlobalTab title="Vercel AI">
 
-In the Vercel AI SDK, set `maxRetries` on `generateText` (default: 2) to retry failed calls due to rate limits or transient errors.
-After retries are exhausted, the agent throws an error.
-Restate then retries the invocation with exponential backoff to handle longer outages or network issues.
+The Vercel AI SDK and the Restate middleware each have their own retry layer, and they compose.
 
-You can limit Restate's retries with the `maxRetryAttempts` option in `durableCalls` middleware:
+The Vercel AI SDK retries first, in-process: you set `maxRetries` on `generateText` (default: 2) to retry failed calls due to rate limits or transient errors. Once those are exhausted, the AI SDK throws an error.
+
+Restate then takes over and retries the invocation with exponential backoff. Each Restate retry replays the call, which goes through `maxRetries` Vercel AI SDK attempts again, so the two limits multiply (e.g. `maxRetryAttempts`: 3 × `maxRetries`: 2 = up to 6 attempts).
+
+You can configure Restate's retry behavior by passing [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html) to the `durableCalls` middleware (for example, to limit attempts via `maxRetryAttempts`):
 
 ```typescript errorhandling/fail-on-terminal-tool-agent.ts {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/vercel-ai/tour-of-agents/src/errorhandling/fail-on-terminal-tool-agent.ts#max_attempts_example"} 
 const model = wrapLanguageModel({
@@ -45,65 +47,63 @@ const model = wrapLanguageModel({
 ```
 <GitHubLink url="https://github.com/restatedev/ai-examples/tree/ai-structure/vercel-ai/tour-of-agents/src/errorhandling/fail-on-terminal-tool-agent.ts" />
 
-Each Restate retry triggers up to `maxRetries` SDK attempts.
-For example, with `maxRetryAttempts`: 3 and `maxRetries`: 2, a call may be attempted 6 times.
-Once Restate's retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further.
+If you don't pass `RunOptions`, the call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults). Once Restate's retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further.
 
 </GlobalTab>
 <GlobalTab title="OpenAI Agents">
 
-Restate's `DurableRunner` lets you specify the retry behavior for LLM calls:
+Restate's `DurableRunner` lets you specify the retry behavior for LLM calls via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/openai-agents/tour-of-agents/app/error_handling.py#handle"} 
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/openai-agents/tour-of-agents/app/error_handling.py#handle"} 
 try:
-    result = await DurableRunner.run(
-        agent,
-        req.message,
-        llm_retry_opts=LlmRetryOpts(
-            max_attempts=3, initial_retry_interval=timedelta(seconds=2)
-        ),
+    run_opts = RunOptions(
+        max_attempts=3, initial_retry_interval=timedelta(seconds=2)
     )
+    result = await DurableRunner.run(agent, req.message, run_options=run_opts)
 except restate.TerminalError as e:
     # Handle terminal errors gracefully
     return f"The agent couldn't complete the request: {e.message}"
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/main/openai-agents/tour-of-agents/app/error_handling.py" />
+<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/openai-agents/tour-of-agents/app/error_handling.py" />
 
-By default, the runner retries ten times with an initial interval of one second. Once Restate's retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further.
+If you don't pass `RunOptions`, the LLM call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
 
 </GlobalTab>
 <GlobalTab title="Google ADK">
 
-Configure the number of retries for LLM calls when activating the Restate plugin for your ADK App:
+Configure the retry behavior for LLM calls via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) when activating the Restate plugin for your ADK App:
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/google-adk/tour-of-agents/app/error_handling.py#retries"} 
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/google-adk/tour-of-agents/app/error_handling.py#retries"} 
+run_options = RunOptions(max_attempts=3, initial_retry_interval=timedelta(seconds=1))
 app = App(
-    name=APP_NAME, root_agent=agent, plugins=[RestatePlugin(max_model_call_retries=3)]
+    name=APP_NAME,
+    root_agent=agent,
+    plugins=[RestatePlugin(run_options=run_options)],
 )
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/main/google-adk/tour-of-agents/app/error_handling.py" />
+<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/google-adk/tour-of-agents/app/error_handling.py" />
 
-By default, the runner retries ten times with an initial interval of one second. Once Restate's retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further.
+If you don't pass `RunOptions`, the LLM call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
 
 </GlobalTab>
 <GlobalTab title="Pydantic AI">
 
-Restate's `RestateAgent` lets you specify the retry behavior for LLM calls via `RunOptions`:
+Restate's `RestateAgent` lets you specify the retry behavior for LLM calls via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/pydantic-ai/tour-of-agents/app/error_handling.py#retries"} 
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#retries"} 
 restate_agent = RestateAgent(
     agent,
     run_options=RunOptions(max_attempts=3, initial_retry_interval=timedelta(seconds=2)),
 )
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/main/pydantic-ai/tour-of-agents/app/error_handling.py" />
+<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py" />
 
-By default, the runner retries ten times with an initial interval of one second. Once Restate's retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further.
+If you don't pass `RunOptions`, the LLM call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
 
 </GlobalTab>
 <GlobalTab title="Restate TS">
 
-Wrap LLM calls in `ctx.run()` with a retry limit to handle transient failures automatically:
+Wrap LLM calls in `ctx.run()` and pass [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html) to control retry behavior:
 
 ```typescript
 // Retries up to 3 times with exponential backoff
@@ -114,14 +114,12 @@ const result = await ctx.run(
 );
 ```
 
-Without `maxRetryAttempts`, Restate retries indefinitely with exponential backoff. For LLM calls, setting a limit prevents runaway costs from persistent failures.
-
-You can set [custom retry policies](/guides/error-handling#at-the-run-block-level) for `ctx.run` steps.
+If you don't pass `RunOptions`, the call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults). See also [custom retry policies](/guides/error-handling#at-the-run-block-level) for `ctx.run` steps.
 
 </GlobalTab>
 <GlobalTab title="Restate Py">
 
-Wrap LLM calls in `ctx.run_typed()` with a retry limit to handle transient failures automatically:
+Wrap LLM calls in `ctx.run_typed()` and pass [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) to control retry behavior:
 
 ```python
 # Retries up to 3 times with exponential backoff
@@ -134,9 +132,7 @@ result = await ctx.run_typed(
 )
 ```
 
-Without `max_attempts`, Restate retries indefinitely with exponential backoff. For LLM calls, setting a limit prevents runaway costs from persistent failures.
-
-You can set [custom retry policies](/guides/error-handling#at-the-run-block-level) for `.run` actions.
+If you don't pass `RunOptions`, the call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults). See also [custom retry policies](/guides/error-handling#at-the-run-block-level) for `.run` actions.
 
 </GlobalTab>
 </GlobalTabs>
@@ -157,7 +153,7 @@ throw new TerminalError("This tool is not allowed to run for this input.");
 By default, the Vercel AI will convert any errors in tool executions into a message to the LLM, and the agent will decide how to proceed.
 This is often desirable, as the LLM can decide to use a different tool or provide a fallback answer.
 
-However, if you use Restate Context actions like `ctx.run` in your tool execution, Restate will retry any transient errors in these actions until they succeed.
+However, if you use Restate Context actions like `ctx.run` in your tool execution, Restate will retry any transient errors in these actions until they succeed. For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html).
 
 ```typescript {"CODE_LOAD::ts/src/tour/agents/inline-tool-errors.ts#here"} 
 // Without ctx.run - error goes straight to agent
@@ -247,6 +243,8 @@ from restate import TerminalError
 raise TerminalError("This tool is not allowed to run for this input.")
 ```
 
+For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37).
+
 By default, the Restate OpenAI integration will raise any terminal errors in tool executions and will let you handle them in your handler.
 
 <Warning>
@@ -271,10 +269,9 @@ from restate import TerminalError
 raise TerminalError("This tool is not allowed to run for this input.")
 ```
 
-Restate retries tool executions by default until they succeed.
-For errors which should not be retried, raise terminal errors from within your tool implementations.
+For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37).
 
-You can catch these terminal errors in your handler and handle them accordingly.
+For errors which should not be retried, raise terminal errors from within your tool implementations. You can catch these terminal errors in your handler and handle them accordingly.
 
 </GlobalTab>
 <GlobalTab title="Pydantic AI">
@@ -283,14 +280,16 @@ When agent tools use Restate Context actions like `ctx.run`, Restate automatical
 
 For example, wrapping a tool call in `restate_context().run_typed()` makes it durable with automatic retries:
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/pydantic-ai/tour-of-agents/app/error_handling.py#here"} 
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#here"} 
 async def get_weather(city: WeatherRequest) -> WeatherResponse:
     """Get the current weather for a given city."""
     return await restate_context().run_typed(
         f"Get weather {city}", fetch_weather, req=city
     )
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/main/pydantic-ai/tour-of-agents/app/error_handling.py" />
+<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py" />
+
+For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37).
 
 For errors that should not be retried, raise a terminal error:
 
@@ -305,7 +304,7 @@ For errors which should not be retried, raise terminal errors from within your t
 
 You can catch these terminal errors in your handler and handle them accordingly:
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/pydantic-ai/tour-of-agents/app/error_handling.py#handle"} 
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#handle"} 
 @agent_service.handler()
 async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
     try:
@@ -315,13 +314,15 @@ async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
         return f"The agent couldn't complete the request: {e.message}"
     return result.output
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/main/pydantic-ai/tour-of-agents/app/error_handling.py" />
+<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py" />
 
 </GlobalTab>
 <GlobalTab title="Restate TS">
 
 Restate automatically retries transient errors. This makes your tools resilient to network failures, database hiccups, and other temporary issues.
 
+For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html).
+
 When a tool encounters an unrecoverable error (e.g., resource not found, invalid input, business rule violation), throw a `TerminalError` to stop retries immediately:
 
 ```typescript {"CODE_LOAD::ts/src/tour/agents/terminal_error.ts#terminal_error"} 
@@ -335,6 +336,8 @@ You can catch and handle terminal errors in your agent logic if needed.
 
 Restate automatically retries transient errors. This makes your tools resilient to network failures, database hiccups, and other temporary issues.
 
+For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37).
+
 When a tool encounters an unrecoverable error (e.g., resource not found, invalid input, business rule violation), raise a `TerminalError` to stop retries immediately:
 
 ```python

From 007bfd8a9d921cf5f1e830e578d8160610d3d034 Mon Sep 17 00:00:00 2001
From: Giselle van Dongen <giselle@restate.dev>
Date: Wed, 15 Apr 2026 16:17:57 +0200
Subject: [PATCH 2/4] update error handling pages

---
 docs/ai/patterns/error-handling.mdx           | 368 +++++++++++++-----
 docs/guides/error-handling.mdx                |  13 +-
 snippets/python/src/ai/error_handling.py      |  29 ++
 .../ai/guides/errorhandling/error_handling.ts |  18 +
 4 files changed, 323 insertions(+), 105 deletions(-)
 create mode 100644 snippets/python/src/ai/error_handling.py
 create mode 100644 snippets/ts/src/ai/guides/errorhandling/error_handling.ts

diff --git a/docs/ai/patterns/error-handling.mdx b/docs/ai/patterns/error-handling.mdx
index cef9068f..5f026434 100644
--- a/docs/ai/patterns/error-handling.mdx
+++ b/docs/ai/patterns/error-handling.mdx
@@ -7,9 +7,9 @@ description: "Implement robust error handling and retry strategies for reliable
 
 import {GlobalTabs, GlobalTab} from "/snippets/components/global-tabs.jsx";
 import { GitHubLink } from '/snippets/blocks/github-link.mdx';
-import SetupPydanticAI from '/snippets/tour/ai/setup-pydantic-ai.mdx';
 
-LLM calls are costly, so you want to configure retry behavior to avoid infinite loops and high costs while still recovering from transient failures.
+Restate automatically retries failures of your agents until they succeed.
+But LLM calls are costly, so you might want to configure retry behavior to fit your use case and to avoid retrying errors that cannot heal.
 
 Restate distinguishes between two types of errors:
 - **Transient errors**: Temporary issues like network failures or rate limits. Restate automatically retries these until they succeed or the retry policy is exhausted.
@@ -26,18 +26,60 @@ Restate distinguishes between two types of errors:
 
 ## Retrying LLM calls
 
-LLM API calls fail transiently (rate limits, network issues, provider outages). Configure retry limits to handle this automatically and prevent runaway costs.
+LLM API calls can suffer from transient failures (rate limits, network issues, provider outages). Restate retries failed LLM calls so your agents recover automatically.
+
+### Default behavior
 
 <GlobalTabs className={"hidden-tabs"}>
 <GlobalTab title="Vercel AI">
 
 The Vercel AI SDK and the Restate middleware each have their own retry layer, and they compose.
 
-The Vercel AI SDK retries first, in-process: you set `maxRetries` on `generateText` (default: 2) to retry failed calls due to rate limits or transient errors. Once those are exhausted, the AI SDK throws an error.
+The Vercel AI SDK does the first layer of retries based on what is set for `maxRetries` on `generateText` (default: 2). Once those are exhausted, the AI SDK throws an error.
+
+Restate then takes over and retries the invocation. Each Restate retry replays the call, which goes through `maxRetries` Vercel AI SDK attempts again.
+
+By default, Restate's retries follow the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Restate will go through a limited set of retries with exponential backoff (see [default policy](/references/server-config#param-default-retry-policy)), after which the invocation will be paused. This gives you time to fix the issue, and then [resume the invocation](/services/invocation/managing-invocations#resume).
+
+</GlobalTab>
+<GlobalTab title="OpenAI Agents">
+
+By default, `DurableRunner.run` retries LLM calls according to the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Restate will go through a limited set of retries with exponential backoff (see [default policy](/references/server-config#param-default-retry-policy)), after which the invocation will be paused. This gives you time to fix the issue, and then [resume the invocation](/services/invocation/managing-invocations#resume).
+
+</GlobalTab>
+<GlobalTab title="Google ADK">
+
+By default, the `RestatePlugin` retries LLM calls according to the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Restate will go through a limited set of retries with exponential backoff (see [default policy](/references/server-config#param-default-retry-policy)), after which the invocation will be paused. This gives you time to fix the issue, and then [resume the invocation](/services/invocation/managing-invocations#resume).
 
-Restate then takes over and retries the invocation with exponential backoff. Each Restate retry replays the call, which goes through `maxRetries` Vercel AI SDK attempts again, so the two limits multiply (e.g. `maxRetryAttempts`: 3 × `maxRetries`: 2 = up to 6 attempts).
+</GlobalTab>
+<GlobalTab title="Pydantic AI">
+
+By default, `RestateAgent` retries LLM calls according to the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Restate will go through a limited set of retries with exponential backoff (see [default policy](/references/server-config#param-default-retry-policy)), after which the invocation will be paused. This gives you time to fix the issue, and then [resume the invocation](/services/invocation/managing-invocations#resume).
+
+</GlobalTab>
+<GlobalTab title="Restate TS">
 
-You can configure Restate's retry behavior by passing [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html) to the `durableCalls` middleware (for example, to limit attempts via `maxRetryAttempts`):
+When you wrap LLM calls in `ctx.run()`, Restate retries them according to the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Restate will go through a limited set of retries with exponential backoff (see [default policy](/references/server-config#param-default-retry-policy)), after which the invocation will be paused. This gives you time to fix the issue, and then [resume the invocation](/services/invocation/managing-invocations#resume).
+
+</GlobalTab>
+<GlobalTab title="Restate Py">
+
+When you wrap LLM calls in `ctx.run_typed()`, Restate retries them according to the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Restate will go through a limited set of retries with exponential backoff (see [default policy](/references/server-config#param-default-retry-policy)), after which the invocation will be paused. This gives you time to fix the issue, and then [resume the invocation](/services/invocation/managing-invocations#resume).
+
+</GlobalTab>
+</GlobalTabs>
+
+### Setting a retry policy
+
+<GlobalTabs className={"hidden-tabs"}>
+<GlobalTab title="Vercel AI">
+To set a separate retry policy for LLM calls, pass [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html) to the `durableCalls` middleware:
 
 ```typescript errorhandling/fail-on-terminal-tool-agent.ts {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/vercel-ai/tour-of-agents/src/errorhandling/fail-on-terminal-tool-agent.ts#max_attempts_example"} 
 const model = wrapLanguageModel({
@@ -47,31 +89,29 @@ const model = wrapLanguageModel({
 ```
 <GitHubLink url="https://github.com/restatedev/ai-examples/tree/ai-structure/vercel-ai/tour-of-agents/src/errorhandling/fail-on-terminal-tool-agent.ts" />
 
-If you don't pass `RunOptions`, the call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults). Once Restate's retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further.
+If you set a maximum number of retry attempts, Restate will still go through the AI SDK's `maxRetries` for each attempt, so the two limits multiply (e.g. `maxRetryAttempts`: 3 × `maxRetries`: 2 = up to 6 attempts).
+
+Once Restate's retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the `TerminalError` in your handler and act accordingly.
 
 </GlobalTab>
 <GlobalTab title="OpenAI Agents">
 
-Restate's `DurableRunner` lets you specify the retry behavior for LLM calls via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
-
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/openai-agents/tour-of-agents/app/error_handling.py#handle"} 
-try:
-    run_opts = RunOptions(
-        max_attempts=3, initial_retry_interval=timedelta(seconds=2)
-    )
-    result = await DurableRunner.run(agent, req.message, run_options=run_opts)
-except restate.TerminalError as e:
-    # Handle terminal errors gracefully
-    return f"The agent couldn't complete the request: {e.message}"
+To set a separate retry policy for LLM calls, pass [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) to `DurableRunner.run`:
+
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/openai-agents/tour-of-agents/app/error_handling.py#retries"} 
+run_opts = RunOptions(
+    max_attempts=3, initial_retry_interval=timedelta(seconds=2)
+)
+result = await DurableRunner.run(agent, req.message, run_options=run_opts)
 ```
 <GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/openai-agents/tour-of-agents/app/error_handling.py" />
 
-If you don't pass `RunOptions`, the LLM call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Once these retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the `TerminalError` in your handler and act accordingly.
 
 </GlobalTab>
 <GlobalTab title="Google ADK">
 
-Configure the retry behavior for LLM calls via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) when activating the Restate plugin for your ADK App:
+To set a separate retry policy for LLM calls, pass [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) to the Restate plugin when activating it for your ADK App:
 
 ```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/google-adk/tour-of-agents/app/error_handling.py#retries"} 
 run_options = RunOptions(max_attempts=3, initial_retry_interval=timedelta(seconds=1))
@@ -83,12 +123,12 @@ app = App(
 ```
 <GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/google-adk/tour-of-agents/app/error_handling.py" />
 
-If you don't pass `RunOptions`, the LLM call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Once these retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the `TerminalError` in your handler and act accordingly.
 
 </GlobalTab>
 <GlobalTab title="Pydantic AI">
 
-Restate's `RestateAgent` lets you specify the retry behavior for LLM calls via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
+To set a separate retry policy for LLM calls, pass [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) to `RestateAgent`:
 
 ```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#retries"} 
 restate_agent = RestateAgent(
@@ -98,12 +138,12 @@ restate_agent = RestateAgent(
 ```
 <GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py" />
 
-If you don't pass `RunOptions`, the LLM call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+Once these retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the `TerminalError` in your handler and act accordingly.
 
 </GlobalTab>
 <GlobalTab title="Restate TS">
 
-Wrap LLM calls in `ctx.run()` and pass [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html) to control retry behavior:
+To set a separate retry policy for LLM calls, pass [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html) to `ctx.run()`:
 
 ```typescript
 // Retries up to 3 times with exponential backoff
@@ -114,12 +154,12 @@ const result = await ctx.run(
 );
 ```
 
-If you don't pass `RunOptions`, the call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults). See also [custom retry policies](/guides/error-handling#at-the-run-block-level) for `ctx.run` steps.
+Once these retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the `TerminalError` in your handler and act accordingly.
 
 </GlobalTab>
 <GlobalTab title="Restate Py">
 
-Wrap LLM calls in `ctx.run_typed()` and pass [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) to control retry behavior:
+To set a separate retry policy for LLM calls, pass [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) to `ctx.run_typed()`:
 
 ```python
 # Retries up to 3 times with exponential backoff
@@ -132,28 +172,24 @@ result = await ctx.run_typed(
 )
 ```
 
-If you don't pass `RunOptions`, the call inherits the retry policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults). See also [custom retry policies](/guides/error-handling#at-the-run-block-level) for `.run` actions.
+Once these retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the `TerminalError` in your handler and act accordingly.
+
 
 </GlobalTab>
 </GlobalTabs>
 
 ## Tool execution errors
 
-<GlobalTabs className={"hidden-tabs"}>
-<GlobalTab title="Vercel AI">
-
-When agent tools use Restate Context actions like `ctx.run`, Restate automatically retries transient errors in these operations. This makes your tools resilient to network failures, database hiccups, and other temporary issues. For all operations that might suffer from transient errors, use Context actions.
+Restate makes tool execution resilient by retrying transient errors and propagating terminal ones.
 
-For errors that should not be retried, throw a terminal error:
+### Transient errors
 
-```typescript {"CODE_LOAD::ts/src/tour/agents/terminal_error.ts#terminal_error"} 
-throw new TerminalError("This tool is not allowed to run for this input.");
-```
+<GlobalTabs className={"hidden-tabs"}>
+<GlobalTab title="Vercel AI">
 
-By default, the Vercel AI will convert any errors in tool executions into a message to the LLM, and the agent will decide how to proceed.
-This is often desirable, as the LLM can decide to use a different tool or provide a fallback answer.
+By default, the Vercel AI SDK converts any errors in tool executions into a message to the LLM, and the agent decides how to proceed. This is often desirable, as the LLM can decide to use a different tool or provide a fallback answer.
 
-However, if you use Restate Context actions like `ctx.run` in your tool execution, Restate will retry any transient errors in these actions until they succeed. For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html).
+When you wrap external calls in Restate Context actions like `ctx.run`, Restate retries transient errors within the Context action before the result reaches the agent. This makes your tools resilient to network failures, database hiccups, and other temporary issues. For all operations that might suffer from transient errors, use Context actions:
 
 ```typescript {"CODE_LOAD::ts/src/tour/agents/inline-tool-errors.ts#here"} 
 // Without ctx.run - error goes straight to agent
@@ -170,11 +206,159 @@ async function myToolWithRestate(ctx: restate.Context) {
 }
 ```
 
-Terminal errors thrown from Restate Context actions are not retried by Restate, and get processed by the Vercel AI.
-Also here, the Vercel AI will convert the error into a message to the LLM, and the agent will decide how to proceed.
+Restate then retries the whole invocation according to the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+
+</GlobalTab>
+<GlobalTab title="OpenAI Agents">
+
+Restate retries all transient errors to make your tools resilient to network failures, database hiccups, and other temporary issues.
+
+By default, it uses the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+
+</GlobalTab>
+<GlobalTab title="Google ADK">
+
+Restate retries all transient errors to make your tools resilient to network failures, database hiccups, and other temporary issues.
+
+By default, it uses the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+
+</GlobalTab>
+<GlobalTab title="Pydantic AI">
+
+Restate retries all transient errors to make your tools resilient to network failures, database hiccups, and other temporary issues.
+
+By default, it uses the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+
+</GlobalTab>
+<GlobalTab title="Restate TS">
+
+Restate retries all transient errors to make your tools resilient to network failures, database hiccups, and other temporary issues.
+
+By default, it uses the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+
+</GlobalTab>
+<GlobalTab title="Restate Py">
+
+Restate retries all transient errors to make your tools resilient to network failures, database hiccups, and other temporary issues.
+
+By default, it uses the policy configured at the [service or handler level](/services/configuration#how-to-configure), or otherwise the [Restate server's default policy](/guides/error-handling#configure-restate-server-defaults).
+
+</GlobalTab>
+</GlobalTabs>
+
+### Setting a retry policy on run actions
+
+<GlobalTabs className={"hidden-tabs"}>
+<GlobalTab title="Vercel AI">
+
+If you do run actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html):
+
+```ts {"CODE_LOAD::ts/src/ai/guides/errorhandling/error_handling.ts#retries"} 
+const result = await ctx.run(
+    "fetch-data",
+    () => fetch("/api/data"),
+    { maxRetryAttempts: 3 }
+);
+```
+
+See [custom retry policies](/guides/error-handling#at-the-run-block-level) for more options. When retries are exhausted, the tool will fail with a `TerminalError`.
+
+</GlobalTab>
+<GlobalTab title="OpenAI Agents">
+
+If you do run actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
+
+```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"} 
+result = await restate_context().run_typed(
+    "fetch data",
+    fetch_data,
+    RunOptions(max_attempts=3),
+    req=city,
+)
+```
+
+See [custom retry policies](/guides/error-handling#at-the-run-block-level) for more options. When retries are exhausted, the tool will fail with a `TerminalError`.
+
+</GlobalTab>
+<GlobalTab title="Google ADK">
+
+If you do run actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
+
+```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"} 
+result = await restate_context().run_typed(
+    "fetch data",
+    fetch_data,
+    RunOptions(max_attempts=3),
+    req=city,
+)
+```
+
+See [custom retry policies](/guides/error-handling#at-the-run-block-level) for more options. When retries are exhausted, the tool will fail with a `TerminalError`.
+
+</GlobalTab>
+<GlobalTab title="Pydantic AI">
+
+If you do run actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
+
+```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"} 
+result = await restate_context().run_typed(
+    "fetch data",
+    fetch_data,
+    RunOptions(max_attempts=3),
+    req=city,
+)
+```
+
+See [custom retry policies](/guides/error-handling#at-the-run-block-level) for more options. When retries are exhausted, the tool will fail with a `TerminalError`.
+
+</GlobalTab>
+<GlobalTab title="Restate TS">
+
+If you do `ctx.run` actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html):
+
+```ts {"CODE_LOAD::ts/src/ai/guides/errorhandling/error_handling.ts#retries"} 
+const result = await ctx.run(
+    "fetch-data",
+    () => fetch("/api/data"),
+    { maxRetryAttempts: 3 }
+);
+```
+
+See [custom retry policies](/guides/error-handling#at-the-run-block-level) for more options. When retries are exhausted, the tool will fail with a `TerminalError`.
+
+</GlobalTab>
+<GlobalTab title="Restate Py">
+
+For `ctx.run_typed` actions specifically, you can override the default retry policy by passing [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
+
+```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"} 
+result = await restate_context().run_typed(
+    "fetch data",
+    fetch_data,
+    RunOptions(max_attempts=3),
+    req=city,
+)
+```
+
+See [custom retry policies](/guides/error-handling#at-the-run-block-level) for more options. When retries are exhausted, the tool will fail with a `TerminalError`.
+
+</GlobalTab>
+</GlobalTabs>
+
+### Terminal errors
+
+For errors that should not be retried (invalid input, business rule violations, resource not found), raise a `TerminalError` from your tool. Restate does not retry these:
+
+<GlobalTabs className={"hidden-tabs"}>
+<GlobalTab title="Vercel AI">
+
+```typescript {"CODE_LOAD::ts/src/tour/agents/terminal_error.ts#terminal_error"} 
+throw new TerminalError("This tool is not allowed to run for this input.");
+```
+
+By default, the Vercel AI SDK converts the terminal error into a message to the LLM, and the agent decides how to proceed.
 
-In some cases, you might want to treat terminal tool execution errors as permanent failures and stop the agent instead of letting the LLM decide how to proceed.
-The Restate middleware provides two utilities to help with this:
+If you want to treat terminal tool errors as permanent failures and stop the agent instead, the Restate middleware provides two utilities:
 
 <AccordionGroup>
 <Accordion title="Fail the agent on terminal tool errors">
@@ -233,78 +417,75 @@ if (terminalSteps.length > 0) {
 </GlobalTab>
 <GlobalTab title="OpenAI Agents">
 
-When agent tools use Restate Context actions like `ctx.run`, Restate automatically retries transient errors in these operations. This makes your tools resilient to network failures, database hiccups, and other temporary issues. For all operations that might suffer from transient errors, use Context actions.
-
-For errors that should not be retried, throw a terminal error:
-
-```python
+```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"} 
 from restate import TerminalError
 
 raise TerminalError("This tool is not allowed to run for this input.")
 ```
 
-For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37).
+The Restate OpenAI integration raises terminal errors to your handler, where you can catch and handle them:
+
+```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/openai-agents/tour-of-agents/app/error_handling.py#handle"} 
+@agent_service.handler()
+async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
+    try:
+        run_opts = RunOptions(
+            max_attempts=3, initial_retry_interval=timedelta(seconds=2)
+        )
+        result = await DurableRunner.run(agent, req.message, run_options=run_opts)
+    except restate.TerminalError as e:
+        # Handle terminal errors gracefully
+        return f"The agent couldn't complete the request: {e.message}"
 
-By default, the Restate OpenAI integration will raise any terminal errors in tool executions and will let you handle them in your handler.
+    return result.final_output
+```
 
-<Warning>
+<Accordion title={"Setting `failure_error_function` to `None`"}>
     The OpenAI Agent SDK also allows setting `failure_error_function` to `None`, which will rethrow any error in the agent execution as-is.
     Also for example invalid LLM responses (e.g. tool call with invalid arguments or to a tool that doesn't exist).
-    The error will then lead to Restate retries. Restate will recover the invocation by replaying the journal entries.
-    This can lead to infinite retries if the error is not transient.
-    Therefore, be careful when using this option and handle errors appropriately in your agent logic.
-    You also might want to set [a retry policy at the service or handler level](/services/configuration#how-to-configure) to avoid infinite retries.
-</Warning>
+    The error will then lead to Restate retries. Since the error isn't transient, the invocation will be paused when the retries are exhausted, and will require manual intervention.
+    Therefore, we do not recommend using this setting and instead recommend handling these errors appropriately in your agent logic.
+</Accordion>
 
 </GlobalTab>
 <GlobalTab title="Google ADK">
 
-When agent tools use Restate Context actions like `ctx.run`, Restate automatically retries transient errors in these operations. This makes your tools resilient to network failures, database hiccups, and other temporary issues. For all operations that might suffer from transient errors, use Context actions.
-
-For errors that should not be retried, throw a terminal error:
-
-```python
+```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"} 
 from restate import TerminalError
 
 raise TerminalError("This tool is not allowed to run for this input.")
 ```
 
-For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37).
+You can catch these terminal errors in your handler and handle them accordingly:
 
-For errors which should not be retried, raise terminal errors from within your tool implementations. You can catch these terminal errors in your handler and handle them accordingly.
+```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/google-adk/tour-of-agents/app/error_handling.py#handle"} 
+@agent_service.handler()
+async def run(ctx: restate.ObjectContext, req: WeatherPrompt) -> str | None:
+    try:
+        events = runner.run_async(
+            user_id=ctx.key(),
+            session_id=req.session_id,
+            new_message=Content(role="user", parts=[Part.from_text(text=req.message)]),
+        )
+        return await parse_agent_response(events)
+    except TerminalError as e:
+        # Handle the error appropriately, e.g., log it or return a default response
+        print(f"An error occurred: {e}")
+        return "Sorry, I'm unable to process your request at the moment."
+```
 
 </GlobalTab>
 <GlobalTab title="Pydantic AI">
 
-When agent tools use Restate Context actions like `ctx.run`, Restate automatically retries transient errors in these operations. This makes your tools resilient to network failures, database hiccups, and other temporary issues. For all operations that might suffer from transient errors, use Context actions.
-
-For example, wrapping a tool call in `restate_context().run_typed()` makes it durable with automatic retries:
-
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#here"} 
-async def get_weather(city: WeatherRequest) -> WeatherResponse:
-    """Get the current weather for a given city."""
-    return await restate_context().run_typed(
-        f"Get weather {city}", fetch_weather, req=city
-    )
-```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py" />
-
-For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37).
-
-For errors that should not be retried, raise a terminal error:
-
-```python
+```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"} 
 from restate import TerminalError
 
 raise TerminalError("This tool is not allowed to run for this input.")
 ```
 
-Restate retries tool executions by default until they succeed.
-For errors which should not be retried, raise terminal errors from within your tool implementations.
-
 You can catch these terminal errors in your handler and handle them accordingly:
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#handle"} 
+```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#handle"} 
 @agent_service.handler()
 async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
     try:
@@ -314,17 +495,10 @@ async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
         return f"The agent couldn't complete the request: {e.message}"
     return result.output
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py" />
 
 </GlobalTab>
 <GlobalTab title="Restate TS">
 
-Restate automatically retries transient errors. This makes your tools resilient to network failures, database hiccups, and other temporary issues.
-
-For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html).
-
-When a tool encounters an unrecoverable error (e.g., resource not found, invalid input, business rule violation), throw a `TerminalError` to stop retries immediately:
-
 ```typescript {"CODE_LOAD::ts/src/tour/agents/terminal_error.ts#terminal_error"} 
 throw new TerminalError("This tool is not allowed to run for this input.");
 ```
@@ -334,13 +508,7 @@ You can catch and handle terminal errors in your agent logic if needed.
 </GlobalTab>
 <GlobalTab title="Restate Py">
 
-Restate automatically retries transient errors. This makes your tools resilient to network failures, database hiccups, and other temporary issues.
-
-For `ctx.run` actions specifically, you can set a retry policy via [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37).
-
-When a tool encounters an unrecoverable error (e.g., resource not found, invalid input, business rule violation), raise a `TerminalError` to stop retries immediately:
-
-```python
+```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"} 
 from restate import TerminalError
 
 raise TerminalError("This tool is not allowed to run for this input.")
@@ -355,6 +523,6 @@ You can catch and handle terminal errors in your agent logic if needed.
     To learn more about error handling with Restate, consult the [error handling guide](/guides/error-handling).
 </Tip>
 
-## Combining with rollback
+### Combining with rollback
 
-For multi-step agent workflows where steps have side effects (bookings, payments, emails), combine error handling with [compensation/rollback patterns](/ai/patterns/rollback) to undo completed work when later steps fail.
+For multi-step agent workflows where steps have side effects (bookings, payments, emails), combine terminal errors with [compensation/rollback patterns](/ai/patterns/rollback) to undo completed work before finishing.
diff --git a/docs/guides/error-handling.mdx b/docs/guides/error-handling.mdx
index c107b86f..bd1d0b10 100644
--- a/docs/guides/error-handling.mdx
+++ b/docs/guides/error-handling.mdx
@@ -4,7 +4,7 @@ description: "Learn how to handle transient and terminal errors in your applicat
 tags: ["development"]
 ---
 
-Restate handles retries for failed invocations. By default, Restate infinitely retries all errors with an exponential backoff strategy.
+Restate handles retries for failed invocations. By default, Restate retries all errors with an exponential backoff strategy.
 
 This guide helps you fine-tune the retry behavior for your use cases.
 
@@ -40,7 +40,12 @@ The retry policy can be set on each individual handler, or for all the handlers
         To configure the retry policy on a service/handler level, check [retry service configuration](/services/configuration#retries).
     </Accordion>
     <Accordion title="Configure Restate server defaults">
-        Via the [`restate-server` configuration file](/server/configuration):
+
+        The default retry policy will retry the invocation a limited number of times, after which the invocation will be paused if no progress can be made. To resume a paused invocation, check the [resume documentation](/services/invocation/managing-invocations#resume).
+
+        Check the [configuration reference](/references/server-config) for the `default-retry-policy`.
+
+        You can change the default behavior via the [`restate-server` configuration file](/server/configuration):
 
         ```toml restate.toml
         [invocation.default-retry-policy]
@@ -65,7 +70,6 @@ The retry policy can be set on each individual handler, or for all the handlers
         RESTATE_DEFAULT_RETRY_POLICY__MAX_INTERVAL="10s"
         ```
 
-        This retry policy will retry the invocation 100 times, after which the invocation will be paused if no progress can be made. To resume a paused invocation, check the paragraph below.
 
         You can also retry forever, without ever pausing or killing the invocation:
 
@@ -73,7 +77,6 @@ The retry policy can be set on each individual handler, or for all the handlers
         RESTATE_DEFAULT_RETRY_POLICY__MAX_ATTEMPTS=unlimited
         ```
 
-        Check the [configuration documentation](/server/configuration) and [reference](/references/server-config) for the `default-retry-policy`.
 
         When a retry policy is unset, Restate by default will retry undefinitely, alike setting `max-attempts = "unlimited"`.
     </Accordion>
@@ -160,7 +163,7 @@ If you set a maximum number of attempts, then the run block will fail with a Ter
 
 ## Application errors (terminal)
 
-By default, Restate infinitely retries all errors.
+By default, Restate retries all errors.
 In some cases, you might not want to retry an error (e.g. because of business logic, because the issue is not transient, ...).
 
 For these cases you can throw a terminal error. Terminal errors are permanent and are not retried by Restate.
diff --git a/snippets/python/src/ai/error_handling.py b/snippets/python/src/ai/error_handling.py
new file mode 100644
index 00000000..0ad60f92
--- /dev/null
+++ b/snippets/python/src/ai/error_handling.py
@@ -0,0 +1,29 @@
+import restate
+from restate import Context, Service
+from restate.exceptions import TerminalError
+
+
+my_service = Service("MyService")
+
+
+def fetch_data() -> str:
+    return ""
+
+
+@my_service.handler("myServiceHandler")
+async def my_service_handler(ctx: Context, greeting: str) -> str:
+    # <start_retries>
+    result = await restate_context().run_typed(
+        "fetch data",
+        fetch_data,
+        RunOptions(max_attempts=3),
+        req=city,
+    )
+    # <end_retries>
+
+
+    # <start_terminal>
+    from restate import TerminalError
+
+    raise TerminalError("This tool is not allowed to run for this input.")
+    # <end_terminal>
diff --git a/snippets/ts/src/ai/guides/errorhandling/error_handling.ts b/snippets/ts/src/ai/guides/errorhandling/error_handling.ts
new file mode 100644
index 00000000..29af6054
--- /dev/null
+++ b/snippets/ts/src/ai/guides/errorhandling/error_handling.ts
@@ -0,0 +1,18 @@
+import * as restate from "@restatedev/restate-sdk";
+
+export const myAgent = restate.service({
+    name: "my-agent",
+    handlers: {
+        run: async (ctx: restate.Context, { message }: { message: string }) => {
+
+            // <start_retries>
+            const result = await ctx.run(
+                "fetch-data",
+                () => fetch("/api/data"),
+                { maxRetryAttempts: 3 }
+            );
+            // <end_retries>
+            return `${message}!`;
+        },
+    },
+});

From 9b5eeedd3d3d26df37a42cae65291c2a004fc204 Mon Sep 17 00:00:00 2001
From: Giselle van Dongen <giselle@restate.dev>
Date: Wed, 15 Apr 2026 16:38:48 +0200
Subject: [PATCH 3/4] update error handling pages

---
 docs/ai/patterns/error-handling.mdx      |  4 ++--
 snippets/python/src/ai/error_handling.py | 19 ++++++++++++++-----
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/docs/ai/patterns/error-handling.mdx b/docs/ai/patterns/error-handling.mdx
index 5f026434..9a6bb03e 100644
--- a/docs/ai/patterns/error-handling.mdx
+++ b/docs/ai/patterns/error-handling.mdx
@@ -347,7 +347,7 @@ See [custom retry policies](/guides/error-handling#at-the-run-block-level) for m
 
 ### Terminal errors
 
-For errors that should not be retried (invalid input, business rule violations, resource not found), raise a `TerminalError` from your tool. Restate does not retry these:
+For errors that should not be retried (invalid input, business rule violations, resource not found), use a `TerminalError` in your tool. Restate does not retry these:
 
 <GlobalTabs className={"hidden-tabs"}>
 <GlobalTab title="Vercel AI">
@@ -523,6 +523,6 @@ You can catch and handle terminal errors in your agent logic if needed.
     To learn more about error handling with Restate, consult the [error handling guide](/guides/error-handling).
 </Tip>
 
-### Combining with rollback
+## Combining with rollback
 
 For multi-step agent workflows where steps have side effects (bookings, payments, emails), combine terminal errors with [compensation/rollback patterns](/ai/patterns/rollback) to undo completed work before finishing.
diff --git a/snippets/python/src/ai/error_handling.py b/snippets/python/src/ai/error_handling.py
index 0ad60f92..7122d2de 100644
--- a/snippets/python/src/ai/error_handling.py
+++ b/snippets/python/src/ai/error_handling.py
@@ -1,26 +1,35 @@
 import restate
 from restate import Context, Service
-from restate.exceptions import TerminalError
-
+from restate import RunOptions
+from restate.ext.pydantic import restate_context
 
 my_service = Service("MyService")
 
 
-def fetch_data() -> str:
+def fetch_data(req: str) -> str:
     return ""
 
 
 @my_service.handler("myServiceHandler")
-async def my_service_handler(ctx: Context, greeting: str) -> str:
+async def my_service_handler(ctx: Context, req: str) -> str:
     # <start_retries>
     result = await restate_context().run_typed(
         "fetch data",
         fetch_data,
         RunOptions(max_attempts=3),
-        req=city,
+        req=req,
     )
     # <end_retries>
 
+    # <start_retries_restate>
+    result = await ctx.run_typed(
+        "fetch data",
+        fetch_data,
+        RunOptions(max_attempts=3),
+        req=req,
+    )
+    # <end_retries_restate>
+
 
     # <start_terminal>
     from restate import TerminalError

From a7f1e2b743ca9de0e39413437d126152fb087d86 Mon Sep 17 00:00:00 2001
From: Giselle van Dongen <giselle@restate.dev>
Date: Fri, 15 May 2026 12:09:43 +0200
Subject: [PATCH 4/4] small fix

---
 docs/ai/patterns/error-handling.mdx | 51 ++++++++++++++---------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/docs/ai/patterns/error-handling.mdx b/docs/ai/patterns/error-handling.mdx
index a96ab2e0..f61d215e 100644
--- a/docs/ai/patterns/error-handling.mdx
+++ b/docs/ai/patterns/error-handling.mdx
@@ -113,7 +113,7 @@ async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
 
     return result.final_output
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/openai-agents/tour-of-agents/app/error_handling.py" />
+<GitHubLink url="https://github.com/restatedev/ai-examples/blob/main/openai-agents/tour-of-agents/app/error_handling.py" />
 
 Once these retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the Terminal Error in your handler and act accordingly.
 
@@ -122,7 +122,7 @@ Once these retries are exhausted, the invocation fails with a `TerminalError` an
 
 To set a separate retry policy for LLM calls, pass [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) to the Restate plugin when activating it for your ADK App:
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/google-adk/tour-of-agents/app/error_handling.py#retries"}
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/google-adk/tour-of-agents/app/error_handling.py#retries"} 
 run_options = RunOptions(max_attempts=3, initial_retry_interval=timedelta(seconds=1))
 app = App(
     name=APP_NAME,
@@ -130,7 +130,7 @@ app = App(
     plugins=[RestatePlugin(run_options=run_options)],
 )
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/google-adk/tour-of-agents/app/error_handling.py" />
+<GitHubLink url="https://github.com/restatedev/ai-examples/blob/main/google-adk/tour-of-agents/app/error_handling.py" />
 
 Once these retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the Terminal Error in your handler and act accordingly.
 
@@ -139,13 +139,13 @@ Once these retries are exhausted, the invocation fails with a `TerminalError` an
 
 To set a separate retry policy for LLM calls, pass [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37) to `RestateAgent`:
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#retries"}
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/pydantic-ai/tour-of-agents/app/error_handling.py#retries"} 
 restate_agent = RestateAgent(
     agent,
     run_options=RunOptions(max_attempts=3, initial_retry_interval=timedelta(seconds=2)),
 )
 ```
-<GitHubLink url="https://github.com/restatedev/ai-examples/blob/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py" />
+<GitHubLink url="https://github.com/restatedev/ai-examples/blob/main/pydantic-ai/tour-of-agents/app/error_handling.py" />
 
 Once these retries are exhausted, the invocation fails with a `TerminalError` and won't be retried further. You can catch the Terminal Error in your handler and act accordingly.
 
@@ -278,7 +278,7 @@ By default, it uses the policy configured at the [service or handler level](/ser
 
 If you do run actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html):
 
-```ts {"CODE_LOAD::ts/src/ai/guides/errorhandling/error_handling.ts#retries"}
+```ts {"CODE_LOAD::ts/src/ai/guides/errorhandling/error_handling.ts#retries"} 
 const result = await ctx.run(
     "fetch-data",
     () => fetch("/api/data"),
@@ -293,12 +293,12 @@ See [custom retry policies](/guides/error-handling#at-the-run-block-level) for m
 
 If you do run actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
 
-```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"}
+```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"} 
 result = await restate_context().run_typed(
     "fetch data",
     fetch_data,
     RunOptions(max_attempts=3),
-    req=city,
+    req=req,
 )
 ```
 
@@ -309,12 +309,12 @@ See [custom retry policies](/guides/error-handling#at-the-run-block-level) for m
 
 If you do run actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
 
-```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"}
+```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"} 
 result = await restate_context().run_typed(
     "fetch data",
     fetch_data,
     RunOptions(max_attempts=3),
-    req=city,
+    req=req,
 )
 ```
 
@@ -325,12 +325,12 @@ See [custom retry policies](/guides/error-handling#at-the-run-block-level) for m
 
 If you do run actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
 
-```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"}
+```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"} 
 result = await restate_context().run_typed(
     "fetch data",
     fetch_data,
     RunOptions(max_attempts=3),
-    req=city,
+    req=req,
 )
 ```
 
@@ -341,7 +341,7 @@ See [custom retry policies](/guides/error-handling#at-the-run-block-level) for m
 
 If you do `ctx.run` actions in your tools, you can override the default retry policy by passing [`RunOptions`](https://restatedev.github.io/sdk-typescript/types/_restatedev_restate-sdk.RunOptions.html):
 
-```ts {"CODE_LOAD::ts/src/ai/guides/errorhandling/error_handling.ts#retries"}
+```ts {"CODE_LOAD::ts/src/ai/guides/errorhandling/error_handling.ts#retries"} 
 const result = await ctx.run(
     "fetch-data",
     () => fetch("/api/data"),
@@ -356,12 +356,12 @@ See [custom retry policies](/guides/error-handling#at-the-run-block-level) for m
 
 For `ctx.run_typed` actions specifically, you can override the default retry policy by passing [`RunOptions`](https://github.com/restatedev/sdk-python/blob/main/python/restate/context.py#L37):
 
-```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"}
+```python {"CODE_LOAD::python/src/ai/error_handling.py#retries"} 
 result = await restate_context().run_typed(
     "fetch data",
     fetch_data,
     RunOptions(max_attempts=3),
-    req=city,
+    req=req,
 )
 ```
 
@@ -377,7 +377,7 @@ For errors that should not be retried (invalid input, business rule violations,
 <GlobalTabs className={"hidden-tabs"}>
 <GlobalTab title="Vercel AI">
 
-```typescript {"CODE_LOAD::ts/src/tour/agents/terminal_error.ts#terminal_error"}
+```typescript {"CODE_LOAD::ts/src/tour/agents/terminal_error.ts#terminal_error"} 
 throw new TerminalError("This tool is not allowed to run for this input.");
 ```
 
@@ -442,7 +442,7 @@ if (terminalSteps.length > 0) {
 </GlobalTab>
 <GlobalTab title="OpenAI Agents">
 
-```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"}
+```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"} 
 from restate import TerminalError
 
 raise TerminalError("This tool is not allowed to run for this input.")
@@ -450,7 +450,7 @@ raise TerminalError("This tool is not allowed to run for this input.")
 
 The Restate OpenAI integration raises terminal errors to your handler, where you can catch and handle them:
 
-```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/openai-agents/tour-of-agents/app/error_handling.py#handle"}
+```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/openai-agents/tour-of-agents/app/error_handling.py#handle"} 
 @agent_service.handler()
 async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
     try:
@@ -475,7 +475,7 @@ async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
 </GlobalTab>
 <GlobalTab title="Google ADK">
 
-```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"}
+```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"} 
 from restate import TerminalError
 
 raise TerminalError("This tool is not allowed to run for this input.")
@@ -483,7 +483,7 @@ raise TerminalError("This tool is not allowed to run for this input.")
 
 You can catch these terminal errors in your handler and handle them accordingly:
 
-```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/google-adk/tour-of-agents/app/error_handling.py#handle"}
+```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/google-adk/tour-of-agents/app/error_handling.py#handle"} 
 @agent_service.handler()
 async def run(ctx: restate.ObjectContext, req: WeatherPrompt) -> str | None:
     try:
@@ -495,14 +495,13 @@ async def run(ctx: restate.ObjectContext, req: WeatherPrompt) -> str | None:
         return await parse_agent_response(events)
     except TerminalError as e:
         # Handle the error appropriately, e.g., log it or return a default response
-        print(f"An error occurred: {e}")
         return "Sorry, I'm unable to process your request at the moment."
 ```
 
 </GlobalTab>
 <GlobalTab title="Pydantic AI">
 
-```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"}
+```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"} 
 from restate import TerminalError
 
 raise TerminalError("This tool is not allowed to run for this input.")
@@ -510,7 +509,7 @@ raise TerminalError("This tool is not allowed to run for this input.")
 
 You can catch these terminal errors in your handler and handle them accordingly:
 
-```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/error-handling/pydantic-ai/tour-of-agents/app/error_handling.py#handle"}
+```python {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/pydantic-ai/tour-of-agents/app/error_handling.py#handle"} 
 @agent_service.handler()
 async def run(_ctx: restate.Context, req: WeatherPrompt) -> str:
     try:
@@ -528,7 +527,7 @@ When agent tools use Restate Context actions like `ctx.run`, Restate automatical
 
 For example, wrapping a tool call in `restate_context().run_typed()` makes it durable with automatic retries:
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/langchain-python/tour-of-agents/app/error_handling.py#here"}
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/langchain-python/tour-of-agents/app/error_handling.py#here"} 
 @tool
 async def get_weather(city: WeatherRequest) -> WeatherResponse:
     """Get the current weather for a given city."""
@@ -548,7 +547,7 @@ raise TerminalError("This tool is not allowed to run for this input.")
 
 Restate retries tool executions until they succeed. Terminal errors propagate past LangChain's tool-error handling back to the service handler, where you can catch them:
 
-```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/langchain-python/tour-of-agents/app/error_handling.py#handle"}
+```python error_handling.py {"CODE_LOAD::https://raw.githubusercontent.com/restatedev/ai-examples/refs/heads/main/langchain-python/tour-of-agents/app/error_handling.py#handle"} 
 try:
     result = await agent.ainvoke({"messages": req.message})
 except restate.TerminalError as e:
@@ -568,7 +567,7 @@ You can catch and handle terminal errors in your agent logic if needed.
 </GlobalTab>
 <GlobalTab title="Restate Py">
 
-```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"}
+```python {"CODE_LOAD::python/src/ai/error_handling.py#terminal"} 
 from restate import TerminalError
 
 raise TerminalError("This tool is not allowed to run for this input.")