Skip to content

Commit ec20fac

Browse files
Jeomon and claude committed
Add imagegen tool with auto image-pass from incoming messages
imagegen tool generates or edits images via the configured provider. Incoming image paths (from user messages on any channel) are injected per-message as _incoming_image_paths and used automatically when the images param is omitted — so a user sending a photo + caption triggers editing without the LLM needing to manually pass paths. Agent constructor gains image= param; _image_provider and _incoming_image_paths are registered as tool extensions. _build_agents and start.py wired to pass the configured image provider through. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 69207b5 commit ec20fac

4 files changed

Lines changed: 149 additions & 3 deletions

File tree

operator_use/agent/service.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ def __init__(
6666
subagent_config=None,
6767
acp_registry: dict | None = None,
6868
plugins: "list[Plugin] | None" = None,
69+
image=None,
6970
):
7071
self.agent_id = agent_id
7172
self.description = description
@@ -109,6 +110,7 @@ def __init__(
109110
self.tool_register.set_extension("_process_store", self.process_store)
110111
self.tool_register.set_extension("_acp_registry", acp_registry or {})
111112
self.tool_register.set_extension("_llm", self.llm)
113+
self.tool_register.set_extension("_image_provider", image)
112114
self.tool_register.set_extension("_agent", self)
113115
self.tool_register.set_extension("_agent_id", self.agent_id)
114116

@@ -184,6 +186,9 @@ async def run(
184186
self.tool_register.set_extension("_account_id", incoming.account_id)
185187
self.tool_register.set_extension("_metadata", incoming.metadata or {})
186188
self.tool_register.set_extension("_session_id", session_id)
189+
from operator_use.bus.views import ImagePart as _ImagePart
190+
_img_paths = [p for part in incoming.parts if isinstance(part, _ImagePart) for p in (part.paths or [])]
191+
self.tool_register.set_extension("_incoming_image_paths", _img_paths or None)
187192
if pending_replies is not None:
188193
self.tool_register.set_extension("_pending_replies", pending_replies)
189194

operator_use/agent/tools/builtin/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from operator_use.agent.tools.builtin.acp_agents import acpagents
1111
from operator_use.agent.tools.builtin.local_agents import localagents
1212
from operator_use.agent.tools.builtin.control_center import control_center
13+
from operator_use.agent.tools.builtin.imagegen import imagegen
1314

1415
FILESYSTEM_TOOLS = [read_file,write_file,edit_file,list_dir,patch_file]
1516
WEB_TOOLS = [web_search,web_fetch]
@@ -19,8 +20,9 @@
1920
PROCESS_TOOLS = [process, control_center]
2021
OTHER_AGENT_TOOLS = [subagents,acpagents,localagents]
2122
CHANNEL_TOOLS = [channel]
23+
IMAGE_TOOLS = [imagegen]
2224

23-
AGENT_TOOLS = FILESYSTEM_TOOLS + WEB_TOOLS + TERMINAL_TOOLS + CRON_TOOLS + PROCESS_TOOLS + OTHER_AGENT_TOOLS
25+
AGENT_TOOLS = FILESYSTEM_TOOLS + WEB_TOOLS + TERMINAL_TOOLS + CRON_TOOLS + PROCESS_TOOLS + OTHER_AGENT_TOOLS + IMAGE_TOOLS
2426

2527
NON_AGENT_TOOLS = MESSAGE_TOOLS + CHANNEL_TOOLS
2628

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
"""Image generation and editing tool.
2+
3+
Generates or edits images using the configured image provider.
4+
5+
If the current conversation contains images (sent by the user via any channel),
6+
they are automatically available via the ``_incoming_image_paths`` extension and
7+
used as input images when the ``images`` parameter is omitted.
8+
9+
Flow:
10+
1. User sends a message — optionally with one or more images + caption.
11+
2. Agent calls ``imagegen`` with the caption as ``prompt``.
12+
3. Tool picks up any incoming image paths automatically.
13+
4. Provider generates or edits the image.
14+
5. Result is sent back to the user and the output path is returned.
15+
"""
16+
17+
from __future__ import annotations
18+
19+
import uuid
20+
from datetime import datetime
21+
from pathlib import Path
22+
from typing import Optional
23+
24+
from pydantic import BaseModel, Field
25+
26+
from operator_use.bus.views import ImagePart, OutgoingMessage, TextPart
27+
from operator_use.tools import Tool, ToolResult
28+
29+
30+
class ImageGen(BaseModel):
    # Argument schema for the ``imagegen`` tool. Documented with comments
    # rather than a class docstring so the generated schema stays unchanged.

    # Required: what to create, or the edit to apply to the input images.
    prompt: str = Field(
        ...,
        description=(
            "Text description of the image to generate, "
            "or the edit/modification to apply when input images are provided "
            "(e.g. 'make it look like a watercolour painting', 'add a sunset sky', "
            "'remove the background')."
        ),
    )

    # None -> fall back to images from the incoming message; [] -> force generation.
    images: Optional[list[str]] = Field(
        None,
        description=(
            "Optional list of input image file paths to edit or use as references. If omitted, "
            "any images the user sent in the current message are used automatically. Pass an "
            "empty list [] to force pure text-to-image generation even when the user sent images."
        ),
    )

    # Destination file; when absent the tool picks a unique path itself.
    output_path: Optional[str] = Field(
        None,
        description=(
            "Where to save the generated image. If omitted, a unique file is created inside the agent workspace."
        ),
    )

    # Text sent together with the image when the result is delivered.
    caption: Optional[str] = Field(
        None,
        description="Optional caption to send alongside the generated image.",
    )

    # Whether the tool itself delivers the image to the user.
    send_result: bool = Field(
        True,
        description="If True (default), send the generated image back to the user automatically.",
    )
61+
62+
63+
@Tool(
    name="imagegen",
    description=(
        "Generate or edit an image using the configured image provider.\n\n"
        "Generation: call with just a prompt to create an image from scratch.\n"
        "Editing: provide input images (or let the tool pick up images the user sent) "
        "and a prompt describing the edit — e.g. 'make it a pencil sketch', "
        "'add a rainbow', 'change the background to a forest'.\n\n"
        "The generated image is sent back to the user automatically (send_result=True). "
        "The output file path is always returned so you can reference or share it further."
    ),
    model=ImageGen,
)
async def imagegen(
    prompt: str,
    images: list[str] | None = None,
    output_path: str | None = None,
    caption: str | None = None,
    send_result: bool = True,
    **kwargs,
) -> ToolResult:
    """Generate or edit an image and optionally send it back to the user.

    Args:
        prompt: Description of the image to create, or of the edit to apply
            when input images are used.
        images: Input image paths. ``None`` falls back to the
            ``_incoming_image_paths`` extension; an empty list forces pure
            generation.
        output_path: Destination file. When omitted, a unique ``.png`` path is
            created under ``<_workspace>/generated`` or, without a workspace,
            under the system temp directory.
        caption: Optional text sent alongside the image.
        send_result: When true, publish the image on ``_bus`` to the current
            ``_channel`` / ``_chat_id``.
        **kwargs: Tool extensions read here: ``_image_provider``,
            ``_incoming_image_paths``, ``_workspace``, ``_bus``, ``_channel``,
            ``_chat_id`` and ``_account_id``.

    Returns:
        An error ``ToolResult`` when no provider is configured, the output
        directory cannot be prepared, or the provider raises. Otherwise a
        success ``ToolResult`` containing the saved path, with a note appended
        if delivery was requested but did not happen.
    """
    provider = kwargs.get("_image_provider")
    if provider is None:
        return ToolResult.error_result(
            "No image provider is configured. "
            "Enable one under 'image' in config (e.g. provider: openai, model: dall-e-3)."
        )

    # Resolve input images: explicit list -> incoming message images -> None.
    # An explicit [] skips auto-detection and forces pure generation.
    if images is None:
        images = kwargs.get("_incoming_image_paths")
    images = images or None

    # Make sure the destination directory exists for BOTH the caller-supplied
    # path and the auto-generated one; otherwise the provider's write can fail
    # on a fresh directory.
    try:
        if output_path:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        else:
            workspace = kwargs.get("_workspace")
            if workspace:
                # Path() tolerates a workspace registered as a plain string.
                gen_dir = Path(workspace) / "generated"
            else:
                import tempfile

                gen_dir = Path(tempfile.gettempdir()) / "operator_generated"
            gen_dir.mkdir(parents=True, exist_ok=True)
            ts = datetime.now().strftime("%Y%m%d_%H%M%S")
            # Timestamp plus random suffix keeps concurrent calls from colliding.
            output_path = str(gen_dir / f"{ts}_{uuid.uuid4().hex[:8]}.png")
    except OSError as e:
        return ToolResult.error_result(
            f"Could not prepare output directory: {type(e).__name__}: {e}"
        )

    # Generate / edit
    try:
        await provider.agenerate(prompt, output_path, images=images)
    except Exception as e:
        return ToolResult.error_result(f"Image generation failed: {type(e).__name__}: {e}")

    # Send result back to user. The image already exists on disk at this
    # point, so a delivery problem is reported in the result instead of
    # discarding the saved path.
    delivery_note = ""
    if send_result:
        bus = kwargs.get("_bus")
        channel = kwargs.get("_channel")
        chat_id = kwargs.get("_chat_id")

        if bus and channel and chat_id:
            parts: list = [ImagePart(paths=[output_path])]
            if caption:
                parts.append(TextPart(content=caption))
            try:
                await bus.publish_outgoing(
                    OutgoingMessage(
                        channel=channel,
                        chat_id=chat_id,
                        # The extension may be registered as None; fall back to "".
                        account_id=kwargs.get("_account_id") or "",
                        parts=parts,
                    )
                )
            except Exception as e:
                delivery_note = f" (could not be sent to the user: {type(e).__name__}: {e})"
        else:
            delivery_note = " (not sent to the user: no active channel/chat is available)"

    action = "edited" if images else "generated"
    return ToolResult.success_result(
        f"Image {action} and saved to: {output_path}{delivery_note}"
    )

operator_use/cli/start.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def _get_plugin_registry() -> dict[str, type]:
216216
return PLUGIN_REGISTRY
217217

218218

219-
def _build_agents(config: Config, cron, gateway, bus) -> dict[str, Agent]:
219+
def _build_agents(config: Config, cron, gateway, bus, image=None) -> dict[str, Agent]:
220220
"""Instantiate one Agent per agent definition in config."""
221221
from operator_use.agent.tools.builtin import resolve_tools
222222

@@ -269,6 +269,7 @@ def _build_agents(config: Config, cron, gateway, bus) -> dict[str, Agent]:
269269
subagent_config=defaults.subagent,
270270
acp_registry=config.acp_agents,
271271
plugins=plugins,
272+
image=image,
272273
)
273274

274275
for agent in agents.values():
@@ -484,7 +485,8 @@ async def on_job(job: CronJob):
484485
cron_store = USERDATA_DIR / "crons.json"
485486
cron = Cron(store_path=cron_store, on_job=on_job)
486487

487-
agents = _build_agents(config, cron=cron, gateway=gateway, bus=bus)
488+
image_provider = _make_image(config)
489+
agents = _build_agents(config, cron=cron, gateway=gateway, bus=bus, image=image_provider)
488490

489491
async def _graceful_restart() -> None:
490492
"""Cancel all running asyncio tasks so main()'s finally block can run cleanly."""

0 commit comments

Comments
 (0)