Skip to content

Commit 69207b5

Browse files
Jeomon authored and claude committed
Add image editing support (images param) to all image providers
BaseImage protocol updated: generate/agenerate now accept optional images: list[str] for image-to-image editing.

- OpenAI: gpt-image-1 accepts up to 16 reference images via images.edit(); dall-e-2 uses the first image as source plus an optional second image as mask; dall-e-3 raises ValueError (editing not supported).
- Google: uses edit_image() with RawReferenceImage objects (Vertex AI required for editing; generation still uses the standard API key).
- Together: encodes the first image as a base64 data URL passed via extra_body image_url, with a strength param for img2img-capable models.
- fal.ai: switches to the image_to_image_model endpoint when images are provided, encoding the first image as a base64 data URL with configurable strength.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent b582296 commit 69207b5

5 files changed

Lines changed: 348 additions & 167 deletions

File tree

operator_use/providers/base.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -158,23 +158,31 @@ def model(self) -> str:
158158
"""The name of the image generation model being used."""
159159
...
160160

161-
def generate(self, prompt: str, output_path: str, **kwargs) -> None:
162-
"""Generate an image from a text prompt and save it to a file.
161+
def generate(self, prompt: str, output_path: str, images: list[str] | None = None, **kwargs) -> None:
162+
"""Generate or edit an image and save it to a file.
163163
164164
Args:
165-
prompt: The text description to generate an image from.
165+
prompt: Text description of the image to generate or the edit to apply.
166166
output_path: Path where the generated image file will be saved.
167-
**kwargs: Provider-specific parameters (size, quality, style, etc.).
167+
images: Optional list of input image file paths. When provided, the
168+
provider edits or uses these as references rather than generating
169+
from scratch. Behaviour is provider-specific:
170+
- OpenAI gpt-image-1: up to 16 reference images
171+
- OpenAI dall-e-2: first image as source, second as mask (optional)
172+
- Google Imagen: first image as reference (Vertex AI required)
173+
- Together AI / fal.ai: first image used as img2img source
174+
**kwargs: Provider-specific parameters (size, quality, style, strength, etc.).
168175
"""
169176
...
170177

171-
async def agenerate(self, prompt: str, output_path: str, **kwargs) -> None:
172-
"""Asynchronously generate an image from a text prompt and save it to a file.
178+
async def agenerate(self, prompt: str, output_path: str, images: list[str] | None = None, **kwargs) -> None:
179+
"""Asynchronously generate or edit an image and save it to a file.
173180
174181
Args:
175-
prompt: The text description to generate an image from.
182+
prompt: Text description of the image to generate or the edit to apply.
176183
output_path: Path where the generated image file will be saved.
177-
**kwargs: Provider-specific parameters (size, quality, style, etc.).
184+
images: Optional list of input image file paths. See generate() for details.
185+
**kwargs: Provider-specific parameters (size, quality, style, strength, etc.).
178186
"""
179187
...
180188

operator_use/providers/fal/image.py

Lines changed: 78 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import base64
12
import logging
3+
import mimetypes
24
import os
35
import urllib.request
46
from typing import Optional
@@ -8,22 +10,45 @@
810
logger = logging.getLogger(__name__)
911

1012

13+
def _encode_image_b64(path: str) -> str:
14+
"""Encode a local image file as a base64 data URL."""
15+
mime, _ = mimetypes.guess_type(path)
16+
mime = mime or "image/png"
17+
with open(path, "rb") as f:
18+
data = base64.b64encode(f.read()).decode()
19+
return f"data:{mime};base64,{data}"
20+
21+
1122
class ImageFal(BaseImage):
12-
"""fal.ai image generation provider.
23+
"""fal.ai image generation and editing provider.
24+
25+
Uses the fal-client SDK to run FLUX and other models on fal.ai.
26+
Requires the ``fal-client`` package: ``pip install fal-client``
1327
14-
Uses the fal-client SDK to run FLUX and other models on fal.ai infrastructure.
15-
Requires the `fal-client` package: pip install fal-client
28+
Generation (no images):
29+
Runs the configured model with a text prompt.
30+
31+
Editing (images provided):
32+
Switches to the ``image_to_image_model`` endpoint and passes the
33+
first image as ``image_url``. ``strength`` controls how much the
34+
output deviates from the input (0.0 = unchanged, 1.0 = fully
35+
regenerated).
1636
1737
Args:
18-
model: The fal model ID to use (default: "fal-ai/flux/schnell").
38+
model: The fal model ID for generation (default: "fal-ai/flux/schnell").
1939
Popular options:
20-
"fal-ai/flux/schnell" (fastest, 4 steps)
21-
"fal-ai/flux/dev" (higher quality)
22-
"fal-ai/flux-pro" (best quality, paid)
23-
"fal-ai/flux-pro/v1.1" (latest pro)
24-
"fal-ai/flux-lora" (LoRA support)
40+
"fal-ai/flux/schnell" (fastest, 4 steps)
41+
"fal-ai/flux/dev" (higher quality)
42+
"fal-ai/flux-pro" (best quality, paid)
43+
"fal-ai/flux-pro/v1.1"
2544
"fal-ai/stable-diffusion-v3-medium"
26-
image_size: Output image size preset (default: "landscape_4_3").
45+
image_to_image_model: Model used when input images are provided
46+
(default: "fal-ai/flux/dev/image-to-image").
47+
Popular options:
48+
"fal-ai/flux/dev/image-to-image"
49+
"fal-ai/flux-pro/v1/redux"
50+
"fal-ai/flux-lora/image-to-image"
51+
image_size: Output size preset for generation (default: "landscape_4_3").
2752
Options: "square_hd", "square", "portrait_4_3", "portrait_16_9",
2853
"landscape_4_3", "landscape_16_9".
2954
num_inference_steps: Steps for generation (default: 4 for schnell).
@@ -33,19 +58,26 @@ class ImageFal(BaseImage):
3358
```python
3459
from operator_use.providers.fal import ImageFal
3560
36-
provider = ImageFal(model="fal-ai/flux/schnell")
61+
provider = ImageFal()
62+
63+
# Generate from scratch
3764
provider.generate("a red panda coding on a laptop", "output.png")
65+
66+
# Edit with a reference image
67+
provider.generate("make it sunset", "output.png", images=["input.png"], strength=0.85)
3868
```
3969
"""
4070

4171
def __init__(
4272
self,
4373
model: str = "fal-ai/flux/schnell",
74+
image_to_image_model: str = "fal-ai/flux/dev/image-to-image",
4475
image_size: str = "landscape_4_3",
4576
num_inference_steps: int = 4,
4677
api_key: Optional[str] = None,
4778
):
4879
self._model = model
80+
self.image_to_image_model = image_to_image_model
4981
self.image_size = image_size
5082
self.num_inference_steps = num_inference_steps
5183
self.api_key = api_key or os.environ.get("FAL_KEY")
@@ -56,51 +88,56 @@ def __init__(
5688
def model(self) -> str:
5789
return self._model
5890

59-
def _build_arguments(self, prompt: str, **kwargs) -> dict:
60-
return {
61-
"prompt": prompt,
62-
"image_size": kwargs.get("image_size", self.image_size),
63-
"num_inference_steps": kwargs.get("num_inference_steps", self.num_inference_steps),
64-
"num_images": 1,
65-
"enable_safety_checker": True,
66-
}
67-
68-
def generate(self, prompt: str, output_path: str, **kwargs) -> None:
69-
"""Generate an image and save it to output_path.
70-
71-
Args:
72-
prompt: Text description of the image to generate.
73-
output_path: Path where the image will be saved.
74-
**kwargs: Override image_size or num_inference_steps for this call.
75-
"""
91+
def _build_arguments(self, prompt: str, images: list[str] | None, **kwargs) -> tuple[str, dict]:
92+
"""Return (endpoint, arguments) depending on whether images are provided."""
93+
if images:
94+
endpoint = kwargs.get("image_to_image_model", self.image_to_image_model)
95+
args = {
96+
"prompt": prompt,
97+
"image_url": _encode_image_b64(images[0]),
98+
"strength": kwargs.get("strength", 0.85),
99+
"num_inference_steps": kwargs.get("num_inference_steps", 28),
100+
"num_images": 1,
101+
"enable_safety_checker": True,
102+
}
103+
if kwargs.get("image_size"):
104+
args["image_size"] = kwargs["image_size"]
105+
else:
106+
endpoint = self._model
107+
args = {
108+
"prompt": prompt,
109+
"image_size": kwargs.get("image_size", self.image_size),
110+
"num_inference_steps": kwargs.get("num_inference_steps", self.num_inference_steps),
111+
"num_images": 1,
112+
"enable_safety_checker": True,
113+
}
114+
return endpoint, args
115+
116+
def _save_from_url(self, url: str, output_path: str) -> None:
117+
urllib.request.urlretrieve(url, output_path)
118+
119+
def generate(self, prompt: str, output_path: str, images: list[str] | None = None, **kwargs) -> None:
76120
try:
77121
import fal_client
78122
except ImportError:
79123
raise ImportError("fal-client is required: pip install fal-client")
80124

81-
result = fal_client.run(self._model, arguments=self._build_arguments(prompt, **kwargs))
125+
endpoint, args = self._build_arguments(prompt, images, **kwargs)
126+
result = fal_client.run(endpoint, arguments=args)
82127
url = result["images"][0]["url"]
83-
urllib.request.urlretrieve(url, output_path)
128+
self._save_from_url(url, output_path)
84129
logger.debug(f"[ImageFal] Image saved to {output_path}")
85130

86-
async def agenerate(self, prompt: str, output_path: str, **kwargs) -> None:
87-
"""Asynchronously generate an image and save it to output_path.
88-
89-
Args:
90-
prompt: Text description of the image to generate.
91-
output_path: Path where the image will be saved.
92-
**kwargs: Override image_size or num_inference_steps for this call.
93-
"""
131+
async def agenerate(self, prompt: str, output_path: str, images: list[str] | None = None, **kwargs) -> None:
94132
try:
95133
import fal_client
96134
except ImportError:
97135
raise ImportError("fal-client is required: pip install fal-client")
98136

99137
import aiohttp as _aiohttp
100138

101-
result = await fal_client.run_async(
102-
self._model, arguments=self._build_arguments(prompt, **kwargs)
103-
)
139+
endpoint, args = self._build_arguments(prompt, images, **kwargs)
140+
result = await fal_client.run_async(endpoint, arguments=args)
104141
url = result["images"][0]["url"]
105142
async with _aiohttp.ClientSession() as session:
106143
async with session.get(url) as resp:

operator_use/providers/google/image.py

Lines changed: 79 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -9,33 +9,59 @@
99

1010

1111
class ImageGoogle(BaseImage):
12-
"""Google Imagen image generation provider.
12+
"""Google Imagen image generation and editing provider.
1313
14-
Uses the Google GenAI SDK (Imagen 3) to generate images from text prompts.
14+
Uses the Google GenAI SDK for text-to-image generation (Imagen 3) and
15+
image editing (requires Vertex AI credentials).
16+
17+
Generation (no images):
18+
Uses ``imagen-3.0-generate-002`` via the standard GenAI API key.
19+
20+
Editing (images provided):
21+
Uses ``models.edit_image()`` with Vertex AI — requires
22+
``GOOGLE_CLOUD_PROJECT`` and ``GOOGLE_CLOUD_LOCATION`` environment
23+
variables in addition to the API key, and model
24+
``imagen-3.0-capability-001``.
1525
1626
Args:
17-
model: The Imagen model to use (default: "imagen-3.0-generate-002").
27+
model: Generation model (default: "imagen-3.0-generate-002").
28+
edit_model: Editing model (default: "imagen-3.0-capability-001").
1829
api_key: Google API key. Falls back to GEMINI_API_KEY env variable.
19-
negative_prompt: Optional description of what to exclude from the image.
30+
negative_prompt: Optional description of what to exclude.
31+
project: Google Cloud project ID for Vertex AI editing.
32+
Falls back to GOOGLE_CLOUD_PROJECT env variable.
33+
location: Google Cloud location for Vertex AI editing.
34+
Falls back to GOOGLE_CLOUD_LOCATION env variable (default: "us-central1").
2035
2136
Example:
2237
```python
2338
from operator_use.providers.google import ImageGoogle
2439
40+
# Generation (standard API key)
2541
provider = ImageGoogle()
2642
provider.generate("a red panda coding on a laptop", "output.png")
43+
44+
# Editing (Vertex AI)
45+
provider = ImageGoogle(project="my-project")
46+
provider.generate("make it sunset", "output.png", images=["input.png"])
2747
```
2848
"""
2949

3050
def __init__(
3151
self,
3252
model: str = "imagen-3.0-generate-002",
53+
edit_model: str = "imagen-3.0-capability-001",
3354
api_key: Optional[str] = None,
3455
negative_prompt: Optional[str] = None,
56+
project: Optional[str] = None,
57+
location: Optional[str] = None,
3558
):
3659
self._model = model
60+
self.edit_model = edit_model
3761
self.negative_prompt = negative_prompt
3862
self.api_key = api_key or os.environ.get("GEMINI_API_KEY")
63+
self.project = project or os.environ.get("GOOGLE_CLOUD_PROJECT")
64+
self.location = location or os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")
3965

4066
@property
4167
def model(self) -> str:
@@ -45,38 +71,57 @@ def _make_client(self):
4571
from google import genai
4672
return genai.Client(api_key=self.api_key)
4773

48-
def generate(self, prompt: str, output_path: str, **kwargs) -> None:
49-
"""Generate an image and save it to output_path.
50-
51-
Args:
52-
prompt: Text description of the image to generate.
53-
output_path: Path where the PNG image will be saved.
54-
**kwargs: Override negative_prompt for this call.
55-
"""
74+
def _make_vertex_client(self):
5675
from google import genai
76+
if not self.project:
77+
raise ValueError(
78+
"Google image editing requires a Vertex AI project. "
79+
"Set GOOGLE_CLOUD_PROJECT env variable or pass project= to ImageGoogle()."
80+
)
81+
return genai.Client(vertexai=True, project=self.project, location=self.location)
82+
83+
def generate(self, prompt: str, output_path: str, images: list[str] | None = None, **kwargs) -> None:
84+
from google import genai
85+
86+
if images:
87+
client = self._make_vertex_client()
88+
reference_images = [
89+
genai.types.RawReferenceImage(
90+
reference_id=i + 1,
91+
reference_image=genai.types.Image.from_file(path),
92+
)
93+
for i, path in enumerate(images)
94+
]
95+
config = genai.types.EditImageConfig(
96+
edit_mode=kwargs.get("edit_mode", "EDIT_MODE_DEFAULT"),
97+
number_of_images=1,
98+
output_mime_type="image/png",
99+
negative_prompt=kwargs.get("negative_prompt", self.negative_prompt),
100+
)
101+
response = client.models.edit_image(
102+
model=self.edit_model,
103+
prompt=prompt,
104+
reference_images=reference_images,
105+
config=config,
106+
)
107+
image_bytes = response.generated_images[0].image.image_bytes
108+
else:
109+
client = self._make_client()
110+
config = genai.types.GenerateImagesConfig(
111+
number_of_images=1,
112+
output_mime_type="image/png",
113+
negative_prompt=kwargs.get("negative_prompt", self.negative_prompt),
114+
)
115+
response = client.models.generate_images(
116+
model=self._model,
117+
prompt=prompt,
118+
config=config,
119+
)
120+
image_bytes = response.generated_images[0].image.image_bytes
57121

58-
client = self._make_client()
59-
config = genai.types.GenerateImagesConfig(
60-
number_of_images=1,
61-
output_mime_type="image/png",
62-
negative_prompt=kwargs.get("negative_prompt", self.negative_prompt),
63-
)
64-
response = client.models.generate_images(
65-
model=self._model,
66-
prompt=prompt,
67-
config=config,
68-
)
69-
image_data = response.generated_images[0].image.image_data
70122
with open(output_path, "wb") as f:
71-
f.write(image_data)
123+
f.write(image_bytes)
72124
logger.debug(f"[ImageGoogle] Image saved to {output_path}")
73125

74-
async def agenerate(self, prompt: str, output_path: str, **kwargs) -> None:
75-
"""Asynchronously generate an image and save it to output_path.
76-
77-
Args:
78-
prompt: Text description of the image to generate.
79-
output_path: Path where the PNG image will be saved.
80-
**kwargs: Override negative_prompt for this call.
81-
"""
82-
await asyncio.to_thread(self.generate, prompt, output_path, **kwargs)
126+
async def agenerate(self, prompt: str, output_path: str, images: list[str] | None = None, **kwargs) -> None:
127+
await asyncio.to_thread(self.generate, prompt, output_path, images, **kwargs)

0 commit comments

Comments (0)