OpenTechBio · djriffle · Jul 8, 2025 · Jul 7, 2025 · Jul 7, 2025
diff --git a/benchmarking/agents/system_blueprint.json b/benchmarking/agents/system_blueprint.json
@@ -5,7 +5,7 @@
       "neighbors": {
         "delegate_to_coder": {
           "target_agent": "coder_agent",
-          "description": "Use this command for any request that involves writing, debugging, or explaining code."
+          "description": "Use this command for any request that involves writing, debugging, or explaining code. It is also used to analyze single cell RNA data and spatial single cell data."
         },
         "delegate_to_researcher": {
           "target_agent": "research_agent",
@@ -14,7 +14,7 @@
       }
     },
     "coder_agent": {
-      "prompt": "You are a specialist coder agent. Your job is to write high-quality, executable code based on the user's request. You do not delegate tasks.",
+      "prompt": "You are a specialist single cell RNA coder agent. Your job is to write high-quality, executable code based on the user's request. You do not delegate tasks. The machine you run on has write disabled. You should never save to disk or modify files. Prioritize small step responses and avoid large code dumps.",
       "neighbors": {}
     },
     "research_agent": {

diff --git a/benchmarking/auto_metrics/AutoMetric.py b/benchmarking/auto_metrics/AutoMetric.py
@@ -0,0 +1,20 @@
+from abc import ABC, abstractmethod
+import json
+
+class AutoMetric(ABC):
+    """
+    Abstract base class for a metric to be applied to an AnnData object. Subclasses implement metric(); callers invoke run().
+    """
+    @abstractmethod
+    def metric(self, adata) -> dict:
+        """
+        Run the metric on `adata` and return a dictionary of results. run() passes this dict to json.dumps, so it must be JSON-serializable.
+        """
+        pass
+
+    def run(self, adata):
+        """
+        Call metric(adata) and print the returned dict to stdout as a JSON string. Nothing is returned to the caller.
+        """
+        result = self.metric(adata)
+        print(json.dumps(result))  # Always print result at the end; stdout is the only place the result goes
diff --git a/benchmarking/auto_metrics/AutoSilhouette.py b/benchmarking/auto_metrics/AutoSilhouette.py
@@ -0,0 +1,19 @@
+# Don't import AutoMetric here; the import below is deliberately left commented out.
+# from AutoMetric import AutoMetric 
+import scanpy as sc
+
+class CellCountMetric(AutoMetric):
+    """
+    A simple metric to count the number of cells and genes. NOTE(review): AutoMetric is not imported in this file (presumably supplied by the runner; confirm), and the file is named AutoSilhouette.py although no silhouette score is computed.
+    """
+    def metric(self, adata) -> dict:
+        num_cells = adata.n_obs  # reported below as "Number of Cells"
+        num_genes = adata.n_vars  # reported below as "Number of Genes"
+
+        return {
+            "Number of Cells": num_cells,
+            "Number of Genes": num_genes
+        }
+
+# Must run it here: instantiate the metric and print its JSON result. NOTE(review): `adata` is not defined in this file; presumably the execution environment provides it. Confirm.
+CellCountMetric().run(adata)
diff --git a/benchmarking/core/io_helpers.py b/benchmarking/core/io_helpers.py
@@ -110,7 +110,6 @@ def collect_resources(console, sandbox_sources_dir) -> List[Tuple[Path, str]]:
 
 def format_execute_response(resp: dict, output_dir) -> str:
     lines = ["Code execution result:"]
-    print(f"Response: {resp}")
     if resp.get("final_status") != "ok":
         lines.append(f"[status: {resp.get('status')}]")
     #if the key outputs in in resp we get the second dictionary