Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,6 @@ local_experiments/
.claude/**
!.claude/CLAUDE.md
!.claude/settings.json

experiments/kdd 2026

6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ client.delete_env(envId=env.environmentId)
The SDK provides **code execution proxies** - tools for AI agents. You add them to your toolbox in the Vercel AI SDK, LangChain, or OpenAI Agents, letting the LLM write Python or Bash code that talks to the Slack or Linear API. Requests are automatically intercepted and routed to isolated test environments, so agents can interact with service replicas without any code changes. See more in: **[Python SDK](sdk/agent-diff-python/README.md)**


## Benchmark & Training

- **HuggingFace Dataset**: [hubertmarek/agent-diff-bench](https://huggingface.co/datasets/hubertmarek/agent-diff-bench) — 224 tasks across all 4 services (80/20 train/test split, stratified by service)
- **Prime Intellect Environment**: [agent-diff-bench on Prime Lab](https://app.primeintellect.ai/dashboard/environments/hubert-marek/agent-diff-bench) — run evaluations or RL training via Hosted Training
- **Paper**: [AgentDiff: Agentic API Evaluation via State Differencing (KDD 2026 pre-print)](https://drive.google.com/file/d/1BlmJTSMX7ohwvD1aYBByg7_Y815fgsxp/view?usp=sharing)

## Evaluations & Test Suites

Collections of test cases with assertions that you can run against agent runs using evaluations.
Expand Down
1 change: 1 addition & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ RUN echo '#!/bin/sh\n\
python utils/seed_slack_template.py\n\
python utils/seed_linear_template.py\n\
python utils/seed_box_template.py\n\
python utils/seed_calendar_template.py\n\
python utils/seed_tests.py\n\
else\n\
echo "=== Skipping seed (set SEED=true to enable) ==="\n\
Expand Down
33 changes: 25 additions & 8 deletions backend/src/services/slack/api/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,23 @@ def _slack_error(
raise SlackAPIError(code, status_code, extra)


def _parse_bool_param(value: Any, default: bool = False) -> bool:
"""Safely parse a boolean parameter from JSON (bool) or form data (string).

Handles:
- Boolean values: True/False
- String values: "true"/"false" (case-insensitive)
- None/missing: returns default
"""
if value is None:
return default
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.lower() == "true"
return default


def _resolve_channel_id(channel: str, session=None) -> str:
"""Resolve channel name or ID to channel ID.

Expand Down Expand Up @@ -1033,7 +1050,7 @@ async def conversations_list(request: Request) -> JSONResponse:
except ValueError:
_slack_error("invalid_arguments")

exclude_archived = params.get("exclude_archived", "false").lower() == "true"
exclude_archived = _parse_bool_param(params.get("exclude_archived"), default=False)
types_param = params.get("types", "public_channel") # Default: public_channel

session = _session(request)
Expand Down Expand Up @@ -1146,7 +1163,7 @@ async def conversations_history(request: Request) -> JSONResponse:
_slack_error("invalid_cursor")
oldest_param = params.get("oldest")
latest_param = params.get("latest")
inclusive = params.get("inclusive", "false").lower() == "true"
inclusive = _parse_bool_param(params.get("inclusive"), default=False)

# Validate channel (required)
if not channel:
Expand Down Expand Up @@ -1270,7 +1287,7 @@ async def conversations_replies(request: Request) -> JSONResponse:

oldest_param = params.get("oldest")
latest_param = params.get("latest")
inclusive = params.get("inclusive", "false").lower() == "true"
inclusive = _parse_bool_param(params.get("inclusive"), default=False)

oldest_dt = None
latest_dt = None
Expand Down Expand Up @@ -1710,8 +1727,8 @@ async def conversations_open(request: Request) -> JSONResponse:
async def conversations_info(request: Request) -> JSONResponse:
params = await _get_params_async(request)
channel = params.get("channel")
include_locale = params.get("include_locale", "false").lower() == "true"
include_num_members = params.get("include_num_members", "false").lower() == "true"
include_locale = _parse_bool_param(params.get("include_locale"), default=False)
include_num_members = _parse_bool_param(params.get("include_num_members"), default=False)

# Validate channel (required)
if not channel:
Expand Down Expand Up @@ -2283,7 +2300,7 @@ async def users_info(request: Request) -> JSONResponse:
if user is None:
_slack_error("user_not_found")

include_locale = params.get("include_locale", "false").lower() == "true"
include_locale = _parse_bool_param(params.get("include_locale"), default=False)

session = _session(request)

Expand Down Expand Up @@ -2317,7 +2334,7 @@ async def users_list(request: Request) -> JSONResponse:
except ValueError:
_slack_error("invalid_cursor")

include_locale = params.get("include_locale", "false").lower() == "true"
include_locale = _parse_bool_param(params.get("include_locale"), default=False)
session = _session(request)
actor = _principal_user_id(request)
team_id = _get_env_team_id(request, channel_id=None, actor_user_id=actor)
Expand Down Expand Up @@ -2857,7 +2874,7 @@ async def search_messages(request: Request) -> JSONResponse:
if not query_str:
_slack_error("No query passed")

highlight = str(params.get("highlight", "false")).lower() == "true"
highlight = _parse_bool_param(params.get("highlight"), default=False)
sort = (params.get("sort") or "score").lower()
sort_dir = (params.get("sort_dir") or "desc").lower()
count_param = params.get("count")
Expand Down
Loading