Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
622 changes: 622 additions & 0 deletions mingle/ai_server/ai_server.py

Large diffs are not rendered by default.

File renamed without changes
5 changes: 4 additions & 1 deletion benchmark.py → mingle/ai_server/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

import sys, os
import pathlib

# Make the vendored cactus package importable regardless of the current
# working directory: resolve the repo root relative to this file
# (mingle/ai_server/ -> two levels up), not relative to the CWD.
_REPO_ROOT = str(pathlib.Path(__file__).resolve().parents[2])
sys.path.insert(0, os.path.join(_REPO_ROOT, "cactus/python/src"))
# Opt out of cloud telemetry for local benchmarking runs.
os.environ["CACTUS_NO_CLOUD_TELE"] = "1"

import json
Expand Down
113 changes: 103 additions & 10 deletions main.py → mingle/ai_server/main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,64 @@

import sys
import os as _os

# Resolve repo root (two levels up from mingle/ai_server/) so that the
# vendored cactus package and model weights are found regardless of CWD.
_REPO_ROOT = _os.path.normpath(_os.path.join(_os.path.dirname(_os.path.abspath(__file__)), "../.."))
sys.path.insert(0, _os.path.join(_REPO_ROOT, "cactus/python/src"))
functiongemma_path = _os.path.join(_REPO_ROOT, "cactus/weights/functiongemma-270m-it")

import json, os, time

# Load .env from repo root if present (dev convenience — does not override existing env vars)
try:
    from dotenv import load_dotenv
    load_dotenv(_os.path.join(_REPO_ROOT, ".env"), override=False)
except ImportError:
    # python-dotenv is optional; in production env vars are set externally.
    pass

from cactus import cactus_init, cactus_complete, cactus_destroy
from google import genai
from google.genai import types


# --- Model persistence singleton ---
_cactus_model = None


def _get_cactus_model():
    """Return the shared Cactus model handle, initialising it on first use.

    Keeping one lazily-created handle avoids paying cactus_init() on every
    request; subsequent calls return the cached model.
    """
    global _cactus_model
    model = _cactus_model
    if model is None:
        model = _cactus_model = cactus_init(functiongemma_path)
    return model


# --- Complexity classifier ---
_MULTI_ACTION_KW = ["and", "also", "then", "plus", "as well", "both", "additionally"]
_ACTION_VERBS = ["set", "send", "check", "play", "find", "remind", "text", "get", "search"]

def _classify_complexity(messages, tools) -> str:
user_text = " ".join(
m["content"] for m in messages if m["role"] == "user"
).lower()
tool_count = len(tools)
conjunction_count = sum(1 for kw in _MULTI_ACTION_KW if f" {kw} " in f" {user_text} ")
verb_count = sum(1 for v in _ACTION_VERBS if v in user_text.split())
if conjunction_count >= 1 and verb_count >= 2:
return "hard"
if tool_count >= 4 and verb_count >= 2:
return "hard"
if tool_count >= 3:
return "medium"
return "easy"


# Per-complexity routing table. Each bucket tunes the knobs forwarded to
# cactus_complete and the hybrid accept gate:
#   tool_rag_top_k       - tool count kept by native Cactus RAG filtering
#                          (0 for "hard"; presumably disables filtering so
#                          all tools are passed — TODO confirm)
#   confidence_threshold - minimum on-device confidence to accept the local
#                          result instead of falling back to cloud
#   max_tokens           - on-device generation budget
_COMPLEXITY_CONFIG = {
    "easy": {"tool_rag_top_k": 1, "confidence_threshold": 0.75, "max_tokens": 128},
    "medium": {"tool_rag_top_k": 2, "confidence_threshold": 0.82, "max_tokens": 192},
    "hard": {"tool_rag_top_k": 0, "confidence_threshold": 0.97, "max_tokens": 320},
}


def generate_cactus(messages, tools):
"""Run function calling on-device via FunctionGemma + Cactus."""
model = cactus_init(functiongemma_path)
Expand Down Expand Up @@ -94,18 +144,61 @@ def generate_cloud(messages, tools):
}


def generate_hybrid(messages, tools, confidence_threshold=None):
    """Hybrid inference: classify complexity, route to on-device or cloud.

    Uses a model persistence singleton to avoid re-initialising Cactus on
    every call (major latency improvement). Complexity-aware routing lowers
    confidence thresholds for simple requests so more work stays on-device.

    Args:
        messages: chat messages, each a dict with "role" and "content".
        tools: list of tool/function schemas.
        confidence_threshold: optional caller override; when None the
            per-complexity default from _COMPLEXITY_CONFIG is used.

    Returns:
        dict with "function_calls", "total_time_ms", "source", "complexity"
        and either "confidence" (on-device) or "local_confidence" (cloud
        fallback) keys.
    """
    complexity = _classify_complexity(messages, tools)
    cfg = _COMPLEXITY_CONFIG[complexity]

    # Use caller-supplied threshold if provided, otherwise the per-complexity default.
    threshold = confidence_threshold if confidence_threshold is not None else cfg["confidence_threshold"]

    model = _get_cactus_model()

    # Pass all tools — tool_rag_top_k in cactus_complete handles native RAG filtering.
    cactus_tools = [{"type": "function", "function": t} for t in tools]

    raw_str = cactus_complete(
        model,
        [{"role": "system", "content": "You are a helpful assistant that can use tools."}] + messages,
        tools=cactus_tools,
        force_tools=True,
        max_tokens=cfg["max_tokens"],
        tool_rag_top_k=cfg["tool_rag_top_k"],      # native Cactus RAG tool filtering
        confidence_threshold=threshold,            # native Cactus confidence gate
        stop_sequences=["<|im_end|>", "<end_of_turn>"],
    )

    # Cactus returns a JSON string; treat unparseable output as a failed
    # local attempt (empty dict -> falls through to cloud below).
    try:
        raw = json.loads(raw_str)
    except json.JSONDecodeError:
        raw = {}

    local_confidence = raw.get("confidence", 0)
    local_function_calls = raw.get("function_calls", [])
    local_time_ms = raw.get("total_time_ms", 0)
    cloud_handoff = raw.get("cloud_handoff", False)

    # Accept on-device result: not a cloud_handoff, confidence met, and non-empty calls.
    if not cloud_handoff and local_confidence >= threshold and local_function_calls:
        return {
            "function_calls": local_function_calls,
            "total_time_ms": local_time_ms,
            "confidence": local_confidence,
            "source": "on-device",
            "complexity": complexity,
        }

    # Fall back to cloud; keep the failed local attempt's confidence and
    # latency so the caller can see the full end-to-end cost.
    cloud = generate_cloud(messages, tools)
    cloud["source"] = "cloud (fallback)"
    cloud["local_confidence"] = local_confidence
    cloud["total_time_ms"] += local_time_ms
    cloud["complexity"] = complexity
    return cloud


Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Joe Thompson
Role: VP Engineering
Company: Cactus AI
Bio: Leading the engineering team at Cactus AI. Passionate about on-device ML and edge computing.
Skills: ML Infrastructure, Edge Computing, Team Building
Looking For: Collaborators, Investors
Can Help With: Technical advice, Mentorship, Introductions
Domains: AI/ML, Hardware
LinkedIn: https://linkedin.com/in/joethompson
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Taylor Kim
Role: Product Manager
Company: Dropbox
Bio: PM who works closely with design teams. Can give design feedback but not a hands-on designer.
Skills: Product Strategy, User Research, Wireframing, Figma (basic)
Looking For: Co-founder, Advice
Can Help With: Product feedback, Design
Domains: Consumer, Enterprise SaaS
LinkedIn: https://linkedin.com/in/taylorkim
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Sam Martinez
Role: Frontend Engineer
Company: Vercel
Bio: Frontend dev with an eye for design. Can implement designs well and give basic UI feedback.
Skills: React, CSS, Tailwind, Basic UI sense
Looking For: Collaborators, Co-founder
Can Help With: Technical advice, Design
Domains: Developer Tools, AI/ML
LinkedIn: https://linkedin.com/in/sammartinez
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Chris Wong
Role: Product Designer
Company: Notion
Bio: 3 years designing productivity tools. Learning design systems and growing into senior roles.
Skills: UI Design, Figma, Sketch, Basic Prototyping
Looking For: Mentorship, Collaborators
Can Help With: Design, Product feedback
Domains: Consumer, Developer Tools
LinkedIn: https://linkedin.com/in/chriswong
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: hannahyan
Role: tinkerer
Company: Mingle
Bio: making networking personalized and meaningful for introverts
Skills:
Looking For: Co-founder, Customers
Can Help With: Product feedback
Domains: AI/ML
LinkedIn:
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Hannah
Role: -
Company: -
Bio: Building a personalized social app
Skills:
Looking For: Co-founder
Can Help With:
Domains: AI/ML
LinkedIn:
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Maya Patel
Role: Principal Product Designer
Company: Figma
Bio: 10+ years designing world-class products. Led design systems at Airbnb and Figma. Passionate about accessible, delightful UX.
Skills: UI/UX Design, Design Systems, Figma, User Research, Prototyping, Visual Design, Interaction Design
Looking For: Collaborators, Mentorship
Can Help With: Design, Product feedback, Technical advice
Domains: AI/ML, Consumer, Developer Tools
LinkedIn: https://linkedin.com/in/mayapatel
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Jordan Lee
Role: Senior UX Designer
Company: Stripe
Bio: Designing fintech experiences that simplify complexity. Strong in user research and interaction design.
Skills: UX Design, User Research, Figma, Prototyping, Wireframing
Looking For: Co-founder, Collaborators
Can Help With: Design, Product feedback
Domains: FinTech, Enterprise SaaS
LinkedIn: https://linkedin.com/in/jordanlee
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Robin Chen
Role: Data Scientist
Company: Databricks
Bio: Data viz enthusiast. Can create charts and dashboards but not a product designer.
Skills: Python, Data Visualization, D3.js, Tableau
Looking For: Collaborators, Mentorship
Can Help With: Technical advice, Design
Domains: AI/ML, Enterprise SaaS
LinkedIn: https://linkedin.com/in/robinchen
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Test User
Role: Tester
Company: Test Co
Bio: Testing
Skills:
Looking For:
Can Help With:
Domains:
LinkedIn:
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Name: Siddhi Sharma
Role: AI Researcher
Company: Google DeepMind
Bio: Working on agentic AI and multi-modal reasoning
Skills: PyTorch, Transformers, RL
Looking For: Collaborators
Can Help With: Technical advice
Domains: AI/ML
LinkedIn:
Binary file added mingle/ai_server/rag_corpus/data.bin
Binary file not shown.
Binary file added mingle/ai_server/rag_corpus/index.bin
Binary file not shown.
5 changes: 5 additions & 0 deletions mingle/ai_server/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
fastapi
uvicorn[standard]
google-genai
pydantic
python-dotenv
5 changes: 4 additions & 1 deletion submit.py → mingle/ai_server/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
"""

import argparse
import os
import time
import requests

_HERE = os.path.dirname(os.path.abspath(__file__))

SERVER_URL = "https://cactusevals.ngrok.app"
HEADERS = {"ngrok-skip-browser-warning": "true"}

Expand All @@ -19,7 +22,7 @@ def submit(team, location):
print("=" * 60)

try:
with open("main.py", "rb") as f:
with open(os.path.join(_HERE, "main.py"), "rb") as f:
resp = requests.post(
f"{SERVER_URL}/eval/submit",
data={"team": team, "location": location},
Expand Down
Loading