From b811fa17aa9d3a6af801e116467bf4878463b9d0 Mon Sep 17 00:00:00 2001 From: jerelvelarde Date: Tue, 24 Mar 2026 08:34:35 -0700 Subject: [PATCH 1/6] feat: add Render deployment blueprint and prepare for production Add render.yaml with two services: a public Docker-based Next.js frontend and a private Python LangGraph agent. Normalize the LANGGRAPH_DEPLOYMENT_URL to handle Render's bare host:port format, and make MCP server configuration opt-in via env var instead of hardcoding the excalidraw default. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 3 ++ apps/app/src/app/api/copilotkit/route.ts | 26 ++++++++++----- render.yaml | 40 ++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 render.yaml diff --git a/.gitignore b/.gitignore index 253d770..b352c66 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,6 @@ bun.lockb # Demos .demos + +# References +.references diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts index 9c73d9b..1086531 100644 --- a/apps/app/src/app/api/copilotkit/route.ts +++ b/apps/app/src/app/api/copilotkit/route.ts @@ -6,9 +6,17 @@ import { import { LangGraphAgent } from "@copilotkit/runtime/langgraph"; import { NextRequest } from "next/server"; +// Normalize Render's fromService hostport (bare host:port) into a full URL +const raw = process.env.LANGGRAPH_DEPLOYMENT_URL; +const deploymentUrl = !raw + ? "http://localhost:8123" + : raw.startsWith("http") + ? raw + : `http://${raw}`; + // 1. Define the agent connection to LangGraph const defaultAgent = new LangGraphAgent({ - deploymentUrl: process.env.LANGGRAPH_DEPLOYMENT_URL || "http://localhost:8123", + deploymentUrl, graphId: "sample_agent", langsmithApiKey: process.env.LANGSMITH_API_KEY || "", }); @@ -21,13 +29,15 @@ export const POST = async (req: NextRequest) => { runtime: new CopilotRuntime({ agents: { default: defaultAgent, }, a2ui: { injectA2UITool: true }, - mcpApps: { - servers: [{ - type: "http", - url: process.env.MCP_SERVER_URL || "https://mcp.excalidraw.com", - serverId: "example_mcp_app", - }], - }, + ...(process.env.MCP_SERVER_URL && { + mcpApps: { + servers: [{ + type: "http", + url: process.env.MCP_SERVER_URL, + serverId: "mcp_app", + }], + }, + }), }), }); diff --git a/render.yaml b/render.yaml new file mode 100644 index 0000000..6d36c77 --- /dev/null +++ b/render.yaml @@ -0,0 +1,40 @@ +services: + # ── Agent (LangGraph Python) — private, not exposed to internet ── + - type: pserv + name: open-generative-ui-agent + runtime: python + plan: starter + rootDir: apps/agent + buildCommand: "pip install uv && uv sync --frozen" + startCommand: "uv run langgraph dev --host 0.0.0.0 --port $PORT --no-browser --no-reload" + envVars: + - key: PYTHON_VERSION + value: "3.12" + - key: OPENAI_API_KEY + sync: false + - key: LANGSMITH_API_KEY + sync: false + buildFilter: + paths: + - apps/agent/** + + # ── Frontend (Next.js) — public web service ── + - type: web + name: open-generative-ui-app + runtime: docker + plan: starter + dockerfilePath: docker/Dockerfile.app + envVars: + - key: LANGGRAPH_DEPLOYMENT_URL + fromService: + name: open-generative-ui-agent + type: pserv + property: hostport + - key: LANGSMITH_API_KEY + sync: false + buildFilter: + paths: + - apps/app/** + - package.json + - pnpm-lock.yaml + - docker/Dockerfile.app From a73ebbce56679232ea0b40c9ebe5dde2ac5c74d8 Mon Sep 17 00:00:00 2001 From: jerelvelarde Date: Tue, 24 Mar 2026 09:21:02 -0700 Subject: [PATCH 2/6] feat: add per-IP rate limiting to /api/copilotkit endpoint Sliding-window rate limiter (20 req/min per IP) to prevent individual abuse of the public CopilotKit endpoint. In-memory with periodic cleanup to prevent unbounded Map growth. No new dependencies. Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/app/src/app/api/copilotkit/route.ts | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts index 1086531..4bbd9f5 100644 --- a/apps/app/src/app/api/copilotkit/route.ts +++ b/apps/app/src/app/api/copilotkit/route.ts @@ -6,6 +6,29 @@ import { import { LangGraphAgent } from "@copilotkit/runtime/langgraph"; import { NextRequest } from "next/server"; +// Simple sliding-window rate limiter (per IP) +const RATE_LIMIT_WINDOW_MS = 60_000; // 1 minute +const RATE_LIMIT_MAX = 20; // max requests per window +const hits = new Map(); + +function isRateLimited(ip: string): boolean { + const now = Date.now(); + const timestamps = hits.get(ip)?.filter(t => t > now - RATE_LIMIT_WINDOW_MS) ?? []; + timestamps.push(now); + hits.set(ip, timestamps); + return timestamps.length > RATE_LIMIT_MAX; +} + +// Prune stale entries every 5 min to prevent unbounded memory growth +setInterval(() => { + const cutoff = Date.now() - RATE_LIMIT_WINDOW_MS; + hits.forEach((timestamps, ip) => { + const recent = timestamps.filter(t => t > cutoff); + if (recent.length === 0) hits.delete(ip); + else hits.set(ip, recent); + }); +}, 300_000); + // Normalize Render's fromService hostport (bare host:port) into a full URL const raw = process.env.LANGGRAPH_DEPLOYMENT_URL; const deploymentUrl = !raw @@ -23,6 +46,11 @@ const defaultAgent = new LangGraphAgent({ // 3. Define the route and CopilotRuntime for the agent export const POST = async (req: NextRequest) => { + const ip = req.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? "unknown"; + if (isRateLimited(ip)) { + return new Response("Too many requests", { status: 429 }); + } + const { handleRequest } = copilotRuntimeNextJSAppRouterEndpoint({ endpoint: "/api/copilotkit", serviceAdapter: new ExperimentalEmptyAdapter(), From 59537d4a371d22177dbc6af93da4499a1fe2a79b Mon Sep 17 00:00:00 2001 From: jerelvelarde Date: Tue, 24 Mar 2026 11:24:30 -0700 Subject: [PATCH 3/6] fix: address review findings for Render deployment - Switch agent from langgraph dev to production Docker image (langchain/langgraph-api) - Add health check endpoint (/ok) for agent private service - Add turbo.json to frontend buildFilter to prevent stale builds - Add Dockerfile.agent for production agent builds - Revert serverId to example_mcp_app for traceability --- apps/app/src/app/api/copilotkit/route.ts | 2 +- docker/Dockerfile.agent | 18 ++++++++++++++++++ render.yaml | 11 +++++------ 3 files changed, 24 insertions(+), 7 deletions(-) create mode 100644 docker/Dockerfile.agent diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts index 4bbd9f5..3485f77 100644 --- a/apps/app/src/app/api/copilotkit/route.ts +++ b/apps/app/src/app/api/copilotkit/route.ts @@ -62,7 +62,7 @@ export const POST = async (req: NextRequest) => { servers: [{ type: "http", url: process.env.MCP_SERVER_URL, - serverId: "mcp_app", + serverId: "example_mcp_app", }], }, }), diff --git a/docker/Dockerfile.agent b/docker/Dockerfile.agent new file mode 100644 index 0000000..837809e --- /dev/null +++ b/docker/Dockerfile.agent @@ -0,0 +1,18 @@ +FROM langchain/langgraph-api:3.12 + +ADD apps/agent /deps/agent + +RUN for dep in /deps/*; do \ + echo "Installing $dep"; \ + if [ -d "$dep" ]; then \ + (cd "$dep" && PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt -e .); \ + fi; \ + done + +ENV LANGSERVE_GRAPHS='{"sample_agent": "/deps/agent/main.py:graph"}' + +RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license \ + && touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py +RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir --no-deps -e /api + +WORKDIR /deps/agent diff --git a/render.yaml b/render.yaml index 6d36c77..0cf80d5 100644 --- a/render.yaml +++ b/render.yaml @@ -2,14 +2,11 @@ services: # ── Agent (LangGraph Python) — private, not exposed to internet ── - type: pserv name: open-generative-ui-agent - runtime: python + runtime: docker plan: starter - rootDir: apps/agent - buildCommand: "pip install uv && uv sync --frozen" - startCommand: "uv run langgraph dev --host 0.0.0.0 --port $PORT --no-browser --no-reload" + dockerfilePath: docker/Dockerfile.agent + healthCheckPath: /ok envVars: - - key: PYTHON_VERSION - value: "3.12" - key: OPENAI_API_KEY sync: false - key: LANGSMITH_API_KEY @@ -17,6 +14,7 @@ services: buildFilter: paths: - apps/agent/** + - docker/Dockerfile.agent # ── Frontend (Next.js) — public web service ── - type: web @@ -37,4 +35,5 @@ services: - apps/app/** - package.json - pnpm-lock.yaml + - turbo.json - docker/Dockerfile.app From 66603a0798fd1694923a915d6c4124c78f831994 Mon Sep 17 00:00:00 2001 From: jerelvelarde Date: Tue, 24 Mar 2026 11:46:57 -0700 Subject: [PATCH 4/6] feat: make LLM model and rate limits configurable via env vars - LLM_MODEL env var in agent (defaults to gpt-5.4-2026-03-05) - RATE_LIMIT_WINDOW_MS and RATE_LIMIT_MAX env vars (defaults 60s/40 req) - README callout: strong models required for generative UI (GPT-5.4, Claude Opus 4.6, Gemini 3.1 Pro) Co-Authored-By: Claude Opus 4.6 (1M context) --- .env.example | 10 +++++++++- README.md | 10 ++++++++++ apps/agent/main.py | 4 +++- apps/app/src/app/api/copilotkit/route.ts | 4 ++-- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/.env.example b/.env.example index 9847a1d..95f6441 100644 --- a/.env.example +++ b/.env.example @@ -1 +1,9 @@ -OPENAI_API_KEY= \ No newline at end of file +OPENAI_API_KEY= + +# LLM model — strong models are required for reliable UI generation +# Recommended: gpt-5.4, gpt-5.4-pro, claude-opus-4-6, gemini-3.1-pro +LLM_MODEL=gpt-5.4-2026-03-05 + +# Rate limiting (per IP) +RATE_LIMIT_WINDOW_MS=60000 +RATE_LIMIT_MAX=40 \ No newline at end of file diff --git a/README.md b/README.md index 14149f9..2b5eff4 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,16 @@ make setup # Install deps + create .env template make dev # Start all services ``` +> **Strong models required.** Generative UI demands high-capability models that can produce complex, well-structured HTML/SVG in a single pass. Set `LLM_MODEL` in your `.env` to one of: +> +> | Model | Provider | +> |-------|----------| +> | `gpt-5.4` / `gpt-5.4-pro` | OpenAI | +> | `claude-opus-4-6` | Anthropic | +> | `gemini-3.1-pro` | Google | +> +> Smaller or weaker models will produce broken layouts, missing interactivity, or incomplete visualizations. + - **App**: http://localhost:3000 - **Agent**: http://localhost:8123 diff --git a/apps/agent/main.py b/apps/agent/main.py index 62245c1..9d558e6 100644 --- a/apps/agent/main.py +++ b/apps/agent/main.py @@ -3,6 +3,8 @@ It defines the workflow graph, state, tools, nodes and edges. """ +import os + from copilotkit import CopilotKitMiddleware from langchain.agents import create_agent from langchain_openai import ChatOpenAI @@ -17,7 +19,7 @@ _skills_text = load_all_skills() agent = create_agent( - model=ChatOpenAI(model="gpt-5.4-2026-03-05"), + model=ChatOpenAI(model=os.environ.get("LLM_MODEL", "gpt-5.4-2026-03-05")), tools=[query_data, *todo_tools, generate_form, *template_tools], middleware=[CopilotKitMiddleware()], state_schema=AgentState, diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts index 3485f77..1cff8a0 100644 --- a/apps/app/src/app/api/copilotkit/route.ts +++ b/apps/app/src/app/api/copilotkit/route.ts @@ -7,8 +7,8 @@ import { LangGraphAgent } from "@copilotkit/runtime/langgraph"; import { NextRequest } from "next/server"; // Simple sliding-window rate limiter (per IP) -const RATE_LIMIT_WINDOW_MS = 60_000; // 1 minute -const RATE_LIMIT_MAX = 20; // max requests per window +const RATE_LIMIT_WINDOW_MS = Number(process.env.RATE_LIMIT_WINDOW_MS) || 60_000; +const RATE_LIMIT_MAX = Number(process.env.RATE_LIMIT_MAX) || 40; const hits = new Map(); function isRateLimited(ip: string): boolean { From e45e254f993a3017529be2b5483b048ec1b89e85 Mon Sep 17 00:00:00 2001 From: jerelvelarde Date: Tue, 24 Mar 2026 13:35:50 -0700 Subject: [PATCH 5/6] fix: feature-flag in-memory rate limiter, disable by default In-memory rate limiting doesn't scale across multiple instances for high-traffic deployments. Disable by default via RATE_LIMIT_ENABLED env var so it doesn't silently misbehave at scale. Can be re-enabled for single-instance or low-traffic deployments. --- .env.example | 5 +++-- apps/app/src/app/api/copilotkit/route.ts | 24 +++++++++++++++--------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/.env.example b/.env.example index 95f6441..9f6d59f 100644 --- a/.env.example +++ b/.env.example @@ -4,6 +4,7 @@ OPENAI_API_KEY= # Recommended: gpt-5.4, gpt-5.4-pro, claude-opus-4-6, gemini-3.1-pro LLM_MODEL=gpt-5.4-2026-03-05 -# Rate limiting (per IP) +# Rate limiting (per IP) — disabled by default +RATE_LIMIT_ENABLED=false RATE_LIMIT_WINDOW_MS=60000 -RATE_LIMIT_MAX=40 \ No newline at end of file +RATE_LIMIT_MAX=40 diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts index 1cff8a0..8d31691 100644 --- a/apps/app/src/app/api/copilotkit/route.ts +++ b/apps/app/src/app/api/copilotkit/route.ts @@ -6,12 +6,16 @@ import { import { LangGraphAgent } from "@copilotkit/runtime/langgraph"; import { NextRequest } from "next/server"; -// Simple sliding-window rate limiter (per IP) +// Simple in-memory sliding-window rate limiter (per IP) +// Enable via RATE_LIMIT_ENABLED=true — off by default. +// For high-traffic deployments, consider Redis-backed rate limiting instead. +const RATE_LIMIT_ENABLED = process.env.RATE_LIMIT_ENABLED === "true"; const RATE_LIMIT_WINDOW_MS = Number(process.env.RATE_LIMIT_WINDOW_MS) || 60_000; const RATE_LIMIT_MAX = Number(process.env.RATE_LIMIT_MAX) || 40; const hits = new Map(); function isRateLimited(ip: string): boolean { + if (!RATE_LIMIT_ENABLED) return false; const now = Date.now(); const timestamps = hits.get(ip)?.filter(t => t > now - RATE_LIMIT_WINDOW_MS) ?? []; timestamps.push(now); @@ -20,14 +24,16 @@ function isRateLimited(ip: string): boolean { } // Prune stale entries every 5 min to prevent unbounded memory growth -setInterval(() => { - const cutoff = Date.now() - RATE_LIMIT_WINDOW_MS; - hits.forEach((timestamps, ip) => { - const recent = timestamps.filter(t => t > cutoff); - if (recent.length === 0) hits.delete(ip); - else hits.set(ip, recent); - }); -}, 300_000); +if (RATE_LIMIT_ENABLED) { + setInterval(() => { + const cutoff = Date.now() - RATE_LIMIT_WINDOW_MS; + hits.forEach((timestamps, ip) => { + const recent = timestamps.filter(t => t > cutoff); + if (recent.length === 0) hits.delete(ip); + else hits.set(ip, recent); + }); + }, 300_000); +} // Normalize Render's fromService hostport (bare host:port) into a full URL const raw = process.env.LANGGRAPH_DEPLOYMENT_URL; From c00975eff5a92eeedace5d338f8975be732adcc1 Mon Sep 17 00:00:00 2001 From: jerelvelarde Date: Tue, 24 Mar 2026 13:40:27 -0700 Subject: [PATCH 6/6] fix: add missing env vars to render.yaml blueprint Wire LLM_MODEL to the agent service and rate limiter env vars to the frontend service so operators don't need to configure them manually in the Render dashboard. Co-Authored-By: Claude Opus 4.6 (1M context) --- render.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/render.yaml b/render.yaml index 0cf80d5..cb296a0 100644 --- a/render.yaml +++ b/render.yaml @@ -11,6 +11,8 @@ services: sync: false - key: LANGSMITH_API_KEY sync: false + - key: LLM_MODEL + value: gpt-5.4-2026-03-05 buildFilter: paths: - apps/agent/** @@ -30,6 +32,12 @@ services: property: hostport - key: LANGSMITH_API_KEY sync: false + - key: RATE_LIMIT_ENABLED + value: "false" + - key: RATE_LIMIT_WINDOW_MS + value: "60000" + - key: RATE_LIMIT_MAX + value: "40" buildFilter: paths: - apps/app/**