From b811fa17aa9d3a6af801e116467bf4878463b9d0 Mon Sep 17 00:00:00 2001
From: jerelvelarde <Jereljohnvelarde@gmail.com>
Date: Tue, 24 Mar 2026 08:34:35 -0700
Subject: [PATCH 1/6] feat: add Render deployment blueprint and prepare for
 production

Add render.yaml with two services: a public Docker-based Next.js
frontend and a private Python LangGraph agent. Normalize the
LANGGRAPH_DEPLOYMENT_URL to handle Render's bare host:port format,
and make MCP server configuration opt-in via env var instead of
hardcoding the excalidraw default.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .gitignore                               |  3 ++
 apps/app/src/app/api/copilotkit/route.ts | 26 ++++++++++-----
 render.yaml                              | 40 ++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 8 deletions(-)
 create mode 100644 render.yaml

diff --git a/.gitignore b/.gitignore
index 253d770..b352c66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,3 +60,6 @@ bun.lockb
 
 # Demos
 .demos
+
+# References
+.references
diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts
index 9c73d9b..1086531 100644
--- a/apps/app/src/app/api/copilotkit/route.ts
+++ b/apps/app/src/app/api/copilotkit/route.ts
@@ -6,9 +6,17 @@ import {
 import { LangGraphAgent } from "@copilotkit/runtime/langgraph";
 import { NextRequest } from "next/server";
 
+// Normalize Render's fromService hostport (bare host:port) into a full URL
+const raw = process.env.LANGGRAPH_DEPLOYMENT_URL;
+const deploymentUrl = !raw
+  ? "http://localhost:8123"
+  : raw.startsWith("http")
+    ? raw
+    : `http://${raw}`;
+
 // 1. Define the agent connection to LangGraph
 const defaultAgent = new LangGraphAgent({
-  deploymentUrl: process.env.LANGGRAPH_DEPLOYMENT_URL || "http://localhost:8123",
+  deploymentUrl,
   graphId: "sample_agent",
   langsmithApiKey: process.env.LANGSMITH_API_KEY || "",
 });
@@ -21,13 +29,15 @@ export const POST = async (req: NextRequest) => {
     runtime: new CopilotRuntime({
       agents: { default: defaultAgent, },
       a2ui: { injectA2UITool: true },
-      mcpApps: {
-        servers: [{
-          type: "http",
-          url: process.env.MCP_SERVER_URL || "https://mcp.excalidraw.com",
-          serverId: "example_mcp_app",
-        }],
-      },
+      ...(process.env.MCP_SERVER_URL && {
+        mcpApps: {
+          servers: [{
+            type: "http",
+            url: process.env.MCP_SERVER_URL,
+            serverId: "mcp_app",
+          }],
+        },
+      }),
     }),
   });
 
diff --git a/render.yaml b/render.yaml
new file mode 100644
index 0000000..6d36c77
--- /dev/null
+++ b/render.yaml
@@ -0,0 +1,40 @@
+services:
+  # ── Agent (LangGraph Python) — private, not exposed to internet ──
+  - type: pserv
+    name: open-generative-ui-agent
+    runtime: python
+    plan: starter
+    rootDir: apps/agent
+    buildCommand: "pip install uv && uv sync --frozen"
+    startCommand: "uv run langgraph dev --host 0.0.0.0 --port $PORT --no-browser --no-reload"
+    envVars:
+      - key: PYTHON_VERSION
+        value: "3.12"
+      - key: OPENAI_API_KEY
+        sync: false
+      - key: LANGSMITH_API_KEY
+        sync: false
+    buildFilter:
+      paths:
+        - apps/agent/**
+
+  # ── Frontend (Next.js) — public web service ──
+  - type: web
+    name: open-generative-ui-app
+    runtime: docker
+    plan: starter
+    dockerfilePath: docker/Dockerfile.app
+    envVars:
+      - key: LANGGRAPH_DEPLOYMENT_URL
+        fromService:
+          name: open-generative-ui-agent
+          type: pserv
+          property: hostport
+      - key: LANGSMITH_API_KEY
+        sync: false
+    buildFilter:
+      paths:
+        - apps/app/**
+        - package.json
+        - pnpm-lock.yaml
+        - docker/Dockerfile.app

From a73ebbce56679232ea0b40c9ebe5dde2ac5c74d8 Mon Sep 17 00:00:00 2001
From: jerelvelarde <Jereljohnvelarde@gmail.com>
Date: Tue, 24 Mar 2026 09:21:02 -0700
Subject: [PATCH 2/6] feat: add per-IP rate limiting to /api/copilotkit
 endpoint

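Add a sliding-window rate limiter (20 req/min per IP) to prevent
individual abuse of the public CopilotKit endpoint. The client IP is
taken from the first X-Forwarded-For entry; requests without that
header share a single "unknown" bucket. State is kept in memory, with
periodic cleanup to prevent unbounded Map growth. No new dependencies.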

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 apps/app/src/app/api/copilotkit/route.ts | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts
index 1086531..4bbd9f5 100644
--- a/apps/app/src/app/api/copilotkit/route.ts
+++ b/apps/app/src/app/api/copilotkit/route.ts
@@ -6,6 +6,29 @@ import {
 import { LangGraphAgent } from "@copilotkit/runtime/langgraph";
 import { NextRequest } from "next/server";
 
+// Simple sliding-window rate limiter (per IP)
+const RATE_LIMIT_WINDOW_MS = 60_000; // 1 minute
+const RATE_LIMIT_MAX = 20;           // max requests per window
+const hits = new Map<string, number[]>();
+
+function isRateLimited(ip: string): boolean {
+  const now = Date.now();
+  const timestamps = hits.get(ip)?.filter(t => t > now - RATE_LIMIT_WINDOW_MS) ?? [];
+  timestamps.push(now);
+  hits.set(ip, timestamps);
+  return timestamps.length > RATE_LIMIT_MAX;
+}
+
+// Prune stale entries every 5 min to prevent unbounded memory growth
+setInterval(() => {
+  const cutoff = Date.now() - RATE_LIMIT_WINDOW_MS;
+  hits.forEach((timestamps, ip) => {
+    const recent = timestamps.filter(t => t > cutoff);
+    if (recent.length === 0) hits.delete(ip);
+    else hits.set(ip, recent);
+  });
+}, 300_000);
+
 // Normalize Render's fromService hostport (bare host:port) into a full URL
 const raw = process.env.LANGGRAPH_DEPLOYMENT_URL;
 const deploymentUrl = !raw
@@ -23,6 +46,11 @@ const defaultAgent = new LangGraphAgent({
 
 // 3. Define the route and CopilotRuntime for the agent
 export const POST = async (req: NextRequest) => {
+  const ip = req.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? "unknown";
+  if (isRateLimited(ip)) {
+    return new Response("Too many requests", { status: 429 });
+  }
+
   const { handleRequest } = copilotRuntimeNextJSAppRouterEndpoint({
     endpoint: "/api/copilotkit",
     serviceAdapter: new ExperimentalEmptyAdapter(),

From 59537d4a371d22177dbc6af93da4499a1fe2a79b Mon Sep 17 00:00:00 2001
From: jerelvelarde <Jereljohnvelarde@gmail.com>
Date: Tue, 24 Mar 2026 11:24:30 -0700
Subject: [PATCH 3/6] fix: address review findings for Render deployment

- Switch agent from langgraph dev to production Docker image (langchain/langgraph-api)
- Add health check endpoint (/ok) for agent private service
- Add turbo.json to frontend buildFilter to prevent stale builds
- Add Dockerfile.agent for production agent builds
- Revert serverId to example_mcp_app for traceability
---
 apps/app/src/app/api/copilotkit/route.ts |  2 +-
 docker/Dockerfile.agent                  | 18 ++++++++++++++++++
 render.yaml                              | 11 +++++------
 3 files changed, 24 insertions(+), 7 deletions(-)
 create mode 100644 docker/Dockerfile.agent

diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts
index 4bbd9f5..3485f77 100644
--- a/apps/app/src/app/api/copilotkit/route.ts
+++ b/apps/app/src/app/api/copilotkit/route.ts
@@ -62,7 +62,7 @@ export const POST = async (req: NextRequest) => {
           servers: [{
             type: "http",
             url: process.env.MCP_SERVER_URL,
-            serverId: "mcp_app",
+            serverId: "example_mcp_app",
           }],
         },
       }),
diff --git a/docker/Dockerfile.agent b/docker/Dockerfile.agent
new file mode 100644
index 0000000..837809e
--- /dev/null
+++ b/docker/Dockerfile.agent
@@ -0,0 +1,18 @@
+FROM langchain/langgraph-api:3.12
+
+ADD apps/agent /deps/agent
+
+RUN for dep in /deps/*; do \
+        echo "Installing $dep"; \
+        if [ -d "$dep" ]; then \
+            (cd "$dep" && PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt -e .); \
+        fi; \
+    done
+
+ENV LANGSERVE_GRAPHS='{"sample_agent": "/deps/agent/main.py:graph"}'
+
+RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license \
+    && touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py
+RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir --no-deps -e /api
+
+WORKDIR /deps/agent
diff --git a/render.yaml b/render.yaml
index 6d36c77..0cf80d5 100644
--- a/render.yaml
+++ b/render.yaml
@@ -2,14 +2,11 @@ services:
   # ── Agent (LangGraph Python) — private, not exposed to internet ──
   - type: pserv
     name: open-generative-ui-agent
-    runtime: python
+    runtime: docker
     plan: starter
-    rootDir: apps/agent
-    buildCommand: "pip install uv && uv sync --frozen"
-    startCommand: "uv run langgraph dev --host 0.0.0.0 --port $PORT --no-browser --no-reload"
+    dockerfilePath: docker/Dockerfile.agent
+    healthCheckPath: /ok
     envVars:
-      - key: PYTHON_VERSION
-        value: "3.12"
       - key: OPENAI_API_KEY
         sync: false
       - key: LANGSMITH_API_KEY
@@ -17,6 +14,7 @@ services:
     buildFilter:
       paths:
         - apps/agent/**
+        - docker/Dockerfile.agent
 
   # ── Frontend (Next.js) — public web service ──
   - type: web
@@ -37,4 +35,5 @@ services:
         - apps/app/**
         - package.json
         - pnpm-lock.yaml
+        - turbo.json
         - docker/Dockerfile.app

From 66603a0798fd1694923a915d6c4124c78f831994 Mon Sep 17 00:00:00 2001
From: jerelvelarde <Jereljohnvelarde@gmail.com>
Date: Tue, 24 Mar 2026 11:46:57 -0700
Subject: [PATCH 4/6] feat: make LLM model and rate limits configurable via env
 vars

- LLM_MODEL env var in agent (defaults to gpt-5.4-2026-03-05)
- RATE_LIMIT_WINDOW_MS and RATE_LIMIT_MAX env vars (defaults 60s/40 req;
  the default max rises from the previously hard-coded 20)
- README callout: strong models required for generative UI (GPT-5.4,
  Claude Opus 4.6, Gemini 3.1 Pro)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .env.example                             | 10 +++++++++-
 README.md                                | 10 ++++++++++
 apps/agent/main.py                       |  4 +++-
 apps/app/src/app/api/copilotkit/route.ts |  4 ++--
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/.env.example b/.env.example
index 9847a1d..95f6441 100644
--- a/.env.example
+++ b/.env.example
@@ -1 +1,9 @@
-OPENAI_API_KEY=
\ No newline at end of file
+OPENAI_API_KEY=
+
+# LLM model — strong models are required for reliable UI generation
+# Recommended: gpt-5.4, gpt-5.4-pro, claude-opus-4-6, gemini-3.1-pro
+LLM_MODEL=gpt-5.4-2026-03-05
+
+# Rate limiting (per IP)
+RATE_LIMIT_WINDOW_MS=60000
+RATE_LIMIT_MAX=40
\ No newline at end of file
diff --git a/README.md b/README.md
index 14149f9..2b5eff4 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,16 @@ make setup    # Install deps + create .env template
 make dev      # Start all services
 ```
 
+> **Strong models required.** Generative UI demands high-capability models that can produce complex, well-structured HTML/SVG in a single pass. Set `LLM_MODEL` in your `.env` to one of:
+>
+> | Model | Provider |
+> |-------|----------|
+> | `gpt-5.4` / `gpt-5.4-pro` | OpenAI |
+> | `claude-opus-4-6` | Anthropic |
+> | `gemini-3.1-pro` | Google |
+>
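+> Smaller or weaker models will produce broken layouts, missing interactivity, or incomplete visualizations. Note that the agent constructs its model with `ChatOpenAI`, so the non-OpenAI models listed above will likely need an OpenAI-compatible endpoint or a change to `apps/agent/main.py`.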
+
 - **App**: http://localhost:3000
 - **Agent**: http://localhost:8123
 
diff --git a/apps/agent/main.py b/apps/agent/main.py
index 62245c1..9d558e6 100644
--- a/apps/agent/main.py
+++ b/apps/agent/main.py
@@ -3,6 +3,8 @@
 It defines the workflow graph, state, tools, nodes and edges.
 """
 
+import os
+
 from copilotkit import CopilotKitMiddleware
 from langchain.agents import create_agent
 from langchain_openai import ChatOpenAI
@@ -17,7 +19,7 @@
 _skills_text = load_all_skills()
 
 agent = create_agent(
-    model=ChatOpenAI(model="gpt-5.4-2026-03-05"),
+    model=ChatOpenAI(model=os.environ.get("LLM_MODEL", "gpt-5.4-2026-03-05")),
     tools=[query_data, *todo_tools, generate_form, *template_tools],
     middleware=[CopilotKitMiddleware()],
     state_schema=AgentState,
diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts
index 3485f77..1cff8a0 100644
--- a/apps/app/src/app/api/copilotkit/route.ts
+++ b/apps/app/src/app/api/copilotkit/route.ts
@@ -7,8 +7,8 @@ import { LangGraphAgent } from "@copilotkit/runtime/langgraph";
 import { NextRequest } from "next/server";
 
 // Simple sliding-window rate limiter (per IP)
-const RATE_LIMIT_WINDOW_MS = 60_000; // 1 minute
-const RATE_LIMIT_MAX = 20;           // max requests per window
+const RATE_LIMIT_WINDOW_MS = Number(process.env.RATE_LIMIT_WINDOW_MS) || 60_000;
+const RATE_LIMIT_MAX = Number(process.env.RATE_LIMIT_MAX) || 40;
 const hits = new Map<string, number[]>();
 
 function isRateLimited(ip: string): boolean {

From e45e254f993a3017529be2b5483b048ec1b89e85 Mon Sep 17 00:00:00 2001
From: jerelvelarde <Jereljohnvelarde@gmail.com>
Date: Tue, 24 Mar 2026 13:35:50 -0700
Subject: [PATCH 5/6] fix: feature-flag in-memory rate limiter, disable by
 default

The in-memory rate limiter keeps its counters per process, so with
multiple instances each one enforces its own limit and the effective
per-IP limit grows with the instance count. Disable it by default via
the RATE_LIMIT_ENABLED env var so it doesn't silently misbehave at
scale. It can be re-enabled for single-instance or low-traffic
deployments.
---
 .env.example                             |  5 +++--
 apps/app/src/app/api/copilotkit/route.ts | 24 +++++++++++++++---------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/.env.example b/.env.example
index 95f6441..9f6d59f 100644
--- a/.env.example
+++ b/.env.example
@@ -4,6 +4,7 @@ OPENAI_API_KEY=
 # Recommended: gpt-5.4, gpt-5.4-pro, claude-opus-4-6, gemini-3.1-pro
 LLM_MODEL=gpt-5.4-2026-03-05
 
-# Rate limiting (per IP)
+# Rate limiting (per IP) — disabled by default
+RATE_LIMIT_ENABLED=false
 RATE_LIMIT_WINDOW_MS=60000
-RATE_LIMIT_MAX=40
\ No newline at end of file
+RATE_LIMIT_MAX=40
diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts
index 1cff8a0..8d31691 100644
--- a/apps/app/src/app/api/copilotkit/route.ts
+++ b/apps/app/src/app/api/copilotkit/route.ts
@@ -6,12 +6,16 @@ import {
 import { LangGraphAgent } from "@copilotkit/runtime/langgraph";
 import { NextRequest } from "next/server";
 
-// Simple sliding-window rate limiter (per IP)
+// Simple in-memory sliding-window rate limiter (per IP)
+// Enable via RATE_LIMIT_ENABLED=true — off by default.
+// Counters are per-process; for multi-instance or high-traffic deployments, consider Redis-backed rate limiting instead.
+const RATE_LIMIT_ENABLED = process.env.RATE_LIMIT_ENABLED === "true";
 const RATE_LIMIT_WINDOW_MS = Number(process.env.RATE_LIMIT_WINDOW_MS) || 60_000;
 const RATE_LIMIT_MAX = Number(process.env.RATE_LIMIT_MAX) || 40;
 const hits = new Map<string, number[]>();
 
 function isRateLimited(ip: string): boolean {
+  if (!RATE_LIMIT_ENABLED) return false;
   const now = Date.now();
   const timestamps = hits.get(ip)?.filter(t => t > now - RATE_LIMIT_WINDOW_MS) ?? [];
   timestamps.push(now);
@@ -20,14 +24,16 @@ function isRateLimited(ip: string): boolean {
 }
 
 // Prune stale entries every 5 min to prevent unbounded memory growth
-setInterval(() => {
-  const cutoff = Date.now() - RATE_LIMIT_WINDOW_MS;
-  hits.forEach((timestamps, ip) => {
-    const recent = timestamps.filter(t => t > cutoff);
-    if (recent.length === 0) hits.delete(ip);
-    else hits.set(ip, recent);
-  });
-}, 300_000);
+if (RATE_LIMIT_ENABLED) {
+  setInterval(() => {
+    const cutoff = Date.now() - RATE_LIMIT_WINDOW_MS;
+    hits.forEach((timestamps, ip) => {
+      const recent = timestamps.filter(t => t > cutoff);
+      if (recent.length === 0) hits.delete(ip);
+      else hits.set(ip, recent);
+    });
+  }, 300_000);
+}
 
 // Normalize Render's fromService hostport (bare host:port) into a full URL
 const raw = process.env.LANGGRAPH_DEPLOYMENT_URL;

From c00975eff5a92eeedace5d338f8975be732adcc1 Mon Sep 17 00:00:00 2001
From: jerelvelarde <Jereljohnvelarde@gmail.com>
Date: Tue, 24 Mar 2026 13:40:27 -0700
Subject: [PATCH 6/6] fix: add missing env vars to render.yaml blueprint

Wire LLM_MODEL to the agent service and rate limiter env vars to the
frontend service so operators don't need to configure them manually
in the Render dashboard.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 render.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/render.yaml b/render.yaml
index 0cf80d5..cb296a0 100644
--- a/render.yaml
+++ b/render.yaml
@@ -11,6 +11,8 @@ services:
         sync: false
       - key: LANGSMITH_API_KEY
         sync: false
+      - key: LLM_MODEL
+        value: gpt-5.4-2026-03-05
     buildFilter:
       paths:
         - apps/agent/**
@@ -30,6 +32,12 @@ services:
           property: hostport
       - key: LANGSMITH_API_KEY
         sync: false
+      - key: RATE_LIMIT_ENABLED
+        value: "false"
+      - key: RATE_LIMIT_WINDOW_MS
+        value: "60000"
+      - key: RATE_LIMIT_MAX
+        value: "40"
     buildFilter:
       paths:
         - apps/app/**