diff --git a/app/src/app/tools/[toolId]/page.tsx b/app/src/app/tools/[toolId]/page.tsx
index 154a99e..3c0901c 100644
--- a/app/src/app/tools/[toolId]/page.tsx
+++ b/app/src/app/tools/[toolId]/page.tsx
@@ -1,9 +1,9 @@
"use client";
import { Button, Label, Textarea } from "@ansospace/ui";
-import { ArrowUpRight, Crown, Lock, Play, X } from "lucide-react";
+import { ArrowUpRight, Crown, Lock, Play, Plus, X } from "lucide-react";
import { notFound, useParams } from "next/navigation";
-import { useCallback, useEffect, useState } from "react";
+import { useCallback, useEffect, useRef, useState } from "react";
import { CodeEditor } from "@/components/code-editor";
import { ResultViewer } from "@/components/result-viewer";
import { ToolLayout } from "@/components/tool-layout";
@@ -44,6 +44,9 @@ function ToolPageContent({ toolId }: { toolId: string }) {
return initial;
});
+ // Length selector modal state
+ const [showLengthModal, setShowLengthModal] = useState(false);
+
// Tier2 tools MUST bypass Next.js proxy buffering to prevent silent timeouts on long executions
const runnerUrl = process.env.NEXT_PUBLIC_TOOL_RUNNER_URL || "http://localhost:9080";
const apiBase = tool.tier === "tier2" ? `${runnerUrl}/api/tools` : "/api/tools";
@@ -83,23 +86,25 @@ function ToolPageContent({ toolId }: { toolId: string }) {
}
}, [mounted, toolUsage.limitReached]);
+ // Check if all required fields are filled
+ const isReady = tool.requiredFields.every((field) => fields[field]?.trim());
+
const handleExecute = () => {
- // Check per-tool usage limit
if (!canExecute(tool.id)) {
setShowUpgradeDialog(true);
return;
}
- // Check required fields
for (const field of tool.requiredFields) {
if (!fields[field]?.trim()) return;
}
- execute({ ...fields, model });
- // Track usage for THIS tool
- trackExecution(tool.id);
- };
- // Check if all required fields are filled
- const isReady = tool.requiredFields.every((field) => fields[field]?.trim());
+ if (tool.requireLengthSelection) {
+ setShowLengthModal(true);
+ } else {
+ execute({ ...fields, model });
+ trackExecution(tool.id);
+ }
+ };
return (
<>
@@ -117,6 +122,7 @@ function ToolPageContent({ toolId }: { toolId: string }) {
config={input}
value={fields[input.key] || ""}
onChange={(value) => setField(input.key, value)}
+ onFieldChange={(key, value) => setField(key, value)}
/>
))}
@@ -202,11 +208,66 @@ function ToolPageContent({ toolId }: { toolId: string }) {
{/* Results */}
-
+ {tool.ResultComponent && result ? (
+
+ ) : (
+
+ )}
+ {/* Length selector modal (for tools that require it) */}
+ {tool.requireLengthSelection && showLengthModal && (
+
+
+
+
+
+
+
Caption Length
+
+ How long do you want your captions to be?
+
+
+
+
+
+
+
+
+ )}
+
{/* ─── Upgrade Dialog Popup ──────────────────────────── */}
{showUpgradeDialog && (
@@ -297,11 +358,18 @@ function InputField({
config,
value,
onChange,
+ onFieldChange,
}: {
config: InputFieldConfig;
value: string;
onChange: (value: string) => void;
+ onFieldChange?: (key: string, value: string) => void;
}) {
+ const textareaFileRef = useRef(null);
+ const [textareaShowPreview, setTextareaShowPreview] = useState(false);
+ const [textareaSpinning, setTextareaSpinning] = useState(false);
+ const [textareaAttachedImage, setTextareaAttachedImage] = useState("");
+
switch (config.type) {
case "code":
return (
@@ -316,19 +384,103 @@ function InputField({
);
- case "textarea":
+ case "textarea": {
+ const hasImage = config.attachable && !!textareaAttachedImage;
return (
);
+ }
case "select":
return (
@@ -479,7 +631,7 @@ function InputField({
)
);
const valid = Array.from(files).filter((f) => {
- const ext = "." + f.name.split(".").pop()?.toLowerCase();
+ const ext = `.${f.name.split(".").pop()?.toLowerCase()}`;
const p = f.webkitRelativePath || f.name;
if (
p.includes("__pycache__") ||
diff --git a/app/src/components/tools/caption-result-display.tsx b/app/src/components/tools/caption-result-display.tsx
new file mode 100644
index 0000000..79b163a
--- /dev/null
+++ b/app/src/components/tools/caption-result-display.tsx
@@ -0,0 +1,142 @@
+"use client";
+
+import { Check, Copy } from "lucide-react";
+import { useCallback, useState } from "react";
+
+function VariationCopyButton({ text }: { text: string }) {
+ const [copied, setCopied] = useState(false);
+ const handleCopy = useCallback(async () => {
+ await navigator.clipboard.writeText(text);
+ setCopied(true);
+ setTimeout(() => setCopied(false), 2000);
+ }, [text]);
+
+ return (
+
+ );
+}
+
+function CaptionVariationDisplay({
+ variations: rawVariations,
+ title,
+ platformName,
+ lengthType,
+}: {
+ variations?: { text: string; chars: number; limit: number; title?: string }[];
+ title?: string | null;
+ platformName?: string;
+ lengthType?: string;
+}) {
+ const [activeIdx, setActiveIdx] = useState(0);
+ if (!rawVariations || rawVariations.length === 0) return null;
+
+ const v = rawVariations[activeIdx];
+ const varTitle = v.title || title;
+ const copyText = varTitle ? `Title: ${varTitle}\n\nCaption: ${v.text}` : v.text;
+ const charRatio = v.chars / v.limit;
+ const barWidth = Math.min(charRatio * 100, 100);
+ const barColor =
+ charRatio > 1.0 ? "bg-red-500" : charRatio > 0.8 ? "bg-amber-500" : "bg-green-500";
+
+ return (
+
+ {platformName && (
+
+
{platformName}
+
+ {lengthType === "short" ? "Short" : "Long"}
+
+
+ )}
+
+
+ {rawVariations.map((v, i) => (
+
+ ))}
+
+
+
+ {varTitle && (
+
+
+
+
Title {activeIdx + 1}
+
{varTitle}
+
+
+
+
+ )}
+
+
+
+
+
+
+ {v.chars} / {v.limit}
+
+
+
+
+
+ );
+}
+
+export function CaptionResultDisplay({ result }: { result: string }) {
+ let parsed: {
+ variations?: { text: string; chars: number; limit: number; title?: string }[];
+ title?: string | null;
+ metadata?: { platform_name?: string; platform?: string; length_type?: string };
+ } | null = null;
+
+ try {
+ const data = JSON.parse(result);
+ if (data.variations || data.title) {
+ parsed = data;
+ }
+ } catch {
+ // not JSON, fall through to raw display
+ }
+
+ if (!parsed) {
+ return {result};
+ }
+
+ return (
+
+ );
+}
diff --git a/app/src/lib/tools/caption-generator.ts b/app/src/lib/tools/caption-generator.ts
index a419e17..4c33a4c 100644
--- a/app/src/lib/tools/caption-generator.ts
+++ b/app/src/lib/tools/caption-generator.ts
@@ -1,4 +1,5 @@
-import type { ToolDefinition } from "@/types";
+import { CaptionResultDisplay } from "@/components/tools/caption-result-display";
+import type { ToolDefinition } from "@/types";
export const captionGenerator: ToolDefinition = {
id: "caption-generator",
@@ -8,58 +9,37 @@ export const captionGenerator: ToolDefinition = {
category: "content",
icon: "PenTool",
status: "active",
-
- requiredFields: ["contentDescription"],
- defaultModel: "llama-3.3-70b",
-
- buildSystemPrompt: ({ platform }) =>
- `You are a social media content strategist. Generate engaging, platform-optimized captions. Rules:
-
-1. **Match the platform tone** - ${platform || "All platforms"} style and conventions
-2. **Hook first** - Start with an attention-grabbing line
-3. **Include CTAs** - ask questions, invite engagement
-4. **Hashtags** - 5-10 relevant hashtags (platform-appropriate)
-5. **Emojis** - Use strategically, not excessively
-6. **Character limits** - Respect platform limits (Twitter: 280, Instagram caption: 2200)
-
-Generate 3 caption variations: Professional, Casual, and Bold/Edgy.`,
-
- buildUserPrompt: ({ contentDescription, platform, tone, cta }) =>
- `**CONTENT:** ${contentDescription}\n\n**PLATFORM:** ${platform || "All platforms"}\n\n${tone ? `**TONE:** ${tone}\n` : ""}${cta ? `**CALL TO ACTION:** ${cta}\n` : ""}\n\nGenerate 3 caption variations.`,
+ tier: "tier2",
+ requiredFields: ["prompt", "platform"],
+ defaultModel: "kimi-k2.5",
+ buildSystemPrompt: () => "",
+ buildUserPrompt: () => "",
inputs: [
- {
- key: "contentDescription",
- label: "Content Description",
- type: "textarea",
- placeholder:
- "E.g. 'We just launched our AI-powered developer tools platform. It has 22+ free tools for debugging, testing, and code generation.'",
- rows: 4,
- },
{
key: "platform",
label: "Platform",
type: "select",
options: [
- { value: "All platforms", label: "All Platforms" },
- { value: "Instagram", label: "Instagram" },
- { value: "Twitter/X", label: "Twitter / X" },
- { value: "LinkedIn", label: "LinkedIn" },
- { value: "TikTok", label: "TikTok" },
- { value: "YouTube", label: "YouTube (description)" },
+ { value: "youtube", label: "YouTube" },
+ { value: "youtube_shorts", label: "YouTube Shorts" },
+ { value: "tiktok", label: "TikTok" },
+ { value: "instagram", label: "Instagram" },
+ { value: "reddit", label: "Reddit" },
+ { value: "linkedin", label: "LinkedIn" },
+ { value: "x_twitter", label: "X (Twitter)" },
],
},
{
- key: "tone",
- label: "Tone (optional)",
- type: "text",
- placeholder: "E.g. 'Professional but approachable'",
- },
- {
- key: "cta",
- label: "Call to Action (optional)",
- type: "text",
- placeholder: "E.g. 'Sign up for the beta'",
+ key: "prompt",
+ label: "Caption Prompt",
+ type: "textarea",
+ rows: 6,
+ attachable: { accept: "image/jpeg,image/png,image/webp,image/gif" },
+ placeholder:
+ "E.g. 'We just launched our AI-powered developer tools platform. It has 22+ free tools for debugging, testing, and code generation.'",
},
],
+ ResultComponent: CaptionResultDisplay,
+ requireLengthSelection: true,
};
diff --git a/app/src/types/index.ts b/app/src/types/index.ts
index 5ba04ee..088e3ad 100644
--- a/app/src/types/index.ts
+++ b/app/src/types/index.ts
@@ -11,6 +11,8 @@ export interface InputFieldConfig {
options?: { value: string; label: string }[];
/** For "files" type: accepted file extensions (e.g. ".py,.js,.zip") */
accept?: string;
+ /** For "textarea" type: allows attaching files (e.g. images) */
+ attachable?: { accept: string };
/** For "files" type: max number of files */
maxFiles?: number;
/** For "files" type: max total upload size in MB */
@@ -55,6 +57,14 @@ export interface ToolDefinition {
// --- UI config ---
/** Declarative form field definitions */
inputs: InputFieldConfig[];
+ /** Custom component for rendering tool-specific results. Receives raw result string. */
+ ResultComponent?: React.ComponentType<{
+ result: string;
+ isLoading?: boolean;
+ error?: { message: string; code?: string; action?: string } | null;
+ }>;
+ /** When true, shows a length-selection (short/long) dialog before execution. */
+ requireLengthSelection?: boolean;
}
export interface CategoryInfo {
diff --git a/services/python-tools/tools/caption-generator/generator.py b/services/python-tools/tools/caption-generator/generator.py
new file mode 100644
index 0000000..d48b03d
--- /dev/null
+++ b/services/python-tools/tools/caption-generator/generator.py
@@ -0,0 +1,314 @@
+import re
+import asyncio
+from openai import AsyncOpenAI
+from rules import PLATFORM_LIMITS, PLATFORM_REVERSE_MAP, get_limits
+
+
+def sanitize_output(text: str) -> str:
+    """Normalise raw model text: literal "\\n" becomes a newline, dash runs and em dashes a single hyphen."""
+    cleaned = text.replace("\\n", "\n")
+    # Applied in order: hyphen runs, em dashes, then runs of 3+ newlines squeezed to one blank line.
+    for pattern, replacement in ((r'-{2,}', '-'), (r'—', '-'), (r'\n{3,}', '\n\n')):
+        cleaned = re.sub(pattern, replacement, cleaned)
+    return cleaned.strip()
+
+async def extra_text(image_data:str, api_key: str) ->str:
+    """Extract the visible text from an image via the vision model (best-effort OCR).
+
+    Args:
+        image_data: Value passed through as the ``image_url`` of the request; empty skips the call.
+        api_key: API key for the OpenAI-compatible endpoint at api.oxlo.ai.
+
+    Returns:
+        The text reported by the model, or "" when there is no image or the request fails.
+    """
+    if not image_data:
+        return ""
+    import logging  # local import: this module does not import logging at file level
+    instruction = "Extract all visible text from this image. Return only the text content, nothing else."
+    content = [{"type": "text", "text": instruction}, {"type": "image_url", "image_url": {"url": image_data}}]
+    try:
+        # The context manager closes the underlying HTTP client instead of leaking one per call.
+        async with AsyncOpenAI(base_url="https://api.oxlo.ai/v1", api_key=api_key) as client:
+            response = await client.chat.completions.create(
+                model="kimi-k2.5",
+                messages=[{"role": "user", "content": content}],
+                max_tokens=1000,
+            )
+        return response.choices[0].message.content or ""
+    except Exception as e:  # OCR is optional context, so a failure must not abort caption generation
+        logging.getLogger("caption-generator").warning("OCR error: %s", e)
+        return ""
+
+def build_prompt(platform: str,length_type: str, prompt: str, context_from_image: str = "") -> str:
+    """Build the user prompt that asks the model for captions on one platform.
+
+    Three templates exist: Reddit (three titled posts with length-specific title limits),
+    platforms with an optional title (three titled captions), and everything else (one
+    untitled caption). Unknown platforms use the LinkedIn rules.
+
+    Args:
+        platform: Key of ``PLATFORM_LIMITS``, e.g. "youtube" or "x_twitter".
+        length_type: "short" or "long"; selects the character and word limits.
+        prompt: The user's topic, inserted verbatim at the end of the template.
+        context_from_image: Optional text appended as additional context when non-empty.
+
+    Returns:
+        The complete prompt text.
+    """
+    plat = PLATFORM_LIMITS.get(platform, PLATFORM_LIMITS["linkedin"])
+    limits = get_limits(platform, length_type)
+    has_title = (plat.get("title_max", 0) > 0 and plat.get("title_optional", True)) or "title_short_min" in plat
+
+    style_guides = {
+        "engaging_informative": "engaging, informative, YouTube-friendly. Hook viewers in the first line. Clear and conversational.",
+        "punchy_short_form": "punchy, fast-paced, hook-first. Perfect for short attention spans. Bold and energetic.",
+        "trending_energetic": "trending, energetic, TikTok-native. Use popular phrases naturally. Fun and relatable.",
+        "visual_storytelling": "visual-friendly, storytelling-focused. Complement the image/video. Emotional and engaging.",
+        "authentic_community": "authentic, community-focused, Reddit-native. No clickbait. Honest and direct.",
+        "professional_thoughtful": "professional, thought-provoking, LinkedIn-appropriate. No emojis or minimal. Value-driven.",
+        "concise_punchy": "concise, punchy, hook-first. Every word counts. Bold and direct.",
+    }
+    style_guide = style_guides.get(plat.get("style", "concise_punchy"), "concise and engaging")
+
+    emoji_count = plat.get("emoji_limit", (1, 3))
+    emoji_guide = f"Use {emoji_count[0]}-{emoji_count[1]} emojis, placed at the end of sentences or at the very end of the caption."
+    if plat.get("style") == "professional_thoughtful":
+        emoji_guide = "Use 0-1 emoji only if truly needed, or skip emojis entirely."
+
+    cta_text = ", ".join(plat.get("cta_patterns", [])[:3])
+
+    hashtag_count = plat.get("hashtag_count", (3, 5))
+    if hashtag_count[1] == 0:
+        hashtag_text = "Do NOT use any hashtags."
+    else:
+        hashtag_text = f"Add {hashtag_count[0]}-{hashtag_count[1]} relevant hashtags at the end on a new line."
+
+    word_limits = {
+        "youtube": {"short": 30, "long": 60},
+        "youtube_shorts": {"short": 15, "long": 25},
+        "tiktok": {"short": 15, "long": 25},
+        "instagram": {"short": 15, "long": 25},
+        "reddit": {"short": 100, "long": 200},
+        "linkedin": {"short": 100, "long": 200},
+        "x_twitter": {"short": 50, "long": 100}
+    }
+    max_words = word_limits.get(platform, {}).get(length_type, 25)
+
+    if platform == "reddit":
+        title_min = limits.get("title_min", 50)
+        title_max = limits.get("title_max", 120)
+        caption_min = limits.get("caption_min", 500)
+        caption_max = limits.get("caption_max", 1000)
+
+        prompt_text = f"""Create 3 Reddit posts with different titles.
+
+Write 3 posts:
+Title 1: [title {title_min}-{title_max} chars]
+Description 1: [post {caption_min}-{caption_max} chars, {max_words} words max]
+
+Title 2: [different title]
+Description 2: [different post]
+
+Title 3: [different title]
+Description 3: [different post]
+
+Rules:
+- All 3 titles must use different words
+- Use line breaks in body
+- End each with a question
+- {emoji_guide}
+
+Topic: {prompt}"""
+
+    elif has_title:
+        title_max_val = plat.get("title_max", 60)
+        caption_min = limits.get("caption_min", 60)
+        caption_max = limits.get("caption_max", 100)
+
+        prompt_text = f"""Create 3 YouTube video captions with different titles.
+
+Write 3 posts:
+Title 1: [title max {title_max_val} chars]
+Description 1: [caption {caption_min}-{caption_max} chars, {max_words} words max - make it detailed and engaging]
+
+Title 2: [different title]
+Description 2: [different caption]
+
+Title 3: [different title]
+Description 3: [different caption]
+
+Rules:
+- All 3 titles must use different words/angles
+- Don't repeat same title words
+- Descriptions should be {caption_min}-{caption_max} characters - write close to the max
+- {hashtag_text}
+- {cta_text}
+- {emoji_guide}
+
+Topic: {prompt}"""
+
+    else:
+        caption_min = limits.get("caption_min", 100)
+        caption_max = limits.get("caption_max", 280)
+
+        prompt_text = f"""Write a natural, human-like social media caption.
+
+Platform: {plat['name']}
+Style: {style_guide}
+
+Write EXACTLY {caption_min}-{caption_max} characters.
+
+Requirements:
+- Hook viewers in the first line - this is the most important part
+- Be engaging, {style_guide}
+- {hashtag_text}
+- {cta_text}
+- {emoji_guide}
+- Do NOT use double dashes (--) or em dashes (---) - use a single hyphen (-) instead
+- Write like a real person, not like an AI
+- MAXIMUM {max_words} WORDS
+
+User's topic: {prompt}"""
+
+    # The image context is appended the same way for every template.
+    if context_from_image:
+        prompt_text += f"\n\nAdditional context from image: {context_from_image}"
+    return prompt_text
+
+
+async def generate_caption(
+    client: AsyncOpenAI,
+    prompt: str,
+    platform: str,
+    length_type: str,
+    context_from_image: str = ""
+) -> str:
+    """Request one completion for the prompt built by ``build_prompt`` and return its raw text.
+
+    Returns "" when the model sends no content; API errors propagate to the caller.
+    """
+    user_prompt = build_prompt(platform, length_type, prompt, context_from_image)
+    limits = get_limits(platform, length_type)
+    low, high = limits.get('caption_min', 50), limits.get('caption_max', 100)
+    # Titled platforms are prompted for three title + description posts, so the budget has
+    # to cover all of them; a budget sized for a single caption cuts the response mid-post.
+    posts = 3 if "title_max" in limits else 1
+    # ~2 characters per token leaves headroom for emojis, hashtags and the post labels.
+    max_tokens = min(max(posts * (high + limits.get("title_max", 0) + 40) // 2, 512), 4096)
+    system = f"You are a social media caption writing assistant. Return ONLY the caption text - no explanations, no markdown formatting, no extra text. CRITICAL: The caption must be EXACTLY between {low} and {high} characters. NEVER exceed {high} characters. Do not use double dashes (--) or em dashes (---). Use a single hyphen (-) instead."
+    response = await client.chat.completions.create(
+        model="kimi-k2.5",
+        messages=[{"role": "system", "content": system}, {"role": "user", "content": user_prompt}],
+        temperature=0.7,
+        max_tokens=max_tokens,
+    )
+    return response.choices[0].message.content or ""
+
+
+def _truncate(text: str, limit: int) -> str:
+    """Cut ``text`` to at most ``limit`` characters, breaking at the last space when there is one."""
+    if len(text) <= limit:
+        return text
+    cut = text[:limit]
+    last_space = cut.rfind(" ")
+    return (cut[:last_space] if last_space > 0 else cut).strip()
+
+
+def _split_posts(text: str) -> list:
+    """Split one model response into per-post chunks; a response may hold several labelled posts."""
+    # Only "Title N:" / "Option N:" at the start of a line separates posts. Numbers, list
+    # items or the word "title" inside a caption are content and must not cut it apart.
+    label = r'^[ \t#*>]*(?:Title|Option)\s*\d*\s*\**:'
+    starts = [m.start() for m in re.finditer(label, text, flags=re.IGNORECASE | re.MULTILINE)]
+    if len(starts) < 2:
+        return [text]
+    # Anything before the first label (e.g. an introduction) is dropped.
+    return [text[a:b].strip() for a, b in zip(starts, starts[1:] + [len(text)])]
+
+
+def _parse_post(chunk: str, wants_title: bool, title_limit: int) -> tuple:
+    """Separate one post chunk into ``(title, body)``; ``title`` is None when absent or unwanted."""
+    title = None
+    labelled = re.match(r'[\s#*>]*Title\s*\d*\s*\**:\**[ \t]*([^\n]*)', chunk, flags=re.IGNORECASE)
+    if labelled:
+        title, chunk = labelled.group(1).strip(" *"), chunk[labelled.end():]
+    # Drop a leading "Option N:" / "Description N:" label so it never shows up in the caption.
+    chunk = re.sub(r'^[\s#*>]*(?:Option|Description)\s*\d*\s*\**:\**\s*', '', chunk, flags=re.IGNORECASE).strip()
+    lines = [line.strip() for line in chunk.split("\n") if line.strip()]
+    if wants_title and not title and len(lines) > 1:
+        # Unlabelled response: a short first line is taken as the title, but only when a
+        # body remains afterwards, so a one-line caption is never emptied into the title.
+        if len(lines[0]) <= title_limit and not lines[0].startswith("#"):
+            title, chunk = lines[0], chunk.split("\n", 1)[1].strip()
+    if not wants_title or not title:
+        return None, chunk
+    return _truncate(title, title_limit), chunk
+
+
+async def generate_all_captions(
+    client: AsyncOpenAI,
+    prompt: str,
+    platform: str,
+    length_type: str,
+    context_from_image: str = ""
+) -> dict:
+    """Generate up to three caption variations for one platform and length setting.
+
+    Three completions are requested concurrently and parsed into posts. Titled platforms
+    (those for which ``get_limits`` returns ``title_max``) get a title per variation, and
+    captions longer than the platform limit are truncated at a word boundary.
+
+    Args:
+        client: Client used by ``generate_caption`` for the three completions.
+        prompt: The user's topic.
+        platform: Key of ``PLATFORM_LIMITS``; unknown keys use the LinkedIn rules.
+        length_type: "short" or "long".
+        context_from_image: Optional text extracted from an attached image.
+
+    Returns:
+        A dict with ``title`` (first title found or None), ``titles``, ``variation_type``,
+        ``platform`` (display name) and ``variations``: a list of dicts holding ``text``,
+        ``chars``, ``limit`` and ``title``.
+    """
+    plat = PLATFORM_LIMITS.get(platform, PLATFORM_LIMITS["linkedin"])
+    limits = get_limits(platform, length_type)
+    # get_limits() only returns title_max for platforms whose prompt asks for titles.
+    wants_title = "title_max" in limits
+    title_limit = limits.get("title_max", 60)
+    caption_limit = limits.get("caption_max", 280)
+
+    responses = await asyncio.gather(
+        generate_caption(client, prompt, platform, length_type, context_from_image),
+        generate_caption(client, prompt, platform, length_type, context_from_image),
+        generate_caption(client, prompt, platform, length_type, context_from_image),
+    )
+
+    results = []
+    for response in responses:
+        if not response:
+            continue
+        for chunk in _split_posts(sanitize_output(response.strip())):
+            var_title, body = _parse_post(chunk, wants_title, title_limit)
+            caption_text = _truncate(body, caption_limit)
+            if not caption_text:
+                continue  # a label without content is not a usable variation
+            results.append({
+                "text": caption_text,
+                "chars": len(caption_text),
+                "limit": caption_limit,
+                "title": var_title,
+            })
+
+    # Each variation keeps the title parsed from its own post, so titles and captions
+    # cannot drift out of step when one post arrives without a title.
+    results = results[:3]
+    titles = [r["title"] for r in results if r["title"]]
+
+    return {
+        "title": titles[0] if titles else None,
+        "titles": titles,
+        "variation_type": length_type,
+        "platform": plat.get("name", platform),
+        "variations": results,
+    }
\ No newline at end of file
diff --git a/services/python-tools/tools/caption-generator/helper.py b/services/python-tools/tools/caption-generator/helper.py
new file mode 100644
index 0000000..ebe9a98
--- /dev/null
+++ b/services/python-tools/tools/caption-generator/helper.py
@@ -0,0 +1,25 @@
+from typing import List, Tuple
+
+def jaccard_similarity(text1: str, text2: str) -> float:
+    """Word-level Jaccard index of two texts (case-insensitive, whitespace-tokenised).
+
+    Returns a value in [0.0, 1.0]; 0.0 when either text contains no words.
+    """
+    vocab_a, vocab_b = ({*t.lower().split()} for t in (text1, text2))
+    if not (vocab_a and vocab_b):
+        return 0.0
+    shared = len(vocab_a & vocab_b)
+    # Both vocabularies are non-empty here, so the union size is never zero.
+    return shared / len(vocab_a | vocab_b)
+
+
+def check_variations(captions: List[str]) -> List[Tuple[int, int, float]]:
+    """Score every unordered pair of captions with ``jaccard_similarity``.
+
+    Returns ``(i, j, score)`` tuples with ``i < j``, ordered by ``i`` and then ``j``.
+    """
+    return [
+        (left, right, jaccard_similarity(first, captions[right]))
+        for left, first in enumerate(captions)
+        for right in range(left + 1, len(captions))
+    ]
\ No newline at end of file
diff --git a/services/python-tools/tools/caption-generator/requirements.txt b/services/python-tools/tools/caption-generator/requirements.txt
new file mode 100644
index 0000000..3ceaffc
--- /dev/null
+++ b/services/python-tools/tools/caption-generator/requirements.txt
@@ -0,0 +1 @@
+openai>=1.0.0
\ No newline at end of file
diff --git a/services/python-tools/tools/caption-generator/rules.py b/services/python-tools/tools/caption-generator/rules.py
new file mode 100644
index 0000000..280257c
--- /dev/null
+++ b/services/python-tools/tools/caption-generator/rules.py
@@ -0,0 +1,148 @@
+# Platform-specific character limits and caption generation rules.
+# Caption/title values are character counts; hashtag_count and emoji_limit are (min, max) pairs.
+PLATFORM_LIMITS = {  # keyed by the platform values sent by the frontend
+    "youtube": {
+        "name": "YouTube",
+        "caption_short_min": 80,
+        "caption_short_max": 150,
+        "caption_long_min": 150,
+        "caption_long_max": 200,
+        "title_max": 60,
+        "title_optional": True,
+        "hashtag_count": (3, 5),
+        "cta_patterns": ["Like and subscribe", "Let me know in comments", "Share your thoughts", "Don't forget to subscribe"],
+        "style": "engaging_informative",
+        "emoji_limit": (1, 3),
+    },
+    "youtube_shorts": {
+        "name": "YouTube Shorts",
+        "caption_short_min": 60,
+        "caption_short_max": 100,
+        "caption_long_min": 80,
+        "caption_long_max": 100,
+        "title_max": 40,
+        "title_optional": True,
+        "hashtag_count": (2, 3),
+        "cta_patterns": ["Follow for more", "Like if you enjoyed", "Share with friends"],
+        "style": "punchy_short_form",
+        "emoji_limit": (1, 2),
+    },
+    "tiktok": {
+        "name": "TikTok",
+        "caption_short_min": 50,
+        "caption_short_max": 80,
+        "caption_long_min": 100,
+        "caption_long_max": 150,
+        "title_max": 35,
+        "title_optional": False,  # False: get_limits() omits title_max, so no title is requested
+        "hashtag_count": (3, 5),
+        "cta_patterns": ["Follow for more", "Duet this", "Share with friends", "Save this"],
+        "style": "trending_energetic",
+        "emoji_limit": (2, 5),
+    },
+    "instagram": {
+        "name": "Instagram",
+        "caption_short_min": 80,
+        "caption_short_max": 100,
+        "caption_long_min": 125,
+        "caption_long_max": 150,
+        "title_max": 0,
+        "title_optional": False,
+        "hashtag_count": (3, 5),
+        "cta_patterns": ["Double tap if you agree", "Tag someone", "Share with a friend", "Link in bio"],
+        "style": "visual_storytelling",
+        "emoji_limit": (3, 6),
+    },
+    "reddit": {
+        "name": "Reddit",
+        "title_short_min": 60,  # having this key selects length-specific title limits in get_limits()
+        "title_short_max": 120,
+        "title_long_min": 120,
+        "title_long_max": 200,
+        "caption_short_min": 500,
+        "caption_short_max": 1000,
+        "caption_long_min": 1000,
+        "caption_long_max": 2000,
+        "hashtag_count": (0, 0),
+        "cta_patterns": ["What do you think?", "Share your experience", "Comments welcome"],
+        "style": "authentic_community",
+        "emoji_limit": (0, 1),
+    },
+    "linkedin": {
+        "name": "LinkedIn",
+        "caption_short_min": 150,
+        "caption_short_max": 300,
+        "caption_long_min": 600,
+        "caption_long_max": 2000,
+        "title_max": 0,
+        "title_optional": False,
+        "hashtag_count": (3, 5),
+        "cta_patterns": ["What are your thoughts?", "Share your experience", "Let's connect", "Comments welcome"],
+        "style": "professional_thoughtful",
+        "emoji_limit": (0, 2),
+    },
+    "x_twitter": {
+        "name": "X (Twitter)",
+        "caption_short_min": 100,
+        "caption_short_max": 140,
+        "caption_long_min": 200,
+        "caption_long_max": 280,
+        "title_max": 0,
+        "title_optional": False,
+        "hashtag_count": (2, 3),
+        "cta_patterns": ["Quote this", "Repost", "Share your thoughts"],
+        "style": "concise_punchy",
+        "emoji_limit": (0, 1),
+    },
+}
+
+# Identity mapping of the platform values sent by the frontend (same keys as PLATFORM_LIMITS).
+PLATFORM_KEYS = {
+    "youtube": "youtube",
+    "youtube_shorts": "youtube_shorts",
+    "tiktok": "tiktok",
+    "instagram": "instagram",
+    "reddit": "reddit",
+    "linkedin": "linkedin",
+    "x_twitter": "x_twitter",
+}
+
+# Maps display names (the "name" values above, plus the "X(Twitter)" spelling) to platform keys.
+PLATFORM_REVERSE_MAP = {
+    "YouTube": "youtube",
+    "YouTube Shorts": "youtube_shorts",
+    "TikTok": "tiktok",
+    "Instagram": "instagram",
+    "Reddit": "reddit",
+    "LinkedIn": "linkedin",
+    "X (Twitter)": "x_twitter",
+    "X(Twitter)": "x_twitter",
+}
+
+def get_limits(platform: str, length_type: str) -> dict:
+    """Resolve the character limits for one platform and length setting.
+
+    Unknown platforms use the LinkedIn rules, and any ``length_type`` other than
+    "short" selects the "long" limits.
+
+    Returns:
+        A dict that always has ``caption_min``/``caption_max``. Platforms with
+        ``title_short_min`` rules also get ``title_min``/``title_max`` for the chosen
+        length; platforms with a positive, optional ``title_max`` get ``title_max``.
+    """
+    rules = PLATFORM_LIMITS.get(platform, PLATFORM_LIMITS["linkedin"])
+    size = "short" if length_type == "short" else "long"
+    caption = {
+        "caption_min": rules[f"caption_{size}_min"],
+        "caption_max": rules[f"caption_{size}_max"],
+    }
+
+    # Length-dependent title limits take priority over a fixed title_max.
+    if "title_short_min" in rules:
+        return {"title_min": rules[f"title_{size}_min"], "title_max": rules[f"title_{size}_max"], **caption}
+
+    # A fixed title limit only applies when the platform marks titles as optional.
+    if rules.get("title_max", 0) > 0 and rules.get("title_optional", True):
+        return {**caption, "title_max": rules["title_max"]}
+
+    return caption
\ No newline at end of file
diff --git a/services/python-tools/tools/caption-generator/tool.py b/services/python-tools/tools/caption-generator/tool.py
new file mode 100644
index 0000000..c815156
--- /dev/null
+++ b/services/python-tools/tools/caption-generator/tool.py
@@ -0,0 +1,210 @@
+import os
+import re
+from openai import AsyncOpenAI
+import asyncio
+from generator import extra_text, generate_all_captions
+from rules import PLATFORM_LIMITS, PLATFORM_REVERSE_MAP
+from helper import check_variations
+
+MANIFEST = {  # Static metadata describing this tool.
+    "id": "caption-generator",
+    "name": "Social Media Captions",
+    "description": "Generate platform-optimized captions with hashtag suggestions for YouTube, TikTok, Instagram, LinkedIn, Reddit, and X/Twitter.",
+    "author": "Oxlo Team",
+    "version": "3.0.0",
+}
+# Tuning for run(): regenerate when any caption pair is more similar than the threshold.
+MAX_RETRIES = 2  # regeneration attempts allowed after the first one
+SIMILARITY_THRESHOLD = 0.70  # word-level Jaccard score above which two captions count as near-duplicates
+
+
+def normalize_platform(platform: str) -> str:
+    """Map a user- or frontend-supplied platform name to a key of ``PLATFORM_LIMITS``.
+
+    Case, whitespace and punctuation are ignored ("youtube-shorts", "x (twitter)"), and
+    missing, non-string or unrecognised values fall back to "linkedin".
+    """
+    if not platform or not isinstance(platform, str):
+        return "linkedin"
+
+    def squash(name: str) -> str:
+        # Keep only letters and digits so spacing/punctuation variants compare equal.
+        return re.sub(r'[^a-z0-9]+', '', name.lower())
+
+    wanted = squash(platform)
+    aliases = {"x": "x_twitter", "twitter": "x_twitter", "shorts": "youtube_shorts"}
+    aliases.update({squash(key): key for key in PLATFORM_LIMITS})
+    aliases.update({squash(name): key for name, key in PLATFORM_REVERSE_MAP.items()})
+    if wanted in aliases:
+        return aliases[wanted]
+
+    # Fuzzy fallback. Longest contained alias wins ("youtube shorts video" is Shorts, not
+    # YouTube); fragments need 3+ characters so a stray letter cannot pick a platform.
+    by_length = sorted(aliases, key=len)
+    for alias in reversed(by_length):
+        if len(alias) > 1 and alias in wanted:
+            return aliases[alias]
+    for alias in by_length if len(wanted) >= 3 else ():
+        if wanted in alias:
+            return aliases[alias]
+
+    return "linkedin"
+
+
+def validate_length_type(length_type: str) -> str:
+    """Return "short" or "long"; anything unrecognised (or non-string) falls back to "short"."""
+    cleaned = length_type.strip().lower() if isinstance(length_type, str) else ""
+    return cleaned if cleaned in ("short", "long") else "short"
+
+
+def _looks_like_placeholder(prompt: str) -> bool:
+    """Heuristic: does the prompt contain keyboard mash or filler such as "asdf" or "test"?"""
+    mash = ("asdf", "qwerty", "12345", "xxx", "yyy", "ffff", "dddd")
+    # "test" and "abc" only count as whole words: matched as substrings they would reject
+    # genuine prompts that merely contain "latest", "contest" or "testing".
+    words = re.findall(r'[a-z0-9]+', prompt.lower())
+    return any(word in ("test", "abc") or any(m in word for m in mash) for word in words)
+
+
+def _strip_markup(text: str) -> str:
+    """Remove Markdown bold markers and turn dash runs and em dashes into a single hyphen."""
+    text = re.sub(r'\*\*', '', text)
+    text = re.sub(r'-{2,}', '-', text)
+    return re.sub(r'—', '-', text)
+
+
+def _clean_caption(text: str) -> str:
+    """Normalise a generated caption for display without damaging hashtags or paragraphs."""
+    text = _strip_markup(text).replace("\\n", "\n")
+    # Drop indentation and Markdown heading/quote/bullet markers, but only markers that are
+    # followed by a space: "#hashtag" at the start of a line keeps its "#", and blank lines
+    # between paragraphs survive (a bare `^[#*>\s]+` class would consume both).
+    text = re.sub(r'^[ \t]*(?:(?:#{1,6}|[*>])[ \t]+)?', '', text, flags=re.MULTILINE)
+    text = re.sub(r'\n{3,}', '\n\n', text)
+    return text.strip()
+
+
+def _error(message: str) -> dict:
+    """Build the payload returned for rejected requests."""
+    return {"error": message, "result": None}
+
+
+async def run(data: dict) -> dict:
+    """Generate platform-specific caption variations for a tool request.
+
+    Args:
+        data: Request payload. Reads ``prompt``, ``platform``, ``length_type`` ("short" or
+            "long") and ``image`` (optional; forwarded to ``extra_text`` for OCR context).
+
+    Returns:
+        On rejection ``{"error": <message>, "result": None}``. On success a dict with
+        ``result`` (plain-text rendering), ``metadata``, ``title`` and ``variations`` (at
+        most three dicts with ``text``, ``chars``, ``limit`` and ``title``).
+    """
+    import logging
+    logger = logging.getLogger("caption-generator")
+    # A null or non-string prompt is treated as empty instead of crashing on len().
+    prompt = data.get("prompt") if isinstance(data.get("prompt"), str) else ""
+    platform_input = data.get("platform", "linkedin")
+    length_type_input = data.get("length_type", "short")
+    image_data = data.get("image", "")
+    logger.info(f"Received request: platform={platform_input}, length={length_type_input}, prompt_len={len(prompt)}, has_image={bool(image_data)}")
+
+    platform = normalize_platform(platform_input)
+    length_type = validate_length_type(length_type_input)
+
+    api_key = os.getenv("OXLO_API_KEY")
+    if not api_key:
+        logger.error("No OXLO_API_KEY set")
+        return _error("Please enter your Oxlo API key.")
+
+    if not prompt:
+        logger.error("Empty prompt")
+        return _error("Please enter a prompt for captions.")
+
+    too_vague = "Please describe your content in more detail. Add more information about what you want to share."
+    words = prompt.strip().split()
+    if len(words) < 5:
+        logger.error(f"Prompt too short: {len(words)} words")
+        return _error(too_vague)
+    if len(words) < 10 and _looks_like_placeholder(prompt):
+        logger.error("Random pattern detected in prompt")
+        return _error(too_vague)
+
+    client = AsyncOpenAI(base_url="https://api.oxlo.ai/v1", api_key=api_key)
+
+    context_from_image = ""
+    if image_data:
+        logger.info("Extracting text from image")
+        context_from_image = await extra_text(image_data, api_key)
+        logger.info(f"Image OCR result: {len(context_from_image)} chars")
+
+    plat_info = PLATFORM_LIMITS.get(platform, PLATFORM_LIMITS["linkedin"])
+    logger.info(f"Generating captions for {platform} ({length_type})")
+
+    all_results = []
+    for attempt in range(MAX_RETRIES + 1):
+        logger.info(f"Generation attempt {attempt + 1}")
+        new_results = await asyncio.gather(
+            generate_all_captions(client, prompt, platform, length_type, context_from_image),
+            generate_all_captions(client, prompt, platform, length_type, context_from_image),
+            generate_all_captions(client, prompt, platform, length_type, context_from_image),
+        )
+        flat_captions = [v.get("text", "") for r in new_results for v in r.get("variations", [])]
+        # Retry while any two captions are near-duplicates; the last attempt is always kept.
+        near_duplicates = len(flat_captions) >= 3 and any(
+            score > SIMILARITY_THRESHOLD for _, _, score in check_variations(flat_captions)
+        )
+        if near_duplicates and attempt < MAX_RETRIES:
+            continue
+        all_results = new_results
+        break
+
+    plat_name = plat_info.get("name", platform.capitalize())
+
+    # Overall title: the first one any generation returned.
+    title = None
+    for r in all_results:
+        if r.get("title"):
+            title = _strip_markup(r["title"]).strip()
+            break
+
+    variations_output = []
+    for result in all_results:
+        for variation in result.get("variations", []):
+            text = _clean_caption(variation.get("text", "") or "")
+            if text:
+                variations_output.append({
+                    "text": text,
+                    "chars": len(text),
+                    "limit": variation.get("limit", 280),
+                    "title": variation.get("title", ""),
+                })
+
+    logger.info(f"Title: {title}")
+    logger.info(f"Variations generated: {len(variations_output)}")
+    if variations_output:
+        logger.info(f"First variation chars: {variations_output[0]['chars']}")
+
+    output_lines = []
+    for i, v in enumerate(variations_output[:3], 1):
+        if v.get("title"):
+            output_lines.append(v["title"])
+            output_lines.append("")
+        output_lines.append(f"Variation {i} - Description:")
+        output_lines.append(v["text"])
+        output_lines.append(f"[{v['chars']}/{v['limit']} chars]")
+        output_lines.append("")
+
+    return {
+        "result": "\n".join(output_lines),
+        "metadata": {
+            "platform": platform,
+            "platform_name": plat_name,
+            "length_type": length_type,
+            "variation_type": length_type,
+            "has_image_context": bool(context_from_image),
+        },
+        "title": title,
+        "variations": variations_output[:3],
+    }
\ No newline at end of file