// =============================================================================
// src/partnerRecruiter.ts
// =============================================================================

/**
 * PartnerRecruiterAgent – simple utility to discover potential software partners in Medellín.
 * In a production system this would call LinkedIn, Crunchbase, or a local DB. Here we provide
 * a deterministic mock that can be extended later.
 */

/** A single partner candidate returned by {@link findMedellinPartners}. */
export interface CompanyProfile {
  /** Company display name. */
  name: string;
  /** Sector label used for filtering (e.g. "AI", "FinTech"). */
  sector: string;
  /** Public website URL. */
  website: string;
  /** Headcount as recorded in the mock data set. */
  employeeCount: number;
  /** One-sentence summary of the company's offering. */
  description: string;
}

/**
 * Deterministic mock data set. Kept at module level so it is built once; it is never handed
 * out directly (see the copy in `findMedellinPartners`) so callers cannot mutate it.
 */
const MEDELLIN_COMPANIES: ReadonlyArray<Readonly<CompanyProfile>> = [
  {
    name: "CleverTech",
    sector: "AI",
    website: "https://clevertech.co",
    employeeCount: 45,
    description: "AI‑driven automation platform for enterprises.",
  },
  {
    name: "FinScope",
    sector: "FinTech",
    website: "https://finscope.com",
    employeeCount: 30,
    description: "Analytics suite for financial institutions.",
  },
  {
    name: "DevOpsM",
    sector: "DevOps",
    website: "https://devopsm.io",
    employeeCount: 22,
    description: "Continuous delivery pipelines with zero‑downtime deployments.",
  },
  {
    name: "DataPulse",
    sector: "Data",
    website: "https://datapulse.io",
    employeeCount: 55,
    description: "Real‑time data streaming and observability.",
  },
];

/**
 * Returns a curated list of software companies in Medellín that match the given sector filter.
 *
 * Matching is a case-insensitive substring test against each company's `sector`. Leading and
 * trailing whitespace in the filter is ignored.
 *
 * @param sector Desired sector (e.g., "AI", "FinTech", "DevOps"). If omitted, empty, or
 *   whitespace-only, all companies are returned.
 * @returns A fresh array of fresh objects on every call; mutating it does not affect later calls.
 */
export function findMedellinPartners(sector?: string): CompanyProfile[] {
  const needle = sector?.trim().toLowerCase();
  const matches: ReadonlyArray<Readonly<CompanyProfile>> = needle
    ? MEDELLIN_COMPANIES.filter((company) => company.sector.toLowerCase().includes(needle))
    : MEDELLIN_COMPANIES;
  // Copy each record so callers get the same "fresh objects per call" guarantee as before.
  return matches.map((company) => ({ ...company }));
}

// =============================================================================
// src/rag.ts
// =============================================================================

/**
 * Enhanced Retrieval-Augmented Generation (RAG) utilities for Isaac Forge.
 * Implements document ingestion, external scholarly metadata fetching, vector indexing,
 * and citation-aware retrieval.
 */

import axios from 'axios';
import { Document, VectorStoreIndex, SimpleRetriever } from 'llamaindex'; // Assume llamaindex provides these exports
import { AgentFunction, FunctionDescriptor } from './types';
import { logDebugMessage } from './util';
import { v4 as uuidv4 } from 'uuid';

/** Configuration for external services */
export interface RAGConfig {
  /** Semantic Scholar API base URL */
  semanticScholarBase?: string;
  /** Optional API key for Semantic Scholar (if needed) */
  semanticScholarKey?: string;
  /** LlamaIndex index persistence directory */
  indexPath?: string;
}

/** Default configuration values */
const DEFAULT_CONFIG: Required<RAGConfig> = {
  semanticScholarBase: 'https://api.semanticscholar.org/graph/v1/paper/search',
  semanticScholarKey: '',
  indexPath: './rag-index',
};

/** Number of documents retrieved when the caller does not specify `topK`. */
const DEFAULT_TOP_K = 4;
/** Number of papers requested from Semantic Scholar when the caller does not specify `limit`. */
const DEFAULT_PAPER_LIMIT = 5;
/**
 * Upper bound applied to the Semantic Scholar `limit` parameter.
 * NOTE(review): 100 is taken from the public API docs for paper relevance search — confirm.
 */
const MAX_PAPER_LIMIT = 100;
/** Maximum number of characters of document text shown per formatted result. */
const SNIPPET_LENGTH = 200;

/** Subset of the Semantic Scholar paper payload that this module reads. */
interface SemanticScholarPaper {
  paperId?: string | null;
  title?: string | null;
  abstract?: string | null;
  year?: number | null;
  authors?: Array<{ name?: string | null }> | null;
  venue?: string | null;
  url?: string | null;
}

/**
 * Coerces a caller-supplied count into an integer in `[1, max]`.
 * Non-numbers and non-finite numbers (NaN, ±Infinity) yield `fallback`; fractions are truncated.
 */
function toPositiveInt(value: unknown, fallback: number, max: number = Number.MAX_SAFE_INTEGER): number {
  if (typeof value !== 'number' || !Number.isFinite(value)) return fallback;
  return Math.min(Math.max(Math.trunc(value), 1), max);
}

/**
 * Ingest a raw text document and return a LlamaIndex Document.
 *
 * @param content Raw document text.
 * @param metadata Arbitrary metadata. `metadata.id` is used as the document id when present
 *   (otherwise a UUID v4 is generated); `metadata.source` defaults to `'local'`.
 * @returns The newly constructed Document.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- metadata is free-form by design
export function ingestDocument(content: string, metadata: Record<string, any> = {}): Document {
  const id = metadata.id ?? uuidv4();
  // NOTE(review): the constructor field names (`id`, `text`, `metadata`) are kept as authored;
  // verify them against the installed llamaindex version.
  const doc = new Document({
    id,
    text: content,
    metadata: { ...metadata, source: metadata.source ?? 'local' },
  });
  logDebugMessage(true, 'Ingested document', { id, length: content.length, metadata });
  return doc;
}

/**
 * Fetch scholarly papers from Semantic Scholar matching a query.
 *
 * @param query Free-text search query.
 * @param limit Maximum number of papers to request (default 5). Clamped to `[1, 100]`;
 *   non-finite values fall back to the default.
 * @param config Optional overrides for the API base URL and API key.
 * @returns One ingested Document per returned paper (title + abstract as text, bibliographic
 *   fields as metadata). Empty when the response carries no paper array.
 * @throws Error with message `Failed to fetch from Semantic Scholar: …` on any failure.
 */
export async function fetchSemanticScholarPapers(
  query: string,
  limit: number = DEFAULT_PAPER_LIMIT,
  config?: RAGConfig,
): Promise<Document[]> {
  const cfg = { ...DEFAULT_CONFIG, ...config };
  const safeLimit = toPositiveInt(limit, DEFAULT_PAPER_LIMIT, MAX_PAPER_LIMIT);
  const params = new URLSearchParams({
    query,
    limit: safeLimit.toString(),
    fields: 'title,abstract,year,authors,venue,url',
  });
  const url = `${cfg.semanticScholarBase}?${params.toString()}`;
  const headers: Record<string, string> = {};
  if (cfg.semanticScholarKey) {
    headers['x-api-key'] = cfg.semanticScholarKey;
  }
  try {
    const resp = await axios.get(url, { headers });
    // Guard against error payloads where `data` is missing or not a list.
    const rawPapers: unknown = resp.data?.data;
    const papers: SemanticScholarPaper[] = Array.isArray(rawPapers) ? rawPapers : [];
    return papers.map((paper) => {
      // Missing title/abstract become empty strings rather than the text "null"/"undefined".
      const text = `${paper.title ?? ''}\n${paper.abstract ?? ''}`;
      const meta = {
        id: paper.paperId ?? undefined, // undefined lets ingestDocument generate a UUID
        source: 'semantic-scholar',
        title: paper.title,
        year: paper.year,
        authors: paper.authors?.map((author) => author.name).join(', '),
        venue: paper.venue,
        url: paper.url,
      };
      return ingestDocument(text, meta);
    });
  } catch (e: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
    const message = e instanceof Error ? e.message : String(e);
    logDebugMessage(true, 'Semantic Scholar fetch error', { error: message });
    throw new Error(`Failed to fetch from Semantic Scholar: ${message}`);
  }
}

/**
 * Build a vector index from a collection of documents.
 *
 * @param docs Documents to index.
 * @param config Optional configuration; only `indexPath` is read, and only for logging.
 * @returns The index produced by `VectorStoreIndex.fromDocuments`.
 */
export async function buildIndex(
  docs: Document[],
  config?: RAGConfig,
): Promise<VectorStoreIndex> {
  const cfg = { ...DEFAULT_CONFIG, ...config };
  // LlamaIndex can persist to a directory; we assume a simple in-memory index for now.
  const index = await VectorStoreIndex.fromDocuments(docs);
  logDebugMessage(true, 'Built vector index', { docCount: docs.length, indexPath: cfg.indexPath });
  return index;
}

/**
 * Retrieve top-k documents relevant to a query.
 *
 * @param query Natural-language query.
 * @param index Index to search.
 * @param topK Number of results to request (default 4). Normalized to an integer >= 1.
 * @returns Whatever the retriever yields for the query.
 */
export async function retrieveRelevantDocs(
  query: string,
  index: VectorStoreIndex,
  topK: number = DEFAULT_TOP_K,
): Promise<Document[]> {
  const k = toPositiveInt(topK, DEFAULT_TOP_K);
  // NOTE(review): `SimpleRetriever` and a `Document[]` result are assumptions carried over from
  // the original import comment; confirm against the installed llamaindex API, where retrievers
  // may instead return scored node wrappers.
  const retriever = new SimpleRetriever({
    index,
    similarityTopK: k,
  });
  const results = await retriever.retrieve(query);
  logDebugMessage(true, 'Retrieved relevant docs', { query, topK: k, resultCount: results.length });
  return results;
}

/**
 * Renders one retrieved document as `Result N: <snippet>\nCitation: <citation>`.
 * The snippet is capped at `SNIPPET_LENGTH` characters, with `...` appended only when the text
 * was actually truncated. Missing year/title fall back to placeholders.
 */
function formatCitedResult(doc: Document, position: number): string {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- metadata is free-form
  const meta = (doc.metadata ?? {}) as Record<string, any>;
  const citation =
    meta.source === 'semantic-scholar'
      ? `[${meta.year ?? 'n.d.'}] ${meta.title ?? 'Untitled'}`
      : `[Local] ${meta.title ?? 'Document'}`;
  const text = doc.text ?? '';
  const snippet = text.length > SNIPPET_LENGTH ? `${text.slice(0, SNIPPET_LENGTH)}...` : text;
  return `Result ${position}: ${snippet}\nCitation: ${citation}`;
}

/** Agent function wrapper exposing RAG retrieval to Swarm agents */
export const retrieveRelevantDocsFunction: AgentFunction = {
  name: 'retrieveRelevantDocs',
  func: async (args: { query: string; topK?: number }) => {
    const { query } = args;
    // Normalize once so the fetch limit and the retrieval depth always agree.
    const topK = toPositiveInt(args.topK, DEFAULT_TOP_K);
    // For demonstration we lazily build an index from a static corpus. In production this
    // would be cached or persisted.
    const localDocs = [
      ingestDocument('Placeholder local scientific note about quantum entanglement.', {
        source: 'local',
        title: 'Quantum Entanglement Overview',
      }),
    ];
    const scholarlyDocs = await fetchSemanticScholarPapers(query, topK);
    const index = await buildIndex([...localDocs, ...scholarlyDocs]);
    const relevant = await retrieveRelevantDocs(query, index, topK);
    // Return a formatted string that includes citations.
    return relevant.map((doc, i) => formatCitedResult(doc, i + 1)).join('\n\n');
  },
  descriptor: {
    name: 'retrieveRelevantDocs',
    description: 'Retrieve top‑k relevant scientific documents (local or Semantic Scholar) for a given query, returning a formatted string with citations.',
    parameters: {
      query: {
        type: 'string',
        required: true,
        description: 'Natural language query describing the research question.',
      },
      topK: {
        type: 'number',
        required: false,
        description: 'Number of documents to retrieve (default 4).',
      },
    },
  },
};

/** Export a collection of RAG-related functions for easy registration */
export const RAG_FUNCTIONS: AgentFunction[] = [retrieveRelevantDocsFunction];