diff --git a/ui/__tests__/scientific-rag.test.ts b/ui/__tests__/scientific-rag.test.ts
new file mode 100644
index 0000000..c6396db
--- /dev/null
+++ b/ui/__tests__/scientific-rag.test.ts
@@ -0,0 +1,77 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  buildCitationKey,
+  buildRagMetadata,
+  detectScientificSection,
+  formatRetrievedDocuments,
+  parseSemanticScholarReferences,
+  semanticScholarReferenceToText,
+} from '@/utils/server/scientific-rag';
+
+describe('scientific RAG helpers', () => {
+  it('detects compound scientific sections before generic methods', () => {
+    const compoundHeading = 'Materials and Methods\nWe collected samples';
+    const abstractHeading = 'Abstract\nThis paper studies retrieval';
+    expect(detectScientificSection(compoundHeading)).toBe('materials-and-methods');
+    expect(detectScientificSection(abstractHeading)).toBe('abstract');
+  });
+
+  it('builds stable citation keys for uploaded documents', () => {
+    // No sourceType is given, so the key uses the `doc` prefix.
+    const uploadKey = buildCitationKey({ title: 'My Paper.pdf', page: 3, chunkIndex: 2 });
+    expect(uploadKey).toBe('doc:my-paper-pdf:p3:c2');
+  });
+
+  it('builds stable citation keys for Semantic Scholar references', () => {
+    const scholarChunk = {
+      sourceType: 'semantic-scholar' as const,
+      paperId: 'abc123',
+      title: 'Ignored when paper id exists',
+      chunkIndex: 1,
+    };
+
+    expect(buildCitationKey(scholarChunk)).toBe('scholar:abc123:ref:c1');
+  });
+
+  it('converts Semantic Scholar references into indexable text', () => {
+    const reference = {
+      paperId: 'paper-1',
+      title: 'Retrieval for Science',
+      abstract: 'A study of citation-grounded retrieval.',
+      authors: [{ name: 'Ada Lovelace' }, 'Grace Hopper'],
+      year: 2026,
+      venue: 'ISAAC',
+    };
+    const expectedLines = ['Title: Retrieval for Science', 'Authors: Ada Lovelace, Grace Hopper', 'Semantic Scholar Paper ID: paper-1'];
+    const text = semanticScholarReferenceToText(reference);
+
+    for (const line of expectedLines) expect(text).toContain(line);
+  });
+
+  it('parses saved Semantic Scholar references from form fields', () => {
+    const saved = [{ paperId: 'paper-1', title: 'A' }];
+    // The field is passed as a one-element array holding the JSON string.
+    const formField = [JSON.stringify(saved)];
+
+    expect(parseSemanticScholarReferences(formField)).toEqual(saved);
+  });
+
+  it('formats retrieval results with citation keys and distances', () => {
+    const metadata = buildRagMetadata({ title: 'Paper', page: 1, chunkIndex: 0 });
+    const formatted = formatRetrievedDocuments({
+      documents: [['Chunk text']],
+      metadatas: [[metadata]],
+      distances: [[0.123456]],
+    });
+    expect(formatted).toContain('[doc:paper:p1:c0]');
+    expect(formatted).toContain('Distance: 0.1235');
+    expect(formatted).toContain('Chunk text');
+  });
+
+  it('handles empty retrieval results defensively', () => {
+    const emptyResult = { documents: [[]], metadatas: [[]] };
+    const formatted = formatRetrievedDocuments(emptyResult);
+    expect(formatted).toBe('No relevant documents were retrieved.');
+  });
+});
diff --git a/ui/pages/api/fetch-documents.ts b/ui/pages/api/fetch-documents.ts
index 9304e48..0e8b7e7 100644
--- a/ui/pages/api/fetch-documents.ts
+++ b/ui/pages/api/fetch-documents.ts
@@ -1,23 +1,35 @@
-import type { NextApiRequest, NextApiResponse } from "next";
-import { ChromaClient, TransformersEmbeddingFunction } from "chromadb";
+import type { NextApiRequest, NextApiResponse } from 'next';
+
+import { ChromaClient, TransformersEmbeddingFunction } from 'chromadb';
 
 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
   try {
+    if (req.method !== 'POST') {
+      return res.status(405).end();
+    }
+
+    const query = typeof req.body?.input === 'string' ? req.body.input.trim() : '';
+    if (!query) {
+      return res.status(400).json({ error: 'Missing retrieval query' });
+    }
+
+    const requestedResults = Number(req.body?.nResults || 6);
+    const nResults = Math.min(Math.max(requestedResults, 1), 10);
+
     const client = new ChromaClient({
-      path: "http://chroma-server:8000",
+      path: process.env.CHROMA_PATH || 'http://chroma-server:8000',
     });
 
-    const query = req.body.input;
-
     const embedder = new TransformersEmbeddingFunction();
+    const collection = await client.getOrCreateCollection({
+      name: 'default-collection',
+      embeddingFunction: embedder,
+    });
 
-    const collection = await client.getOrCreateCollection({ name: "default-collection", embeddingFunction: embedder });
-
-  // query the collection
-  const results = await collection.query({
-      nResults: 4, 
-      queryTexts: [query]
-  }) 
+    const results = await collection.query({
+      nResults,
+      queryTexts: [query],
+    });
 
     res.status(200).json(results);
   } catch (error) {
@@ -29,4 +41,4 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
     }
     res.status(500).json({ error: 'An unexpected error occurred :(' });
   }
-}
\ No newline at end of file
+}
diff --git a/ui/pages/api/inject-documents.ts b/ui/pages/api/inject-documents.ts
index 532a635..9b74cce 100644
--- a/ui/pages/api/inject-documents.ts
+++ b/ui/pages/api/inject-documents.ts
@@ -3,10 +3,17 @@ import type { NextApiRequest, NextApiResponse } from 'next';
 import { ChromaClient, TransformersEmbeddingFunction } from 'chromadb';
 import { IncomingForm } from 'formidable';
 import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
-import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
-
+import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 import path from 'path';
-import { v4 as uuidv4 } from 'uuid';
+
+import {
+  buildRagMetadata,
+  detectScientificSection,
+  parseSemanticScholarReferences,
+  SCIENTIFIC_TEXT_SEPARATORS,
+  semanticScholarReferenceToText,
+  type ScientificReference,
+} from '@/utils/server/scientific-rag';
 
 export const config = {
   api: {
@@ -29,25 +36,36 @@ export default async function handler(
         return res.status(400).json({ error: 'Failed to upload file' });
       }
 
+      const pdfFile = Array.isArray(files.pdf) ? files.pdf[0] : files.pdf;
+      const references = parseSemanticScholarReferences(fields.references);
+
+      if (!pdfFile?.filepath && references.length === 0) {
+        return res.status(400).json({
+          error: 'Upload a PDF or provide Semantic Scholar references',
+        });
+      }
+
       const client = new ChromaClient({
         path: process.env.CHROMA_PATH || 'http://chroma-server:8000',
       });
 
-      const loader = new PDFLoader(files.pdf[0].filepath);
-
-      const originalDocs = await loader.load();
+      const originalDocs = [];
 
-      console.log(JSON.stringify(originalDocs));
+      if (pdfFile?.filepath) {
+        const loader = new PDFLoader(pdfFile.filepath);
+        originalDocs.push(...(await loader.load()));
+      }
 
+      originalDocs.push(...semanticScholarReferencesToDocuments(references));
 
       const splitter = new RecursiveCharacterTextSplitter({
-        chunkSize: 500,
-        chunkOverlap: 100,
-      });      
+        chunkSize: 700,
+        chunkOverlap: 120,
+        separators: SCIENTIFIC_TEXT_SEPARATORS,
+      });
 
       const docs = await splitter.splitDocuments(originalDocs);
- 
-      // Process the documents and perform other logic
+
       const { ids, metadatas, documentContents } = processDocuments(docs);
 
       const embedder = new TransformersEmbeddingFunction();
@@ -65,6 +83,7 @@ export default async function handler(
       res.status(200).json({
         message: 'Documents processed successfully',
         documentCount: ids.length,
+        semanticScholarReferenceCount: references.length,
       });
     });
   } catch (error) {
@@ -75,30 +94,53 @@ export default async function handler(
   }
 }
 
-function processDocuments(docs: any) {
+/** Wraps each Semantic Scholar reference as a `{ pageContent, metadata }` record for the splitter. */
+function semanticScholarReferencesToDocuments(references: ScientificReference[]) {
+  return references.map((reference) => {
+    const { paperId, title, url, year } = reference;
+    // 'ref' stands in for a page number in `loc.pageNumber`.
+    const loc = { pageNumber: 'ref' };
+    const source = url || paperId || title;
+
+    return {
+      pageContent: semanticScholarReferenceToText(reference),
+      metadata: { sourceType: 'semantic-scholar', source, title, paperId, url, year, loc },
+    };
+  });
+}
+
+function processDocuments(docs: any[]) {
   const ids = [];
   const metadatas = [];
   const documentContents = [];
+  const pageChunkCounts = new Map<string, number>();
 
   for (const document of docs) {
-    // Generate an ID for each document, or use some existing unique identifier
-    const id = uuidv4();
-    ids.push(id);
-
-    const fallbackTitle = path.basename(document.metadata.source);
-    const titleFromMetadata = document.metadata.pdf.info.Title;
-
-    const title = titleFromMetadata && titleFromMetadata.length > 0 ? titleFromMetadata : fallbackTitle;
+    const sourceType = document.metadata.sourceType || 'upload';
+    const fallbackTitle = document.metadata.source
+      ? path.basename(document.metadata.source)
+      : 'Semantic Scholar reference';
+    const titleFromMetadata = document.metadata.pdf?.info?.Title;
+    const title = titleFromMetadata || document.metadata.title || fallbackTitle;
+    const page = document.metadata.loc?.pageNumber || document.metadata.page || 'ref';
+    const pageChunkKey = `${sourceType}:${title}:${page}`;
+    const chunkIndex = pageChunkCounts.get(pageChunkKey) || 0;
+    pageChunkCounts.set(pageChunkKey, chunkIndex + 1);
+
+    const metadata = buildRagMetadata({
+      title,
+      page,
+      source: document.metadata.source,
+      sourceType,
+      section: detectScientificSection(document.pageContent),
+      chunkIndex,
+      paperId: document.metadata.paperId,
+      url: document.metadata.url,
+      year: document.metadata.year,
+    });
 
-  
-    const metadata = {
-      title: title,
-      page: document.metadata.loc.pageNumber, // Define this function to extract chapter info
-      source: document.metadata.source, // Define this function to extract verse info
-    };
+    ids.push(String(metadata.citationKey));
     metadatas.push(metadata);
-
-    // Add the page content to the documents array
     documentContents.push(document.pageContent);
   }
 
diff --git a/ui/pages/api/rag-chat.ts b/ui/pages/api/rag-chat.ts
index ce84d67..5ef7a59 100644
--- a/ui/pages/api/rag-chat.ts
+++ b/ui/pages/api/rag-chat.ts
@@ -1,6 +1,7 @@
 import { DEFAULT_SYSTEM_PROMPT, DEFAULT_TEMPERATURE } from '@/utils/app/const';
 import { OpenAIError, OpenAIStream } from '@/utils/server';
-import { codeBlock, oneLine } from 'common-tags'
+import { formatRetrievedDocuments } from '@/utils/server/scientific-rag';
+import { codeBlock, oneLine } from 'common-tags';
 
 import { ChatBody, Message } from '@/types/chat';
 
@@ -14,41 +15,28 @@ export const config = {
   runtime: 'edge',
 };
 
-// Function to fetch and format documents
-async function fetchAndFormatDocuments(lastMessageContent: string) {
+async function fetchAndFormatDocuments(lastMessageContent: string, req: Request) {
   try {
-    console.log("fetching documents")
-    const response = await fetch('http://localhost:3000/api/fetch-documents', {
+    const url = new URL('/api/fetch-documents', req.url);
+    const response = await fetch(url, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ input: lastMessageContent }),
+      body: JSON.stringify({ input: lastMessageContent, nResults: 6 }),
     });
-    
+
     if (!response.ok) {
       throw new Error(`Error fetching documents: ${response.statusText}`);
     }
 
     const data = await response.json();
-    const result = data.metadatas[0].map((metadata: any, index: number) => {
-      return `Source ${index + 1}) Title: ${metadata.title}, Page: ${metadata.page}, Content: ${data.documents[0][index]}\n`;
-    }).join('');
-
-    console.log(result);
-
-    return result;
-
+    return formatRetrievedDocuments(data);
   } catch (error) {
     console.error('Error fetching and formatting documents:', error);
-    throw error; // You may want to throw a more specific error object here
+    return 'No relevant documents were retrieved.';
   }
 }
 
-
-
-
-
 const handler = async (req: Request): Promise<Response> => {
-
   try {
     const { model, messages, key, prompt, temperature } =
       (await req.json()) as ChatBody;
@@ -62,87 +50,79 @@ const handler = async (req: Request): Promise<Response> => {
 
     let promptToSend = codeBlock`
     ${oneLine`
-      You are a very enthusiastic AI assistant  who loves
-      to help people! Given the following information from
-      relevant documentation, answer the user's question using
-      only that information, outputted in markdown format.
+      You are a careful scientific research assistant. Given the following
+      retrieved document context, answer the user's question using only that
+      context and output markdown.
     `}
 
     ${oneLine`
-      If you are unsure
-      and the answer is not explicitly written in the documentation, say
-      "Sorry, I don't know how to help with that."
+      Every factual claim that depends on retrieved context must cite the exact
+      bracketed citation key, for example [doc:paper-title:p3:c1] or
+      [scholar:paper-id:ref:c0]. Prefer lower-distance sources when multiple
+      chunks contain similar information.
     `}
-    
+
     ${oneLine`
-      Always include citations from the documentation.
+      If the answer is not explicitly supported by the retrieved context, say
+      "Sorry, I don't know how to help with that from the available documents."
     `}
   `;
 
     if (!promptToSend) {
-      promptToSend = DEFAULT_SYSTEM_PROMPT;
+      promptToSend = prompt || DEFAULT_SYSTEM_PROMPT;
     }
 
     const lastMessage = messages[messages.length - 1];
+    const relevantDocuments = await fetchAndFormatDocuments(lastMessage.content, req);
 
-    const relevantDocuments = await fetchAndFormatDocuments(lastMessage.content);
-    
     let temperatureToUse = temperature;
     if (temperatureToUse == null) {
       temperatureToUse = DEFAULT_TEMPERATURE;
     }
 
     const prompt_tokens = encoding.encode(promptToSend);
-
     let tokenCount = prompt_tokens.length;
     let messagesToSend: Message[] = [];
 
-
     encoding.free();
 
-    console.log(model, promptToSend, temperatureToUse, key, messagesToSend);
-
-  
-  messagesToSend = [
+    messagesToSend = [
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
-          Here is the relevant documentation:
+          Here is the retrieved scientific context:
           ${relevantDocuments}
         `,
       },
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
           ${oneLine`
-            Answer my next question using only the above documentation.
-            You must also follow the below rules when answering:
+            Answer my next question using only the context above.
+            Follow these rules:
           `}
           ${oneLine`
-            - Do not make up answers that are not provided in the documentation.
+            - Do not invent answers that are not supported by the retrieved context.
           `}
           ${oneLine`
-            - If you are unsure and the answer is not explicitly written
-            in the documentation context, say
-            "Sorry, I don't know how to help with that."
+            - Cite factual claims with the exact bracketed citation keys shown in the context.
           `}
           ${oneLine`
-            - Prefer splitting your response into multiple paragraphs.
+            - If no relevant context is available, say you do not know from the available documents.
           `}
           ${oneLine`
-            - Output as markdown with citations based on the documentation.
+            - Prefer concise markdown with direct citations.
           `}
         `,
       },
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
           Here is my question:
           ${oneLine`${lastMessage.content}`}
       `,
       },
-    ]
-
+    ];
 
     const stream = await OpenAIStream(
       model,
diff --git a/ui/utils/server/scientific-rag.ts b/ui/utils/server/scientific-rag.ts
new file mode 100644
index 0000000..23a095e
--- /dev/null
+++ b/ui/utils/server/scientific-rag.ts
@@ -0,0 +1,174 @@
+export type ScientificSourceType = 'upload' | 'semantic-scholar';
+
+export interface ScientificReference {
+  paperId?: string;
+  title?: string;
+  abstract?: string;
+  authors?: Array<string | { name?: string }>;
+  year?: number | string;
+  venue?: string;
+  url?: string;
+}
+
+export interface RagMetadataInput {
+  title?: string;
+  page?: number | string;
+  source?: string;
+  sourceType?: ScientificSourceType;
+  section?: string;
+  chunkIndex?: number;
+  paperId?: string;
+  url?: string;
+  year?: number | string;
+}
+
+export interface RetrievedDocuments {
+  documents?: string[][];
+  metadatas?: Array<Array<Record<string, unknown>>>;
+  distances?: number[][];
+}
+
+const SECTION_PATTERNS: Array<[string, RegExp]> = [
+  ['materials-and-methods', /\bmaterials?\s+(and|&)\s+methods?\b/i],
+  ['abstract', /\babstract\b/i],
+  ['introduction', /\bintroduction\b/i],
+  ['background', /\bbackground\b/i],
+  ['methods', /\b(methods?|methodology)\b/i],
+  ['results', /\bresults?\b/i],
+  ['discussion', /\bdiscussion\b/i],
+  ['conclusion', /\bconclusions?\b/i],
+  ['limitations', /\blimitations?\b/i],
+  ['references', /\breferences?\b/i],
+];
+
+export const SCIENTIFIC_TEXT_SEPARATORS = [
+  '\nAbstract',
+  '\nIntroduction',
+  '\nBackground',
+  '\nMaterials and Methods',
+  '\nMethods',
+  '\nMethodology',
+  '\nResults',
+  '\nDiscussion',
+  '\nConclusion',
+  '\nReferences',
+  '\n\n',
+  '\n',
+  '. ',
+  ' ',
+  '',
+];
+
+/**
+ * Labels a chunk of text with a scientific section name.
+ *
+ * The first six lines are joined with spaces and tested against SECTION_PATTERNS in declaration
+ * order; the name of the first matching entry is returned, or 'body' when nothing matches.
+ */
+export function detectScientificSection(text = ''): string {
+  // Falls back to the first 400 characters when the joined lines are empty.
+  const head = text.split('\n').slice(0, 6).join(' ') || text.slice(0, 400);
+  const hit = SECTION_PATTERNS.find(([, pattern]) => pattern.test(head));
+  return hit ? hit[0] : 'body';
+}
+
+/**
+ * Lowercases `value` into a hyphen-separated slug of at most 72 characters; returns `fallback` when nothing usable remains.
+ */
+export function normalizeCitationPart(value: unknown, fallback = 'source'): string {
+  const hyphenated = String(value || fallback).toLowerCase().replace(/[^a-z0-9]+/g, '-');
+  // Strip the trailing '-' after truncating, so a cut that lands on a separator cannot leave one behind.
+  const slug = hyphenated.replace(/^-+/, '').slice(0, 72).replace(/-+$/, '');
+  return slug || fallback;
+}
+
+/** Builds `<doc|scholar>:<slug>:<pN|ref>:<cN>`; a missing, empty, or placeholder 'ref' page yields `ref`, never `pref`. */
+export function buildCitationKey(input: RagMetadataInput): string {
+  const source = input.sourceType === 'semantic-scholar' ? 'scholar' : 'doc';
+  const title = normalizeCitationPart(input.paperId || input.title || input.source, 'source');
+  const page = input.page == null || input.page === '' || input.page === 'ref' ? 'ref' : `p${input.page}`;
+  const chunk = Number.isInteger(input.chunkIndex) ? `c${input.chunkIndex}` : 'c0';
+  return `${source}:${title}:${page}:${chunk}`;
+}
+
+/** Produces the flat metadata record for one chunk: defaulted core fields, `citationKey`, and truthy optional ids. */
+export function buildRagMetadata(input: RagMetadataInput): Record<string, string | number> {
+  const { paperId, url, year } = input;
+  const base: Record<string, string | number> = {
+    title: String(input.title || 'Untitled source'),
+    page: input.page == null || input.page === '' ? 'ref' : input.page,
+    source: String(input.source || paperId || url || 'unknown'),
+    sourceType: input.sourceType || 'upload',
+    section: input.section || 'body',
+    chunk: input.chunkIndex || 0,
+  };
+  // The defaulted fields are spread last, so they take precedence over the raw input for the key.
+  const citationKey = buildCitationKey({ ...input, ...base });
+  const metadata: Record<string, string | number> = { ...base, citationKey };
+  if (paperId) metadata.paperId = paperId;
+  if (url) metadata.url = url;
+  if (year) metadata.year = year;
+  return metadata;
+}
+
+/**
+ * Joins author names with ', ', accepting plain strings or `{ name }` objects and dropping empty names.
+ */
+function formatAuthors(authors: ScientificReference['authors']): string {
+  const entries = Array.isArray(authors) ? authors : [];
+  const names = entries.map((entry) => (typeof entry === 'string' ? entry : entry?.name));
+  return names.filter((name) => Boolean(name)).join(', ');
+}
+
+/** Renders a reference as newline-separated `Label: value` lines, omitting fields that are missing or falsy. */
+export function semanticScholarReferenceToText(reference: ScientificReference): string {
+  // The order of this table is the order of the emitted lines.
+  const labelled: Array<[string, unknown]> = [
+    ['Title', reference.title || 'Untitled Semantic Scholar reference'],
+    ['Authors', formatAuthors(reference.authors)],
+    ['Year', reference.year],
+    ['Venue', reference.venue],
+    ['Abstract', reference.abstract],
+    ['URL', reference.url],
+    ['Semantic Scholar Paper ID', reference.paperId],
+  ];
+  const present = labelled.filter(([, value]) => Boolean(value));
+  return present.map(([label, value]) => `${label}: ${value}`).join('\n');
+}
+
+/** Parses a JSON-array form field (first element if given an array) into reference objects; invalid input yields []. */
+export function parseSemanticScholarReferences(value: unknown): ScientificReference[] {
+  const raw = Array.isArray(value) ? value[0] : value;
+  if (!raw || typeof raw !== 'string') return [];
+  try {
+    const parsed: unknown = JSON.parse(raw);
+    if (!Array.isArray(parsed)) return [];
+    // typeof [] is 'object', so nested arrays must be rejected explicitly.
+    return parsed.filter((item) => item && typeof item === 'object' && !Array.isArray(item));
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Renders a query result as prompt context: one `Source N [citationKey] ...` header line per chunk,
+ * followed by the chunk text. Only the first result row of `data` is read.
+ */
+export function formatRetrievedDocuments(data: RetrievedDocuments): string {
+  const chunks = data.documents?.[0] || [];
+  const metadataRow = data.metadatas?.[0] || [];
+  const distanceRow = data.distances?.[0] || [];
+  if (!chunks.length) return 'No relevant documents were retrieved.';
+
+  const blocks = chunks.map((content, index) => {
+    const meta: Record<string, unknown> = metadataRow[index] || {};
+    const distance = distanceRow[index];
+    // Missing metadata fields fall back to positional or generic labels.
+    const label = `Source ${index + 1} [${meta.citationKey || `source-${index + 1}`}]`;
+    const origin = `Title: ${meta.title || 'Untitled source'}, Page: ${meta.page || 'ref'}, Section: ${meta.section || 'body'}`;
+    const score = typeof distance === 'number' ? `, Distance: ${distance.toFixed(4)}` : '';
+    return `${label} ${origin}${score}\n${content}\n`;
+  });
+
+  return blocks.join('\n');
+}