// ---- app/src/features/myspace/capture-classifier.js ----

/**
 * Capture classifier for Myspace inputs.
 *
 * Two classifiers are combined:
 *   - `mockClassify`      : keyword stand-in for a future trained model.
 *   - `ruleBasedClassify` : deterministic pattern rules.
 * `classifyCapture` runs both and keeps the more confident answer.
 *
 * The module uses ES-module exports, matching its test file and the sibling
 * fixture file, which already use `import` / `export` syntax. The previously
 * unused `path` import was dropped.
 */

/**
 * Coerce any input to lower-cased text so the classifiers never throw on
 * `null`, `undefined`, numbers, etc.
 *
 * @param {*} value - Raw capture payload.
 * @returns {string} Lower-cased text (empty string for null/undefined).
 */
const toSearchText = (value) => (value == null ? '' : String(value)).toLowerCase();

// Ordered keyword table for the mock model; the first matching row wins.
const MOCK_RULES = [
  { category: 'note', confidence: 0.85, keywords: ['note', 'reminder'] },
  { category: 'timetable', confidence: 0.80, keywords: ['schedule', 'timetable', 'class'] },
  { category: 'study_material', confidence: 0.75, keywords: ['study', 'material', 'lecture'] },
  { category: 'link', confidence: 0.90, keywords: ['http', 'www.'] },
  { category: 'file', confidence: 0.70, keywords: ['.pdf', '.doc', 'file'] },
];

/**
 * Mock ML classifier - in a real implementation, this would use a trained model.
 * Performs plain substring keyword matching in table order.
 *
 * @param {*} text - Capture text (non-strings are coerced).
 * @returns {{category: string, confidence: number}} Raw (un-normalized) category
 *   and confidence; `{ category: 'note', confidence: 0.5 }` when nothing matches.
 */
const mockClassify = (text) => {
  const lowerText = toSearchText(text);
  const hit = MOCK_RULES.find(({ keywords }) =>
    keywords.some((keyword) => lowerText.includes(keyword)));

  return hit
    ? { category: hit.category, confidence: hit.confidence }
    : { category: 'note', confidence: 0.50 };
};

// All patterns run against lower-cased text, so no `i` flag is needed, and none
// use `g`, so `.test()` carries no `lastIndex` state between calls.

// "14:30", "3pm", "10 am", "9:15pm". A digit is required, so the verb "am"
// ("I am free") is no longer mistaken for a time.
const CLOCK_TIME = /\b\d{1,2}:\d{2}\b|\b\d{1,2}(?::\d{2})?\s?(?:am|pm)\b/;
const DAY_NAME = /\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/;
// Words that, together with a clock time, indicate a scheduled event.
const SCHEDULE_CUE = /\b(?:today|tomorrow|tonight|meeting|appointment|class|lecture)\b/;
// Whole words only (optional plural) so "exam" does not fire inside "example".
const STUDY_KEYWORD = /\b(?:syllabus|assignments?|homework|exams?|quiz(?:zes)?|tests?|chapters?)\b/;
const URL_PATTERN = /(?:https?:\/\/|www\.)[\w-]+(?:\.[\w-]+)+/;
const FILE_EXTENSION = /\.(?:pdf|docx?|txt|xlsx?|pptx?)\b/;
// Substring match on purpose: names like "screenshot_2023…" have no word boundary.
const SCREENSHOT_HINTS = ['screenshot', 'screen shot', 'capture', 'image'];

/**
 * Rule-based classifier. Rules are evaluated in priority order:
 * timetable, study material, link, file, screenshot, then the `note` default.
 *
 * @param {*} text - Capture text (non-strings are coerced).
 * @returns {{category: string, confidence: number}} Raw (un-normalized) category
 *   and confidence; `{ category: 'note', confidence: 0.6 }` when no rule matches.
 */
const ruleBasedClassify = (text) => {
  const lowerText = toSearchText(text);

  // Timetable: a clock time plus either a weekday (strong) or a scheduling cue.
  if (CLOCK_TIME.test(lowerText)) {
    if (DAY_NAME.test(lowerText)) {
      return { category: 'timetable', confidence: 0.95 };
    }
    if (SCHEDULE_CUE.test(lowerText)) {
      return { category: 'timetable', confidence: 0.90 };
    }
  }

  if (STUDY_KEYWORD.test(lowerText)) {
    return { category: 'study_material', confidence: 0.90 };
  }

  if (URL_PATTERN.test(lowerText)) {
    return { category: 'link', confidence: 0.95 };
  }

  if (FILE_EXTENSION.test(lowerText)) {
    return { category: 'file', confidence: 0.90 };
  }

  // Screenshot rules (based on common screenshot naming)
  if (SCREENSHOT_HINTS.some((hint) => lowerText.includes(hint))) {
    return { category: 'screenshot', confidence: 0.85 };
  }

  return { category: 'note', confidence: 0.60 };
};

// Internal category ids -> display names expected by callers.
const CATEGORY_DISPLAY_NAMES = {
  study_material: 'study material',
};

/**
 * Normalize category names to match expected output.
 *
 * @param {string} category - Internal category id.
 * @returns {string} Display name, or the id itself when no mapping exists.
 */
const normalizeCategory = (category) => (
  Object.prototype.hasOwnProperty.call(CATEGORY_DISPLAY_NAMES, category)
    ? CATEGORY_DISPLAY_NAMES[category]
    : category
);

/**
 * Main classification function.
 *
 * Runs the deterministic rules and the (mock) ML classifier and returns the
 * more confident result; the rules win ties. Previously any ML answer with
 * confidence >= 0.7 was final, so the mock's loose "note" substring hit hid
 * higher-confidence rule matches such as "... notes for mathematics exam".
 *
 * @param {*} captureText - Raw capture payload; non-strings are coerced and
 *   null/undefined is treated as empty text.
 * @returns {{category: string, confidence: number}} Normalized category name
 *   and the confidence of the classifier that produced it.
 */
const classifyCapture = (captureText) => {
  const ruleResult = ruleBasedClassify(captureText);
  let best = ruleResult;

  try {
    const mlResult = mockClassify(captureText);
    if (mlResult.confidence > ruleResult.confidence) {
      best = mlResult;
    }
  } catch (error) {
    // The ML path is best-effort: if it fails, the rule-based answer stands.
  }

  return {
    category: normalizeCategory(best.category),
    confidence: best.confidence,
  };
};

export { classifyCapture, mockClassify, ruleBasedClassify };

// ---- patch continues with: app/src/features/myspace/capture-classifier.test.js (new file) ----
// ---- app/src/features/myspace/capture-classifier.test.js ----
import { classifyCapture } from './capture-classifier';

// Each row: [test title, captured text, expected category].
const ML_CLASSIFICATION_CASES = [
  ['classifies note content', 'This is a personal note about my thoughts', 'note'],
  ['classifies timetable content', 'Meeting at 3pm with team for project review', 'timetable'],
  ['classifies screenshot references', 'Screenshot_2023-01-01_12-00-00.png', 'screenshot'],
  ['classifies link content', 'Check out this article: https://example.com', 'link'],
  ['classifies file attachments', 'document.pdf', 'file'],
  ['classifies study material', 'Chapter 5 notes for mathematics exam', 'study material'],
];

const FALLBACK_RULE_CASES = [
  ['uses URL pattern fallback', 'Visit https://github.com for code repositories', 'link'],
  ['uses file extension fallback', 'presentation.pptx', 'file'],
  ['uses screenshot pattern fallback', 'Screen Shot 2023-12-25 at 14.30.45.png', 'screenshot'],
  ['defaults to note for unknown content', 'Random text without clear pattern', 'note'],
];

const MYSPACE_CAPTURE_CASES = [
  ['handles typical note capture', 'Remember to complete the project by Friday', 'note'],
  ['handles meeting capture', 'Team meeting tomorrow 10am agenda discussion', 'timetable'],
  ['handles web capture with URL', 'Interesting read: https://medium.com/article', 'link'],
  ['handles document capture', 'Final_report_v2.docx', 'file'],
  ['handles study related capture', 'Biology chapter 7 summary notes', 'study material'],
];

// Registers one Jest test per row; the row's first element becomes the test title.
const registerCases = (cases) => {
  test.each(cases)('%s', (_title, content, expectedCategory) => {
    const { category } = classifyCapture(content);
    expect(category).toBe(expectedCategory);
  });
};

describe('Capture Classifier', () => {
  describe('ML Classification', () => {
    registerCases(ML_CLASSIFICATION_CASES);
  });

  describe('Fallback Rules', () => {
    registerCases(FALLBACK_RULE_CASES);
  });

  describe('Myspace Capture Fixtures', () => {
    registerCases(MYSPACE_CAPTURE_CASES);
  });
});

// ---- app/src/features/myspace/classification-fixtures.js ----
// Classification test fixtures for Myspace capture inputs
//
// NOTE(review): the labels used here ("work" / "personal" / "education") are not
// among the categories returned by classifyCapture in capture-classifier.js, and
// the test suite above does not import this file - confirm the intended consumer.

// Builds a factory producing `{ [key]: value, expectedCategory }` fixture entries.
const fixtureOf = (key) => (value, expectedCategory) => ({ [key]: value, expectedCategory });

const textSample = fixtureOf('text');
const fileName = fixtureOf('name');
const urlSample = fixtureOf('url');
const mixedSample = fixtureOf('content');

const classificationFixtures = {
  // Text samples representing different categories
  textSamples: [
    textSample("Meeting notes from team sync - discussed Q4 priorities and budget allocation", "work"),
    textSample("Recipe for chocolate chip cookies: flour, eggs, sugar, chocolate chips", "personal"),
    textSample("Lecture notes on quantum physics - wave particle duality and Schrodinger's equation", "education"),
    textSample("Grocery list: milk, bread, eggs, vegetables, chicken breast", "personal"),
    textSample("Project proposal draft for client presentation - include timeline and budget", "work"),
  ],

  // File names representing different categories
  fileNames: [
    fileName("Q4_Budget_Spreadsheet.xlsx", "work"),
    fileName("Vacation_Photos_2023.zip", "personal"),
    fileName("Machine_Learning_Course_Syllabus.pdf", "education"),
    fileName("Team_Member_Performance_Reviews.docx", "work"),
    fileName("Home_Renovation_Contracts.pdf", "personal"),
  ],

  // URLs representing different categories
  urls: [
    urlSample("https://company.intranet/projects/q4-initiatives", "work"),
    urlSample("https://recipes.com/chocolate-chip-cookies-master-recipe", "personal"),
    urlSample("https://university.edu/courses/quantum-physics/lecture-notes", "education"),
    urlSample("https://github.com/company/project-management-tool", "work"),
    urlSample("https://fitness-tracker.app/dashboard/weekly-summary", "personal"),
  ],

  // Mixed content captures
  mixedContent: [
    mixedSample("Team meeting notes + Q4_Budget_Spreadsheet.xlsx attachment", "work"),
    mixedSample("Vacation planning notes with https://travel-site.com/destinations links", "personal"),
    mixedSample("Study group discussion about https://university.edu/assignments/hw3.pdf", "education"),
  ],
};

export default classificationFixtures;

// ---- patch continues with: backend/src/main/java/com/sentri/backend/service/MyspaceIntelligenceService.java ----
fd68c63..84eac9d 100644 --- a/backend/src/main/java/com/sentri/backend/service/MyspaceIntelligenceService.java +++ b/backend/src/main/java/com/sentri/backend/service/MyspaceIntelligenceService.java @@ -6,4 +6,6 @@ public interface MyspaceIntelligenceService { MyspaceSearchResponse search(MyspaceSearchRequest request); + + MyspaceSearchResponse classify(MyspaceSearchRequest request); } diff --git a/backend/src/main/java/com/sentri/backend/service/MyspaceIntelligenceServiceImpl.java b/backend/src/main/java/com/sentri/backend/service/MyspaceIntelligenceServiceImpl.java index b669827..8d0f856 100644 --- a/backend/src/main/java/com/sentri/backend/service/MyspaceIntelligenceServiceImpl.java +++ b/backend/src/main/java/com/sentri/backend/service/MyspaceIntelligenceServiceImpl.java @@ -53,6 +53,88 @@ public MyspaceIntelligenceServiceImpl( this.myspaceVectorStoreService = myspaceVectorStoreService; } + // Myspace intelligence service implementation for capture classification + @Override + public CaptureClassification classifyCapture(String content) { + if (content == null || content.trim().isEmpty()) { + return new CaptureClassification("uncategorized", 0.0, "No content provided"); + } + + String lowerContent = content.toLowerCase(); + + // First try ML-based classification (placeholder) + CaptureClassification mlClassification = performMLClassification(lowerContent); + if (mlClassification.confidence() >= 0.8) { + return mlClassification; + } + + // Fallback to deterministic rules + return performDeterministicClassification(lowerContent); + } + + private CaptureClassification performMLClassification(String content) { + // Placeholder for ML classification logic + // In a real implementation, this would call an ML model + return new CaptureClassification("ml_placeholder", 0.5, "ML model placeholder"); + } + + private CaptureClassification performDeterministicClassification(String content) { + Map categoryScores = new HashMap<>(); + + // Score content against subject aliases 
+ for (Map.Entry> entry : SUBJECT_ALIASES.entrySet()) { + String category = entry.getKey(); + List aliases = entry.getValue(); + + int score = 0; + for (String alias : aliases) { + if (content.contains(alias.toLowerCase())) { + score += 10; + } + } + + if (score > 0) { + categoryScores.put(category, score); + } + } + + // Score content against context aliases + for (Map.Entry> entry : CONTEXT_ALIASES.entrySet()) { + String category = entry.getKey(); + List aliases = entry.getValue(); + + int score = 0; + for (String alias : aliases) { + if (content.contains(alias.toLowerCase())) { + score += 5; + } + } + + if (score > 0) { + categoryScores.put(category, categoryScores.getOrDefault(category, 0) + score); + } + } + + // Find category with highest score + String bestCategory = "uncategorized"; + int bestScore = 0; + double confidence = 0.0; + + for (Map.Entry entry : categoryScores.entrySet()) { + if (entry.getValue() > bestScore) { + bestCategory = entry.getKey(); + bestScore = entry.getValue(); + } + } + + // Calculate confidence based on score + if (bestScore > 0) { + confidence = Math.min(1.0, (double) bestScore / 50.0); + } + + return new CaptureClassification(bestCategory, confidence, "Deterministic classification"); + } + @Override public MyspaceSearchResponse search(MyspaceSearchRequest request) { if (request == null) {