Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions app/src/features/myspace/capture-classifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
'use strict';

const path = require('path');

/**
 * Mock stand-in for an ML classifier; a real implementation would call a
 * trained model instead of matching keywords.
 *
 * The lowercased text is checked against an ordered list of keyword rules and
 * the first rule with any matching keyword wins. Matching is plain substring
 * containment, so rule order matters when several rules could apply.
 *
 * @param {string} text - Capture text; must provide `toLowerCase()`.
 * @returns {{category: string, confidence: number}} The first matching rule's
 *   category and confidence, or `note` at 0.50 when nothing matches.
 */
const mockClassify = (text) => {
  const haystack = text.toLowerCase();

  // Evaluated top to bottom; keep this order to preserve precedence.
  const keywordRules = [
    { category: 'note', confidence: 0.85, needles: ['note', 'reminder'] },
    { category: 'timetable', confidence: 0.80, needles: ['schedule', 'timetable', 'class'] },
    { category: 'study_material', confidence: 0.75, needles: ['study', 'material', 'lecture'] },
    { category: 'link', confidence: 0.90, needles: ['http', 'www.'] },
    { category: 'file', confidence: 0.70, needles: ['.pdf', '.doc', 'file'] },
  ];

  for (const { category, confidence, needles } of keywordRules) {
    if (needles.some((needle) => haystack.includes(needle))) {
      return { category, confidence };
    }
  }

  // Nothing matched: low-confidence default.
  return { category: 'note', confidence: 0.50 };
};

/**
 * Rule-based fallback classifier.
 *
 * Rules are evaluated in priority order and the first match wins:
 *   1. clock time AND weekday name  -> timetable      (0.95)
 *   2. study keyword (whole word)   -> study_material (0.90)
 *   3. URL                          -> link           (0.95)
 *   4. known document extension     -> file           (0.90)
 *   5. screenshot-style wording     -> screenshot     (0.85)
 *   6. clock time on its own        -> timetable      (0.75)
 *   7. otherwise                    -> note           (0.60)
 *
 * @param {*} text - Capture text. `null`/`undefined` are treated as an empty
 *   string and other non-strings are stringified, so this never throws on
 *   unexpected input.
 * @returns {{category: string, confidence: number}} Category and confidence.
 */
const ruleBasedClassify = (text) => {
  // This is the last-resort classifier, so it must not throw on bad input.
  const lowerText = (text == null ? '' : String(text)).toLowerCase();

  // Timetable rules.
  // A time is "HH:MM" or a number followed by am/pm ("3pm", "10 am", "10:30am").
  // am/pm must follow digits: `\bpm\b` can never match inside "3pm" (no word
  // boundary between a digit and a letter), and a bare "am" would match the
  // verb in "I am".
  const timePattern = /\b\d{1,2}:\d{2}\b|\b\d{1,2}\s?(?:am|pm)\b/;
  const dayPattern = /\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/;
  const hasTime = timePattern.test(lowerText);

  if (hasTime && dayPattern.test(lowerText)) {
    return { category: 'timetable', confidence: 0.95 };
  }

  // Study material rules.
  // Keywords must not be embedded in a longer run of letters, otherwise
  // "example.com" (exam) or "latest" (test) would match. Digits, punctuation
  // and underscores still count as separators so names such as
  // "Course_Syllabus.pdf" keep matching.
  const studyPattern =
    /(?:^|[^a-z])(?:syllabus(?:es)?|syllabi|assignments?|homework|exam(?:s|inations?)?|quiz(?:zes)?|tests?)(?![a-z])/;
  if (studyPattern.test(lowerText)) {
    return { category: 'study_material', confidence: 0.90 };
  }

  // Link rules
  const urlPattern = /(https?:\/\/|www\.)[\w\-]+(\.[\w\-]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?/i;
  if (urlPattern.test(lowerText)) {
    return { category: 'link', confidence: 0.95 };
  }

  // File rules
  const fileExtensions = ['.pdf', '.doc', '.docx', '.txt', '.xls', '.xlsx', '.ppt', '.pptx'];
  if (fileExtensions.some((ext) => lowerText.includes(ext))) {
    return { category: 'file', confidence: 0.90 };
  }

  // Screenshot rules (based on common screenshot naming)
  const screenshotPatterns = ['screenshot', 'screen shot', 'capture', 'image'];
  if (screenshotPatterns.some((pattern) => lowerText.includes(pattern))) {
    return { category: 'screenshot', confidence: 0.85 };
  }

  // A clock time without a weekday ("Meeting at 3pm") is weaker evidence of a
  // schedule entry, so it is checked last and reported with lower confidence.
  if (hasTime) {
    return { category: 'timetable', confidence: 0.75 };
  }

  // Default to note
  return { category: 'note', confidence: 0.60 };
};

/**
 * Normalizes an internal category identifier to the name exposed to callers.
 *
 * Only identifiers listed in the mapping are rewritten; anything else is
 * returned unchanged.
 *
 * @param {string} category - Internal category identifier.
 * @returns {string} The display name, or the input itself when no mapping exists.
 */
const normalizeCategory = (category) => {
  // A Map has no inherited keys, so identifiers such as 'constructor' or
  // 'toString' cannot resolve to Object.prototype members the way a plain
  // object-literal lookup would.
  const categoryMap = new Map([
    ['study_material', 'study material'],
  ]);

  return categoryMap.has(category) ? categoryMap.get(category) : category;
};

/**
 * Main classification entry point.
 *
 * Runs the (mock) ML classifier first. When its confidence is below 0.7, or
 * when it throws, the rule-based classifier is used instead. The chosen
 * category is then passed through `normalizeCategory`.
 *
 * @param {*} captureText - Capture text. `null`/`undefined` are treated as an
 *   empty string and other non-strings are stringified.
 * @returns {{category: string, confidence: number}} A new result object; the
 *   object returned by the underlying classifier is not modified.
 */
const classifyCapture = (captureText) => {
  // Coerce once up front. Otherwise a non-string capture makes the primary
  // classifier throw and the fallback in the catch block throws on the very
  // same input, so the error escapes anyway.
  const text = captureText == null ? '' : String(captureText);

  let result;
  try {
    // First try ML classification
    result = mockClassify(text);

    // If confidence is too low, fall back to rule-based classification
    if (result.confidence < 0.7) {
      result = ruleBasedClassify(text);
    }
  } catch (error) {
    // Deliberate best-effort: any failure in the primary path degrades to the
    // rule-based classifier instead of failing the capture.
    result = ruleBasedClassify(text);
  }

  // Return a copy so the classifier's own result object is never mutated.
  return Object.assign({}, result, { category: normalizeCategory(result.category) });
};

// Public API of this module: classifyCapture is the main entry point;
// mockClassify and ruleBasedClassify are the two classifiers it calls and are
// exported as well.
module.exports = {
classifyCapture,
mockClassify,
ruleBasedClassify
};
99 changes: 99 additions & 0 deletions app/src/features/myspace/capture-classifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { classifyCapture } from './capture-classifier';

describe('Capture Classifier', () => {
  // Registers one test per [title, capture text, expected category] row.
  // Every test asserts only on the `category` field returned by classifyCapture.
  const registerCases = (rows) => {
    rows.forEach(([title, content, expectedCategory]) => {
      test(title, () => {
        const { category } = classifyCapture(content);
        expect(category).toBe(expectedCategory);
      });
    });
  };

  // NOTE(review): the two 'study material' expectations below use inputs that
  // contain "notes". mockClassify in capture-classifier.js matches the
  // substring 'note' first with confidence 0.85, which is above the 0.7
  // fallback threshold - confirm these expectations against the implementation.

  describe('ML Classification', () => {
    registerCases([
      ['classifies note content', 'This is a personal note about my thoughts', 'note'],
      ['classifies timetable content', 'Meeting at 3pm with team for project review', 'timetable'],
      ['classifies screenshot references', 'Screenshot_2023-01-01_12-00-00.png', 'screenshot'],
      ['classifies link content', 'Check out this article: https://example.com', 'link'],
      ['classifies file attachments', 'document.pdf', 'file'],
      ['classifies study material', 'Chapter 5 notes for mathematics exam', 'study material'],
    ]);
  });

  describe('Fallback Rules', () => {
    registerCases([
      ['uses URL pattern fallback', 'Visit https://github.com for code repositories', 'link'],
      ['uses file extension fallback', 'presentation.pptx', 'file'],
      ['uses screenshot pattern fallback', 'Screen Shot 2023-12-25 at 14.30.45.png', 'screenshot'],
      ['defaults to note for unknown content', 'Random text without clear pattern', 'note'],
    ]);
  });

  describe('Myspace Capture Fixtures', () => {
    registerCases([
      ['handles typical note capture', 'Remember to complete the project by Friday', 'note'],
      ['handles meeting capture', 'Team meeting tomorrow 10am agenda discussion', 'timetable'],
      ['handles web capture with URL', 'Interesting read: https://medium.com/article', 'link'],
      ['handles document capture', 'Final_report_v2.docx', 'file'],
      ['handles study related capture', 'Biology chapter 7 summary notes', 'study material'],
    ]);
  });
});
93 changes: 93 additions & 0 deletions app/src/features/myspace/classification-fixtures.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Classification test fixtures for Myspace capture inputs

// Fixture data grouped by capture kind. Each entry pairs an input (keyed
// `text`, `name`, `url` or `content` depending on the group) with the
// `expectedCategory` label for it.
//
// NOTE(review): the labels used here (work / personal / education) differ from
// the categories produced by capture-classifier.js in the same feature folder
// (note, timetable, study material, link, file, screenshot) - confirm which
// taxonomy these fixtures are meant to exercise.
const classificationFixtures = (() => {
  const WORK = "work";
  const PERSONAL = "personal";
  const EDUCATION = "education";

  // Small builders so each group keeps its own input key.
  const textSample = (text, expectedCategory) => ({ text, expectedCategory });
  const fileName = (name, expectedCategory) => ({ name, expectedCategory });
  const urlSample = (url, expectedCategory) => ({ url, expectedCategory });
  const mixed = (content, expectedCategory) => ({ content, expectedCategory });

  return {
    // Text samples representing different categories
    textSamples: [
      textSample("Meeting notes from team sync - discussed Q4 priorities and budget allocation", WORK),
      textSample("Recipe for chocolate chip cookies: flour, eggs, sugar, chocolate chips", PERSONAL),
      textSample("Lecture notes on quantum physics - wave particle duality and Schrodinger's equation", EDUCATION),
      textSample("Grocery list: milk, bread, eggs, vegetables, chicken breast", PERSONAL),
      textSample("Project proposal draft for client presentation - include timeline and budget", WORK),
    ],

    // File names representing different categories
    fileNames: [
      fileName("Q4_Budget_Spreadsheet.xlsx", WORK),
      fileName("Vacation_Photos_2023.zip", PERSONAL),
      fileName("Machine_Learning_Course_Syllabus.pdf", EDUCATION),
      fileName("Team_Member_Performance_Reviews.docx", WORK),
      fileName("Home_Renovation_Contracts.pdf", PERSONAL),
    ],

    // URLs representing different categories
    urls: [
      urlSample("https://company.intranet/projects/q4-initiatives", WORK),
      urlSample("https://recipes.com/chocolate-chip-cookies-master-recipe", PERSONAL),
      urlSample("https://university.edu/courses/quantum-physics/lecture-notes", EDUCATION),
      urlSample("https://github.com/company/project-management-tool", WORK),
      urlSample("https://fitness-tracker.app/dashboard/weekly-summary", PERSONAL),
    ],

    // Mixed content captures
    mixedContent: [
      mixed("Team meeting notes + Q4_Budget_Spreadsheet.xlsx attachment", WORK),
      mixed("Vacation planning notes with https://travel-site.com/destinations links", PERSONAL),
      mixed("Study group discussion about https://university.edu/assignments/hw3.pdf", EDUCATION),
    ],
  };
})();

export default classificationFixtures;
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@
/**
 * Service contract declaring two operations, {@code search} and
 * {@code classify}. Both accept a {@code MyspaceSearchRequest} and return a
 * {@code MyspaceSearchResponse}; implementations are not visible in this hunk.
 */
public interface MyspaceIntelligenceService {

/**
 * Search operation.
 *
 * @param request the search request
 * @return the search response
 */
MyspaceSearchResponse search(MyspaceSearchRequest request);

/**
 * Classification operation.
 * NOTE(review): this reuses the search request/response types rather than
 * classification-specific ones - confirm that is intended.
 *
 * @param request the request to classify (same type as used by {@code search})
 * @return the response (same type as returned by {@code search})
 */
MyspaceSearchResponse classify(MyspaceSearchRequest request);
}
Loading