-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathschema.sql
More file actions
197 lines (171 loc) · 9.19 KB
/
schema.sql
File metadata and controls
197 lines (171 loc) · 9.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
-- SCRIBA v0.2.0 Database Schema
-- Designed for future AI-powered knowledge base capabilities
CREATE TABLE IF NOT EXISTS recordings (
id INTEGER PRIMARY KEY AUTOINCREMENT,
directory_name TEXT NOT NULL UNIQUE, -- e.g., "2025-08-10_14-30-25_meeting-notes"
display_name TEXT, -- User-friendly name
created_at DATETIME NOT NULL, -- When recording was created
updated_at DATETIME NOT NULL, -- Last modified
duration_seconds INTEGER, -- Audio duration in seconds
file_size_bytes INTEGER, -- Size of audio file
audio_format TEXT DEFAULT 'wav', -- Audio file format
sample_rate INTEGER DEFAULT 48000, -- Audio sample rate
channels INTEGER DEFAULT 1, -- Mono/Stereo
has_transcript BOOLEAN DEFAULT 0, -- Whether transcript exists
transcript_status TEXT DEFAULT 'pending', -- pending, processing, completed, failed
language_code TEXT DEFAULT 'auto', -- Language used for transcription
model_used TEXT DEFAULT 'whisper-1', -- AI model used for transcription
-- Future AI features
tags TEXT, -- JSON array of tags
summary TEXT, -- AI-generated summary
key_points TEXT, -- JSON array of key points
action_items TEXT, -- JSON array of action items
speakers TEXT, -- JSON array of detected speakers
sentiment_score REAL, -- Overall sentiment (-1 to 1)
-- Metadata for search and organization
search_index TEXT, -- Full-text search index
categories TEXT, -- JSON array of categories
confidence_score REAL, -- Transcription confidence (0-1)
-- File paths (relative to base directory)
audio_path TEXT NOT NULL, -- Path to audio file
transcript_path TEXT -- Path to transcript file
);
CREATE TABLE IF NOT EXISTS transcripts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
recording_id INTEGER NOT NULL,
content TEXT NOT NULL, -- Full transcript text
created_at DATETIME NOT NULL,
updated_at DATETIME NOT NULL,
-- Transcript metadata
word_count INTEGER,
character_count INTEGER,
language_detected TEXT,
confidence_scores TEXT, -- JSON array of per-segment confidence
-- Segmentation (for future features like speaker diarization)
segments TEXT, -- JSON array of time-stamped segments
-- AI processing results
entities TEXT, -- JSON array of named entities (people, places, etc.)
topics TEXT, -- JSON array of detected topics
FOREIGN KEY (recording_id) REFERENCES recordings(id) ON DELETE CASCADE
);
-- Table for AI-powered search and knowledge base
CREATE TABLE IF NOT EXISTS knowledge_items (
id INTEGER PRIMARY KEY AUTOINCREMENT,
recording_id INTEGER NOT NULL,
transcript_id INTEGER,
-- Knowledge extraction
item_type TEXT NOT NULL, -- 'fact', 'action_item', 'decision', 'question', etc.
content TEXT NOT NULL, -- The extracted knowledge
context TEXT, -- Surrounding context
confidence REAL, -- Extraction confidence
-- Temporal info
timestamp_start REAL, -- Start time in recording (seconds)
timestamp_end REAL, -- End time in recording (seconds)
created_at DATETIME NOT NULL,
-- Relationships and references
related_items TEXT, -- JSON array of related knowledge item IDs
entities_mentioned TEXT, -- JSON array of people/places/things mentioned
FOREIGN KEY (recording_id) REFERENCES recordings(id) ON DELETE CASCADE,
FOREIGN KEY (transcript_id) REFERENCES transcripts(id) ON DELETE CASCADE
);
-- Table for tagging and categorization
CREATE TABLE IF NOT EXISTS tags (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
color TEXT DEFAULT '#6366f1', -- Hex color for UI
created_at DATETIME NOT NULL,
usage_count INTEGER DEFAULT 0 -- How many recordings use this tag
);
CREATE TABLE IF NOT EXISTS recording_tags (
recording_id INTEGER NOT NULL,
tag_id INTEGER NOT NULL,
created_at DATETIME NOT NULL,
PRIMARY KEY (recording_id, tag_id),
FOREIGN KEY (recording_id) REFERENCES recordings(id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tags(id) ON DELETE CASCADE
);
-- Global entity registry for people, organizations, and other entities
CREATE TABLE IF NOT EXISTS entities (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entity_type TEXT NOT NULL, -- 'person', 'organization', 'project', etc.
canonical_name TEXT NOT NULL, -- "John Smith" - the primary/official name
aliases TEXT, -- JSON: ["John", "Johnny", "J. Smith"]
context TEXT, -- AI-maintained description/context
metadata TEXT, -- JSON: {"role": "CTO", "company_id": 5}
mention_count INTEGER DEFAULT 1, -- How many times mentioned across recordings
first_seen_at DATETIME, -- When first encountered
last_seen_at DATETIME, -- When last mentioned
created_at DATETIME NOT NULL,
updated_at DATETIME NOT NULL
);
-- Link mentions in recordings to entities
CREATE TABLE IF NOT EXISTS entity_mentions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entity_id INTEGER, -- FK to entities (NULL if unlinked)
recording_id INTEGER NOT NULL, -- FK to recordings
mention_text TEXT NOT NULL, -- "John" as appeared in transcript
context_snippet TEXT, -- ~100 chars around the mention
confidence REAL DEFAULT 1.0, -- AI linking confidence (0-1)
linked_at DATETIME, -- When the link was established
created_at DATETIME NOT NULL,
FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE SET NULL,
FOREIGN KEY (recording_id) REFERENCES recordings(id) ON DELETE CASCADE
);
-- Indexes for entity queries
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(entity_type);
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(canonical_name);
CREATE INDEX IF NOT EXISTS idx_entity_mentions_entity ON entity_mentions(entity_id);
CREATE INDEX IF NOT EXISTS idx_entity_mentions_recording ON entity_mentions(recording_id);
-- Table for AI chat/query history (future feature)
CREATE TABLE IF NOT EXISTS ai_queries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
query TEXT NOT NULL, -- User's question
response TEXT NOT NULL, -- AI's response
context_recordings TEXT, -- JSON array of recording IDs used for context
created_at DATETIME NOT NULL,
response_time_ms INTEGER, -- How long the query took
satisfaction_rating INTEGER -- User feedback (1-5 stars)
);
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_recordings_created_at ON recordings(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_recordings_directory_name ON recordings(directory_name);
CREATE INDEX IF NOT EXISTS idx_recordings_search ON recordings(display_name, tags, summary);
CREATE INDEX IF NOT EXISTS idx_transcripts_recording_id ON transcripts(recording_id);
CREATE INDEX IF NOT EXISTS idx_knowledge_items_recording_id ON knowledge_items(recording_id);
CREATE INDEX IF NOT EXISTS idx_knowledge_items_type ON knowledge_items(item_type);
CREATE INDEX IF NOT EXISTS idx_recording_tags_recording_id ON recording_tags(recording_id);
-- Full-text search virtual table (for transcript content)
-- Note: Virtual tables don't support IF NOT EXISTS, so we drop and recreate
DROP TABLE IF EXISTS transcripts_fts;
CREATE VIRTUAL TABLE transcripts_fts USING fts5(
content,
recording_id UNINDEXED,
content='transcripts',
content_rowid='id'
);
-- Triggers to maintain FTS index
-- Note: Triggers don't support IF NOT EXISTS, so we drop and recreate
DROP TRIGGER IF EXISTS transcripts_fts_insert;
CREATE TRIGGER transcripts_fts_insert AFTER INSERT ON transcripts BEGIN
INSERT INTO transcripts_fts(rowid, content, recording_id)
VALUES (new.id, new.content, new.recording_id);
END;
DROP TRIGGER IF EXISTS transcripts_fts_delete;
CREATE TRIGGER transcripts_fts_delete AFTER DELETE ON transcripts BEGIN
INSERT INTO transcripts_fts(transcripts_fts, rowid, content, recording_id)
VALUES('delete', old.id, old.content, old.recording_id);
END;
DROP TRIGGER IF EXISTS transcripts_fts_update;
CREATE TRIGGER transcripts_fts_update AFTER UPDATE ON transcripts BEGIN
INSERT INTO transcripts_fts(transcripts_fts, rowid, content, recording_id)
VALUES('delete', old.id, old.content, old.recording_id);
INSERT INTO transcripts_fts(rowid, content, recording_id)
VALUES (new.id, new.content, new.recording_id);
END;
-- Initial data (only insert if not already present)
INSERT OR IGNORE INTO tags (name, color, created_at) VALUES
('meeting', '#3b82f6', datetime('now')),
('interview', '#10b981', datetime('now')),
('brainstorm', '#f59e0b', datetime('now')),
('call', '#8b5cf6', datetime('now')),
('personal', '#ef4444', datetime('now'));