-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathschema.sql
More file actions
210 lines (196 loc) · 13 KB
/
Copy pathschema.sql
File metadata and controls
210 lines (196 loc) · 13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
-- Singularity corpus schema (Cloudflare D1 / SQLite): the VortX source hive-mind, PUBLIC plane.
--
-- This database holds SOURCE facts only, never user data and never content: an infohash -> title
-- index, a debrid-cache boolean map, live swarm health, and node trust/penalty bookkeeping. It is the
-- same public torrent metadata that open indexers already publish. The user's debrid TOKEN and any RESOLVED
-- playback link are NEVER stored here (see src/corpus.ts sanitizeContribution + the VortX LEGAL/DMCA
-- posture at https://github.com/VortXTV/VortX/blob/main/docs/LEGAL.md). This DB
-- is entirely separate from the private sync relay (vortx-sync / api.vortx.tv), which holds E2E
-- ciphertext and no source facts. The two planes never mix.
--
-- SAFETY (do not remove). This file is IDEMPOTENT and NON-DESTRUCTIVE: safe to run against the live
-- database at any time, and it NEVER drops a table. Additive changes go in migrations/ as numbered
-- ALTER files. NEVER put DROP TABLE / DELETE / TRUNCATE in any file run against remote D1.
-- A federation node = a contributor identified by an Ed25519 keypair. id is sha256(pubkey) so a node
-- cannot claim another's identity. penalties accrue on false cache claims / abusive reports; once a
-- node is barred, its contributions are dropped and it consumes nothing (invisible to the user).
CREATE TABLE IF NOT EXISTS nodes (
id TEXT PRIMARY KEY, -- sha256(pubkey), hex
pubkey TEXT NOT NULL, -- base64 raw Ed25519 public key
trust_score INTEGER NOT NULL DEFAULT 0, -- reputation; good contributors are prioritized (later)
penalties INTEGER NOT NULL DEFAULT 0, -- false claims / abusive reports
banned INTEGER NOT NULL DEFAULT 0, -- 1 once barred from the benefits
contributions INTEGER NOT NULL DEFAULT 0, -- accepted facts contributed (drives the leaderboard)
version TEXT, -- node software version (self-reported via /hive/telemetry)
created_at INTEGER NOT NULL,
last_seen INTEGER NOT NULL
);
-- Torrent index: which infohash serves which title/episode. Infohash metadata only. added_at is touched
-- on every re-contribution, so it doubles as the association's last-seen for freshness.
CREATE TABLE IF NOT EXISTS torrents (
info_hash TEXT NOT NULL, -- 40-hex lowercase btih
meta_id TEXT NOT NULL, -- "tt1234567" (movie) or "tt1234567:1:5" (series episode)
quality TEXT, -- "2160p" | "1080p" | ...
size INTEGER, -- bytes
source TEXT, -- the contributing add-on's own name
file_idx INTEGER, -- file index within a multi-file torrent (Stremio fileIdx)
tags TEXT, -- comma-joined normalized release tags (hdr,dv,atmos,hevc,cam,...)
languages TEXT, -- comma-joined audio-language slugs (en,es,fr,...,multi,dual)
sources INTEGER NOT NULL DEFAULT 1, -- distinct non-barred nodes vouching for this association (anti-fake-infohash)
episodes TEXT, -- season pack only: JSON episode-number -> file-index map
added_at INTEGER NOT NULL, -- last-seen (touched on re-contribution)
PRIMARY KEY (info_hash, meta_id)
);
CREATE INDEX IF NOT EXISTS idx_torrents_meta ON torrents (meta_id);
-- One row per (torrent association, confirming node) so the `sources` count above is DISTINCT nodes, not
-- re-posts. A fake infohash->title association from one node stays at sources=1; the crowd raises real ones.
CREATE TABLE IF NOT EXISTS torrent_confirmations (
info_hash TEXT NOT NULL,
meta_id TEXT NOT NULL,
node_id TEXT NOT NULL,
ts INTEGER NOT NULL,
PRIMARY KEY (info_hash, meta_id, node_id)
);
-- Debrid cache map (the killer feature): which service has an infohash cached. BOOLEAN only - never a
-- resolved link, never a token. Trusted once 3 independent nodes confirm OR the reader's own debrid
-- confirms (see src/corpus.ts isCacheTrusted). last_verified drives the TTL; recent outranks old.
CREATE TABLE IF NOT EXISTS cache_facts (
-- info_hash holds a 40-hex torrent btih OR a 32-hex nzb MD5; the CHECK blocks any malformed key from
-- polluting trust counts (lengths are provably non-overlapping, so the two kinds never collide).
info_hash TEXT NOT NULL CHECK (length(info_hash) IN (32, 40)),
service TEXT NOT NULL, -- debrid OR usenet service slug
cached INTEGER NOT NULL, -- 1 cached / 0 not
confirmations INTEGER NOT NULL DEFAULT 0, -- denormalized distinct-node count (see cache_confirmations)
last_verified INTEGER NOT NULL,
PRIMARY KEY (info_hash, service)
);
-- One row per (cache fact, confirming node): the gate counts DISTINCT node_ids (a single node cannot
-- inflate the count by re-posting; banned/stale rows are excluded at read time). NOTE: this is NOT
-- Sybil-resistant on its own - Ed25519 keypairs are free to mint, so a motivated actor could spin up 3
-- throwaway nodes. Real resistance (proof-of-work / stake / contribution-history / IP diversity) is later
-- federation work; see README "Not yet implemented". Do not present the 3-node gate as Sybil-proof.
CREATE TABLE IF NOT EXISTS cache_confirmations (
info_hash TEXT NOT NULL CHECK (length(info_hash) IN (32, 40)),
service TEXT NOT NULL,
node_id TEXT NOT NULL,
ts INTEGER NOT NULL,
PRIMARY KEY (info_hash, service, node_id)
);
-- VortX-native ENGINE-MERGEABLE signed cache facts (mirrors vortx-core crates/hive CacheFact, one row per
-- (key, signer)). Distinct from cache_confirmations above: that table powers Singularity's own count-based
-- read-side trust; THIS table stores each contributor's self-contained Ed25519-signed CacheFact verbatim so
-- /hive/sync can re-emit it and the engine's hive client can feed it straight into merge_fact. signer_pubkey
-- + sig are the engine's required PUBLIC attestation (not a secret, not a debrid token). file_idx -1 = whole
-- torrent (engine sentinel). LWW by verified_at on conflict (keep the newest per signer).
CREATE TABLE IF NOT EXISTS signed_cache_facts (
info_hash TEXT NOT NULL CHECK (length(info_hash) IN (32, 40)),
service TEXT NOT NULL,
file_idx INTEGER NOT NULL DEFAULT -1, -- -1 = whole torrent
signer_pubkey TEXT NOT NULL, -- base64url(no-pad) 32-byte ed25519 verifying key
cached INTEGER NOT NULL, -- 1/0; a signed negative is first-class
size INTEGER, -- nullable (engine optional)
quality TEXT, -- nullable (engine optional, <=16 chars, no '|')
verified_at INTEGER NOT NULL, -- unix SECONDS (inside the signed bytes)
ttl INTEGER NOT NULL, -- seconds (inside the signed bytes)
sig TEXT NOT NULL, -- base64url(no-pad) 64-byte detached ed25519 sig
stored_at INTEGER NOT NULL, -- server epoch ms (delta-sync cursor)
PRIMARY KEY (info_hash, service, file_idx, signer_pubkey)
);
CREATE INDEX IF NOT EXISTS idx_signed_cache_stored ON signed_cache_facts (stored_at);
-- Live swarm health: continuously re-scraped seeders so dead swarms are filtered BEFORE the click
-- (many indexers serve stale counts; live freshness is our edge). Keyed by infohash, freshest wins.
CREATE TABLE IF NOT EXISTS health (
info_hash TEXT PRIMARY KEY CHECK (length(info_hash) = 40), -- torrent swarms only
seeders INTEGER,
leechers INTEGER,
last_seen INTEGER NOT NULL
);
-- "Showed cached but was not" reports (anti-poisoning). Recording is the seam for the penalty system:
-- a confirmed false claim penalizes the contributor; a confirmed false report penalizes the reporter.
-- Adjudication (re-verify before penalizing either side) is later federation work.
CREATE TABLE IF NOT EXISTS reports (
id INTEGER PRIMARY KEY AUTOINCREMENT,
info_hash TEXT NOT NULL,
service TEXT NOT NULL,
reporter TEXT NOT NULL, -- node id
ts INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_reports_hash ON reports (info_hash, service);
-- One report per (claim, reporter): handleReport INSERTs OR IGNORE so a reporter counts once per claim and
-- cannot pad the table or the distinct-reporter threshold. Mirrors the composite PK on the *_confirmations tables.
CREATE UNIQUE INDEX IF NOT EXISTS idx_reports_unique ON reports (info_hash, service, reporter);
-- HTTP / direct stream index: which STABLE PUBLIC stream URL serves a title. Public, tokenless URLs only
-- (sanitizeHttpFact rejects userinfo + token/session query params at ingest), so these are safe to share
-- and play on any client. added_at is touched on re-contribution (doubles as last-seen for freshness).
-- Unlike a torrent (client resolves the infohash) or an NZB (resolved on-device), an HTTP URL is played
-- VERBATIM by every client, so a single node must NOT be able to push one to everyone. It is gated behind
-- the SAME distinct-node confirmation as the cache: a URL is only surfaced once `confirmations` reaches
-- MIN_CONFIRMATIONS (see http_confirmations + handleStream).
CREATE TABLE IF NOT EXISTS http_streams (
url TEXT NOT NULL,
meta_id TEXT NOT NULL, -- "tt1234567" or "tt1234567:1:5"
quality TEXT,
size INTEGER,
source TEXT,
tags TEXT, -- comma-joined normalized release tags
languages TEXT, -- comma-joined audio-language slugs
confirmations INTEGER NOT NULL DEFAULT 0, -- distinct non-barred nodes that contributed this URL
added_at INTEGER NOT NULL,
PRIMARY KEY (url, meta_id)
);
CREATE INDEX IF NOT EXISTS idx_http_meta ON http_streams (meta_id);
-- One row per (http url, title, confirming node) so the gate counts DISTINCT nodes, not re-posts.
CREATE TABLE IF NOT EXISTS http_confirmations (
url TEXT NOT NULL,
meta_id TEXT NOT NULL,
node_id TEXT NOT NULL,
ts INTEGER NOT NULL,
PRIMARY KEY (url, meta_id, node_id)
);
-- NZB / Usenet index: which .nzb hash serves a title. Hash metadata only - never the .nzb body, the
-- indexer apikey, or NNTP creds; playback is resolved on-device with the user's own provider. The
-- per-usenet-service cache booleans reuse the cache_facts + cache_confirmations tables above (keyed by
-- the nzb_hash in the info_hash column - a 32-hex MD5 never collides with a 40-hex torrent btih).
CREATE TABLE IF NOT EXISTS nzbs (
nzb_hash TEXT NOT NULL, -- 32-hex (MD5 of the .nzb)
meta_id TEXT NOT NULL,
quality TEXT,
size INTEGER,
source TEXT,
tags TEXT, -- comma-joined normalized release tags
languages TEXT, -- comma-joined audio-language slugs
added_at INTEGER NOT NULL,
PRIMARY KEY (nzb_hash, meta_id)
);
CREATE INDEX IF NOT EXISTS idx_nzbs_meta ON nzbs (meta_id);
-- Node-to-node gossip: per-peer delta-sync cursor. A peer is an operator-allowlisted Singularity instance
-- (env.PEERS), never a user-supplied URL. The scheduled puller pulls each peer's /hive/sync from `cursor`,
-- ingests INDEX + health facts only (cache/http trust is NEVER imported - see ingestSyncDelta), and advances
-- the cursor. last_pull is for the dashboard.
CREATE TABLE IF NOT EXISTS peers (
url TEXT NOT NULL PRIMARY KEY CHECK (url LIKE 'https://%' AND length(url) <= 1024), -- https base URL
cursor INTEGER NOT NULL DEFAULT 0, -- last delta timestamp pulled from this peer
last_pull INTEGER NOT NULL DEFAULT 0
);
-- VortX Verified Sources: the health-scored source registry (content-moat #2). A "source" is a scraper/add-on
-- the corpus aggregates; `url` is METADATA ONLY (displayed, NEVER fetched server-side - no SSRF). Health comes
-- from nodes PROBING sources and reporting a boolean verdict (the Worker never probes), ranked by a transparent
-- score over DISTINCT fresh non-barred node probes (mirrors cache_confirmations). GET /sources reads this.
CREATE TABLE IF NOT EXISTS sources (
id TEXT NOT NULL PRIMARY KEY, -- stable lowercase slug
name TEXT NOT NULL,
kind TEXT NOT NULL, -- torrent | http | nzb (what the source provides)
category TEXT NOT NULL, -- VortX-generic region/niche label
url TEXT, -- optional public base/manifest (metadata only, never fetched)
added_at INTEGER NOT NULL
);
-- One latest probe verdict per (source, node), LWW by ts. The health score counts DISTINCT fresh non-barred
-- nodes (mirrors the cache/http confirmation trust model), so a single node cannot inflate a source's health.
CREATE TABLE IF NOT EXISTS source_probes (
source_id TEXT NOT NULL,
node_id TEXT NOT NULL,
ok INTEGER NOT NULL, -- 1 = node reached the source ok, 0 = failed
ts INTEGER NOT NULL,
PRIMARY KEY (source_id, node_id)
);
CREATE INDEX IF NOT EXISTS idx_source_probes_src ON source_probes (source_id);