diff --git a/lfs-pointer-integrity-guard/README.md b/lfs-pointer-integrity-guard/README.md new file mode 100644 index 00000000..0d58c228 --- /dev/null +++ b/lfs-pointer-integrity-guard/README.md @@ -0,0 +1,40 @@ +# Git LFS Pointer Integrity Guard + +Self-contained reviewer slice for SCIBASE issue #10, Project Repository & Version Control. + +This guard focuses on the Git LFS and hash-based integrity requirement in the project repository spec. It audits large scientific files before a tagged repository release or export bundle is allowed: + +- parses canonical Git LFS pointer files +- compares pointer SHA-256 and byte size against large-object storage metadata +- verifies release/export manifest coverage for DOI bundles +- flags missing, stale, malformed, or mismatched large objects +- checks retention and quota risk before release +- emits deterministic release, hold, and remediation actions + +The sample data is synthetic and the module does not call GitHub, Git LFS, DOI providers, object stores, payment providers, or external services. 
+ +## Files + +- `index.js` - audit engine, pointer parser, report renderers +- `sample-data.js` - synthetic repository, manifest, and object-store records +- `test.js` - deterministic regression tests +- `demo.js` - generates JSON, Markdown, and SVG reviewer artifacts +- `render-video.js` - renders the required short MP4 demo +- `reports/` - generated reviewer packet + +## Validation + +```bash +node lfs-pointer-integrity-guard/test.js +node lfs-pointer-integrity-guard/demo.js +node lfs-pointer-integrity-guard/render-video.js +node --check lfs-pointer-integrity-guard/index.js +node --check lfs-pointer-integrity-guard/sample-data.js +node --check lfs-pointer-integrity-guard/test.js +node --check lfs-pointer-integrity-guard/demo.js +node --check lfs-pointer-integrity-guard/render-video.js +git diff --check +ffprobe -v error -select_streams v:0 -show_entries stream=pix_fmt,width,height -show_entries format=duration,size -of default=nw=1 lfs-pointer-integrity-guard/reports/demo.mp4 +``` + +Expected decision for the synthetic fixture is `block-release`: it intentionally includes one passing LFS object plus malformed, missing, checksum-mismatched, size-mismatched, retention, manifest coverage, DOI/export, and quota-risk cases. 
const LFS_POINTER_VERSION = 'https://git-lfs.github.com/spec/v1';
const SHA256_RE = /^[a-f0-9]{64}$/;
// A pointer size must be written as plain base-10 digits. Number() on its own
// also accepts '', '0x10', '1e3', '12.0' and '-0', which would let a malformed
// size line pass the integrity check.
const POINTER_SIZE_RE = /^[0-9]+$/;

/**
 * Parse the text of a Git LFS pointer file and validate its fields.
 *
 * Every non-blank line is split on whitespace into a key (first token) and a
 * value (the remaining tokens joined with single spaces). If a key repeats,
 * the last occurrence wins.
 *
 * Possible error codes: `pointer-empty`, `version-invalid`,
 * `oid-missing-sha256-prefix`, `oid-invalid-sha256`, `size-invalid`.
 *
 * @param {string} pointerText - Raw pointer file content.
 * @returns {{valid: boolean, errors: string[], fields: Object,
 *   version?: (string|null), oid?: (string|null), size?: (number|null)}}
 *   `oid` is the text after the `sha256:` prefix (returned even when it is not
 *   a valid digest, so reports can show it) or null when the prefix is absent.
 *   `size` is the byte count, or null when the size line is not a plain
 *   decimal safe integer. For empty or non-string input only `valid`,
 *   `errors` and `fields` are returned.
 */
function parseLfsPointer(pointerText) {
  const errors = [];
  const fields = {};

  if (typeof pointerText !== 'string' || pointerText.trim() === '') {
    return { valid: false, errors: ['pointer-empty'], fields };
  }

  for (const rawLine of pointerText.trim().split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) continue;
    const [key, ...rest] = line.split(/\s+/);
    fields[key] = rest.join(' ');
  }

  if (fields.version !== LFS_POINTER_VERSION) {
    errors.push('version-invalid');
  }

  const oid = fields.oid || '';
  const hasSha256Prefix = oid.startsWith('sha256:');
  if (!hasSha256Prefix) {
    errors.push('oid-missing-sha256-prefix');
  }

  // A missing prefix also fails the digest check, so both codes are reported.
  const sha256 = hasSha256Prefix ? oid.slice('sha256:'.length) : '';
  if (!SHA256_RE.test(sha256)) {
    errors.push('oid-invalid-sha256');
  }

  // Only convert text that is strictly decimal digits; everything else is
  // treated as NaN so it is rejected below.
  const sizeText = fields.size;
  const size =
    typeof sizeText === 'string' && POINTER_SIZE_RE.test(sizeText) ? Number(sizeText) : Number.NaN;
  if (!Number.isSafeInteger(size)) {
    errors.push('size-invalid');
  }

  return {
    valid: errors.length === 0,
    errors,
    version: fields.version || null,
    oid: sha256 || null,
    size: Number.isSafeInteger(size) ? size : null,
    fields,
  };
}
storageByOid.get(parsed.oid) || null : null, + status: 'pass', + findings: [], + }; + + if (!parsed.valid) { + addFinding( + result.findings, + 'blocker', + 'pointer-malformed', + file.path, + `Git LFS pointer is malformed: ${parsed.errors.join(', ')}`, + { errors: parsed.errors }, + ); + result.status = 'block'; + fileResults.push(result); + findings.push(...result.findings); + continue; + } + + const object = result.storageObject; + if (!object) { + addFinding( + result.findings, + 'blocker', + 'lfs-object-missing', + file.path, + 'Pointer references an object that is absent from large-object storage.', + { oid: parsed.oid, expectedSize: parsed.size }, + ); + } else { + if (object.sha256 !== parsed.oid) { + addFinding( + result.findings, + 'blocker', + 'lfs-checksum-mismatch', + file.path, + 'Stored large-object checksum does not match the pointer object id.', + { pointerSha256: parsed.oid, storedSha256: object.sha256 }, + ); + } + + if (object.size !== parsed.size) { + addFinding( + result.findings, + 'blocker', + 'lfs-size-mismatch', + file.path, + 'Stored large-object byte size does not match the pointer size.', + { pointerSize: parsed.size, storedSize: object.size }, + ); + } + + const expiresAt = normalizeDate(object.expiresAt); + if (expiresAt) { + const retentionDays = Math.floor((expiresAt.getTime() - releaseDate.getTime()) / 86400000); + if (retentionDays < retentionMinimum) { + addFinding( + result.findings, + 'hold', + 'lfs-retention-too-short', + file.path, + 'Large-object retention expires before the repository citation retention window.', + { expiresAt: object.expiresAt, retentionDays, requiredDays: retentionMinimum }, + ); + } + } + } + + const manifest = result.manifest; + if (!manifest) { + addFinding( + result.findings, + 'hold', + 'release-manifest-missing', + file.path, + 'Git LFS file is not represented in the tagged release/export manifest.', + { releaseTag: repository.releaseTag }, + ); + } else { + if (manifest.oid !== parsed.oid) { + 
addFinding( + result.findings, + 'blocker', + 'manifest-oid-drift', + file.path, + 'Release manifest points at a different large-object id than the Git LFS pointer.', + { pointerSha256: parsed.oid, manifestSha256: manifest.oid }, + ); + } + + if (manifest.size !== parsed.size) { + addFinding( + result.findings, + 'blocker', + 'manifest-size-drift', + file.path, + 'Release manifest byte size differs from the Git LFS pointer.', + { pointerSize: parsed.size, manifestSize: manifest.size }, + ); + } + + if (!manifest.doiBundleIncluded) { + addFinding( + result.findings, + 'hold', + 'doi-export-coverage-missing', + file.path, + 'Large object is omitted from the DOI/export bundle coverage map.', + { releaseTag: repository.releaseTag }, + ); + } + } + + result.status = result.findings.some((finding) => finding.severity === 'blocker') + ? 'block' + : result.findings.length > 0 + ? 'hold' + : 'pass'; + fileResults.push(result); + findings.push(...result.findings); + } + + const uniqueObjects = new Map(); + for (const result of fileResults) { + if (result.parsed.oid && result.storageObject) { + uniqueObjects.set(result.parsed.oid, result.storageObject.size); + } + } + const lfsUsageBytes = [...uniqueObjects.values()].reduce((total, size) => total + size, 0); + const projectedBytes = lfsUsageBytes + (repository.pendingLfsBytes || 0); + + if (repository.lfsQuotaBytes && projectedBytes > repository.lfsQuotaBytes) { + addFinding( + findings, + 'hold', + 'lfs-quota-risk', + repository.releaseTag, + 'Tagged release would exceed the configured Git LFS storage quota after pending uploads.', + { + currentUsage: lfsUsageBytes, + pendingBytes: repository.pendingLfsBytes || 0, + quotaBytes: repository.lfsQuotaBytes, + }, + ); + } + + const blockers = findings.filter((finding) => finding.severity === 'blocker'); + const holds = findings.filter((finding) => finding.severity === 'hold'); + const decision = blockers.length > 0 ? 'block-release' : holds.length > 0 ? 
// Ordered remediation rules: an action is emitted when any of its trigger
// codes is present. The array order is the order actions appear in reports.
const REMEDIATION_RULES = [
  {
    codes: ['pointer-malformed'],
    action: 'Regenerate malformed pointer files with git-lfs pointer canonical output before tagging.',
  },
  {
    codes: ['lfs-object-missing'],
    action: 'Upload or restore missing large objects, then re-run pointer verification from a clean clone.',
  },
  {
    codes: ['lfs-checksum-mismatch', 'lfs-size-mismatch'],
    action: 'Quarantine mismatched large objects and republish from verified source artifacts.',
  },
  {
    codes: ['manifest-oid-drift', 'manifest-size-drift'],
    action: 'Regenerate the release/export manifest from the verified pointer set.',
  },
  {
    codes: ['release-manifest-missing', 'doi-export-coverage-missing'],
    action: 'Add every Git LFS artifact to the DOI/export coverage map or document an explicit exclusion.',
  },
  {
    codes: ['lfs-retention-too-short'],
    action: 'Move release artifacts to a retention tier that satisfies the citation retention window.',
  },
  {
    codes: ['lfs-quota-risk'],
    action: 'Resolve pending large uploads, archive superseded objects, or raise quota before release.',
  },
];

/**
 * Derive the list of remediation actions implied by a set of findings.
 *
 * Each action appears at most once, in a fixed order that does not depend on
 * the order of the findings.
 *
 * @param {Array<{code: string}>} [findings] - Audit findings; a null or
 *   undefined list is treated as empty.
 * @returns {string[]} Remediation action sentences, possibly empty.
 */
function buildRemediation(findings) {
  // Index the codes once instead of rescanning the findings for every rule.
  const presentCodes = new Set((findings ?? []).map((finding) => finding.code));

  return REMEDIATION_RULES
    .filter(({ codes }) => codes.some((code) => presentCodes.has(code)))
    .map(({ action }) => action);
}
/**
 * Escape a value for use in XML/SVG text content or a double-quoted attribute.
 *
 * The value is converted with String() first, so non-string input (numbers,
 * null, undefined) is rendered as its string form rather than throwing.
 *
 * @param {*} value - Value to embed in markup.
 * @returns {string} Text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeXml(value) {
  return String(value)
    // Ampersand must go first, otherwise the entities produced by the later
    // replacements would be escaped a second time.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
'01111'], + D: ['11110', '10001', '10001', '10001', '10001', '10001', '11110'], + E: ['11111', '10000', '10000', '11110', '10000', '10000', '11111'], + F: ['11111', '10000', '10000', '11110', '10000', '10000', '10000'], + G: ['01111', '10000', '10000', '10111', '10001', '10001', '01111'], + H: ['10001', '10001', '10001', '11111', '10001', '10001', '10001'], + I: ['11111', '00100', '00100', '00100', '00100', '00100', '11111'], + J: ['00111', '00010', '00010', '00010', '10010', '10010', '01100'], + K: ['10001', '10010', '10100', '11000', '10100', '10010', '10001'], + L: ['10000', '10000', '10000', '10000', '10000', '10000', '11111'], + M: ['10001', '11011', '10101', '10101', '10001', '10001', '10001'], + N: ['10001', '11001', '10101', '10011', '10001', '10001', '10001'], + O: ['01110', '10001', '10001', '10001', '10001', '10001', '01110'], + P: ['11110', '10001', '10001', '11110', '10000', '10000', '10000'], + Q: ['01110', '10001', '10001', '10001', '10101', '10010', '01101'], + R: ['11110', '10001', '10001', '11110', '10100', '10010', '10001'], + S: ['01111', '10000', '10000', '01110', '00001', '00001', '11110'], + T: ['11111', '00100', '00100', '00100', '00100', '00100', '00100'], + U: ['10001', '10001', '10001', '10001', '10001', '10001', '01110'], + V: ['10001', '10001', '10001', '10001', '10001', '01010', '00100'], + W: ['10001', '10001', '10001', '10101', '10101', '10101', '01010'], + X: ['10001', '10001', '01010', '00100', '01010', '10001', '10001'], + Y: ['10001', '10001', '01010', '00100', '00100', '00100', '00100'], + Z: ['11111', '00001', '00010', '00100', '01000', '10000', '11111'], + 0: ['01110', '10001', '10011', '10101', '11001', '10001', '01110'], + 1: ['00100', '01100', '00100', '00100', '00100', '00100', '01110'], + 2: ['01110', '10001', '00001', '00010', '00100', '01000', '11111'], + 3: ['11110', '00001', '00001', '01110', '00001', '00001', '11110'], + 4: ['00010', '00110', '01010', '10010', '11111', '00010', '00010'], + 5: ['11111', '10000', 
/**
 * Convert a hex color string into an `[r, g, b]` triple of 0-255 integers.
 *
 * Accepts 6-digit (`#0f172a`) and 3-digit shorthand (`#abc`, expanded to
 * `#aabbcc`) forms, with or without the leading `#`, in either letter case.
 *
 * @param {string} hex - Hex color string.
 * @returns {number[]} Red, green and blue channel values.
 * @throws {TypeError} When the input is not a 3- or 6-digit hex color. Failing
 *   here is preferable to returning NaN channels, which a byte buffer would
 *   silently store as 0.
 */
function rgb(hex) {
  const match = /^#?([0-9a-f]{3}|[0-9a-f]{6})$/i.exec(String(hex));
  if (!match) {
    throw new TypeError(`Invalid hex color: ${hex}`);
  }

  // Expand shorthand by doubling each digit so both forms share one path.
  const digits = match[1].length === 3
    ? [...match[1]].map((digit) => digit + digit).join('')
    : match[1];

  return [0, 2, 4].map((start) => Number.parseInt(digits.slice(start, start + 2), 16));
}
/**
 * Render the demo video for the integrity audit.
 *
 * Runs the demo (which also writes the JSON/Markdown/SVG reports into the
 * `reports` directory next to this script), draws 120 PPM frames into a
 * temporary directory, and encodes them with the `ffmpeg` executable found on
 * PATH at 30 frames per second into `reports/demo.mp4`.
 *
 * @returns {string} Absolute path of the written MP4 file.
 * @throws {Error} When ffmpeg cannot be started or exits with a non-zero status.
 */
function renderVideo() {
  const outputDir = path.join(__dirname, 'reports');
  fs.mkdirSync(outputDir, { recursive: true });
  const { audit } = runDemo(outputDir);
  const outputPath = path.join(outputDir, 'demo.mp4');
  const framesDir = fs.mkdtempSync(path.join(os.tmpdir(), 'lfs-video-'));
  const frameCount = 120;

  let result;
  try {
    for (let i = 0; i < frameCount; i += 1) {
      renderFrame(path.join(framesDir, `frame-${String(i).padStart(3, '0')}.ppm`), audit, i, frameCount);
    }

    result = spawnSync(
      'ffmpeg',
      ['-y', '-framerate', '30', '-i', path.join(framesDir, 'frame-%03d.ppm'), '-an', '-pix_fmt', 'yuv420p', outputPath],
      { encoding: 'utf8' },
    );
  } finally {
    // Remove the temporary frames even when rendering or encoding throws, so
    // failed runs do not leave frame directories behind in the OS temp dir.
    fs.rmSync(framesDir, { recursive: true, force: true });
  }

  // spawnSync reports launch failures (for example ffmpeg missing from PATH)
  // through `error`, with a null status and no stderr output.
  if (result.error) {
    throw new Error(`ffmpeg could not be started: ${result.error.message}`, { cause: result.error });
  }

  if (result.status !== 0) {
    throw new Error(`ffmpeg failed:\n${result.stderr}`);
  }

  console.log(`Video written: ${outputPath}`);
  return outputPath;
}
not match the pointer size.", + "evidence": { + "pointerSize": 23068672, + "storedSize": 15728640 + } + }, + { + "severity": "blocker", + "code": "lfs-object-missing", + "path": "results/microscopy-stack.tif", + "message": "Pointer references an object that is absent from large-object storage.", + "evidence": { + "oid": "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd", + "expectedSize": 150994944 + } + }, + { + "severity": "blocker", + "code": "pointer-malformed", + "path": "results/highres-figure.tiff", + "message": "Git LFS pointer is malformed: oid-invalid-sha256, size-invalid", + "evidence": { + "errors": [ + "oid-invalid-sha256", + "size-invalid" + ] + } + }, + { + "severity": "hold", + "code": "lfs-retention-too-short", + "path": "data/supplementary-sensors.parquet", + "message": "Large-object retention expires before the repository citation retention window.", + "evidence": { + "expiresAt": "2028-01-01T00:00:00Z", + "retentionDays": 580, + "requiredDays": 3650 + } + }, + { + "severity": "hold", + "code": "release-manifest-missing", + "path": "data/supplementary-sensors.parquet", + "message": "Git LFS file is not represented in the tagged release/export manifest.", + "evidence": { + "releaseTag": "preprint-v2.1" + } + }, + { + "severity": "hold", + "code": "doi-export-coverage-missing", + "path": "data/private-cohort-dictionary.xlsx", + "message": "Large object is omitted from the DOI/export bundle coverage map.", + "evidence": { + "releaseTag": "preprint-v2.1" + } + }, + { + "severity": "hold", + "code": "lfs-quota-risk", + "path": "preprint-v2.1", + "message": "Tagged release would exceed the configured Git LFS storage quota after pending uploads.", + "evidence": { + "currentUsage": 239075328, + "pendingBytes": 41943040, + "quotaBytes": 272629760 + } + } + ], + "fileResults": [ + { + "path": "data/trial-baseline.csv", + "status": "pass", + "oid": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "pointerSize": 12582912, 
+ "manifestCovered": true, + "storagePresent": true, + "findings": [] + }, + { + "path": "models/cell-segmentation.bin", + "status": "block", + "oid": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "pointerSize": 134217728, + "manifestCovered": true, + "storagePresent": true, + "findings": [ + { + "severity": "blocker", + "code": "lfs-checksum-mismatch", + "path": "models/cell-segmentation.bin", + "message": "Stored large-object checksum does not match the pointer object id.", + "evidence": { + "pointerSha256": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "storedSha256": "1111111111111111111111111111111111111111111111111111111111111111" + } + } + ] + }, + { + "path": "results/calibration-video.mov", + "status": "block", + "oid": "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc", + "pointerSize": 23068672, + "manifestCovered": true, + "storagePresent": true, + "findings": [ + { + "severity": "blocker", + "code": "lfs-size-mismatch", + "path": "results/calibration-video.mov", + "message": "Stored large-object byte size does not match the pointer size.", + "evidence": { + "pointerSize": 23068672, + "storedSize": 15728640 + } + } + ] + }, + { + "path": "results/microscopy-stack.tif", + "status": "block", + "oid": "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd", + "pointerSize": 150994944, + "manifestCovered": true, + "storagePresent": false, + "findings": [ + { + "severity": "blocker", + "code": "lfs-object-missing", + "path": "results/microscopy-stack.tif", + "message": "Pointer references an object that is absent from large-object storage.", + "evidence": { + "oid": "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd", + "expectedSize": 150994944 + } + } + ] + }, + { + "path": "results/highres-figure.tiff", + "status": "block", + "oid": "not-a-valid-sha", + "pointerSize": null, + "manifestCovered": false, + "storagePresent": false, + "findings": [ + { + "severity": 
"blocker", + "code": "pointer-malformed", + "path": "results/highres-figure.tiff", + "message": "Git LFS pointer is malformed: oid-invalid-sha256, size-invalid", + "evidence": { + "errors": [ + "oid-invalid-sha256", + "size-invalid" + ] + } + } + ] + }, + { + "path": "data/supplementary-sensors.parquet", + "status": "hold", + "oid": "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee", + "pointerSize": 57671680, + "manifestCovered": false, + "storagePresent": true, + "findings": [ + { + "severity": "hold", + "code": "lfs-retention-too-short", + "path": "data/supplementary-sensors.parquet", + "message": "Large-object retention expires before the repository citation retention window.", + "evidence": { + "expiresAt": "2028-01-01T00:00:00Z", + "retentionDays": 580, + "requiredDays": 3650 + } + }, + { + "severity": "hold", + "code": "release-manifest-missing", + "path": "data/supplementary-sensors.parquet", + "message": "Git LFS file is not represented in the tagged release/export manifest.", + "evidence": { + "releaseTag": "preprint-v2.1" + } + } + ] + }, + { + "path": "data/private-cohort-dictionary.xlsx", + "status": "hold", + "oid": "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "pointerSize": 18874368, + "manifestCovered": true, + "storagePresent": true, + "findings": [ + { + "severity": "hold", + "code": "doi-export-coverage-missing", + "path": "data/private-cohort-dictionary.xlsx", + "message": "Large object is omitted from the DOI/export bundle coverage map.", + "evidence": { + "releaseTag": "preprint-v2.1" + } + } + ] + } + ], + "remediation": [ + "Regenerate malformed pointer files with git-lfs pointer canonical output before tagging.", + "Upload or restore missing large objects, then re-run pointer verification from a clean clone.", + "Quarantine mismatched large objects and republish from verified source artifacts.", + "Add every Git LFS artifact to the DOI/export coverage map or document an explicit exclusion.", + "Move 
release artifacts to a retention tier that satisfies the citation retention window.", + "Resolve pending large uploads, archive superseded objects, or raise quota before release." + ] +} diff --git a/lfs-pointer-integrity-guard/reports/release-gate.md b/lfs-pointer-integrity-guard/reports/release-gate.md new file mode 100644 index 00000000..fe6f5101 --- /dev/null +++ b/lfs-pointer-integrity-guard/reports/release-gate.md @@ -0,0 +1,43 @@ +# Git LFS Pointer Integrity Guard + +Repository: SCIBASE.AI reproducibility-demo +Release: preprint-v2.1 +Decision: block-release + +## Summary + +- LFS files checked: 7 +- Blockers: 4 +- Holds: 4 +- Passed: 1 +- Projected LFS usage: 268 MB of 260 MB + +## Findings + +- BLOCKER lfs-checksum-mismatch (models/cell-segmentation.bin): Stored large-object checksum does not match the pointer object id. +- BLOCKER lfs-size-mismatch (results/calibration-video.mov): Stored large-object byte size does not match the pointer size. +- BLOCKER lfs-object-missing (results/microscopy-stack.tif): Pointer references an object that is absent from large-object storage. +- BLOCKER pointer-malformed (results/highres-figure.tiff): Git LFS pointer is malformed: oid-invalid-sha256, size-invalid +- HOLD lfs-retention-too-short (data/supplementary-sensors.parquet): Large-object retention expires before the repository citation retention window. +- HOLD release-manifest-missing (data/supplementary-sensors.parquet): Git LFS file is not represented in the tagged release/export manifest. +- HOLD doi-export-coverage-missing (data/private-cohort-dictionary.xlsx): Large object is omitted from the DOI/export bundle coverage map. +- HOLD lfs-quota-risk (preprint-v2.1): Tagged release would exceed the configured Git LFS storage quota after pending uploads. + +## Remediation + +- Regenerate malformed pointer files with git-lfs pointer canonical output before tagging. +- Upload or restore missing large objects, then re-run pointer verification from a clean clone. 
+- Quarantine mismatched large objects and republish from verified source artifacts. +- Add every Git LFS artifact to the DOI/export coverage map or document an explicit exclusion. +- Move release artifacts to a retention tier that satisfies the citation retention window. +- Resolve pending large uploads, archive superseded objects, or raise quota before release. + +## File Results + +- PASS data/trial-baseline.csv (storage present, manifest covered) +- BLOCK models/cell-segmentation.bin (storage present, manifest covered) +- BLOCK results/calibration-video.mov (storage present, manifest covered) +- BLOCK results/microscopy-stack.tif (storage missing, manifest covered) +- BLOCK results/highres-figure.tiff (storage missing, manifest gap) +- HOLD data/supplementary-sensors.parquet (storage present, manifest gap) +- HOLD data/private-cohort-dictionary.xlsx (storage present, manifest covered) diff --git a/lfs-pointer-integrity-guard/reports/summary.svg b/lfs-pointer-integrity-guard/reports/summary.svg new file mode 100644 index 00000000..49e60b41 --- /dev/null +++ b/lfs-pointer-integrity-guard/reports/summary.svg @@ -0,0 +1,19 @@ + + + + + Git LFS Pointer Integrity Guard + SCIBASE.AI reproducibility-demo preprint-v2.1 + + block-release + Checked 7 LFS files: 4 blockers, 4 holds, 1 pass + data/trial-baseline.csvPASS +models/cell-segmentation.binBLOCK +results/calibration-video.movBLOCK +results/microscopy-stack.tifBLOCK +results/highres-figure.tiffBLOCK +data/supplementary-sensors.parquetHOLD +data/private-cohort-dictionary.xlsxHOLD + + Projected LFS usage: 268 MB of 260 MB + diff --git a/lfs-pointer-integrity-guard/sample-data.js b/lfs-pointer-integrity-guard/sample-data.js new file mode 100644 index 00000000..90b1c053 --- /dev/null +++ b/lfs-pointer-integrity-guard/sample-data.js @@ -0,0 +1,136 @@ +const { LFS_POINTER_VERSION } = require('./index'); + +function pointer(oid, size, version = LFS_POINTER_VERSION) { + return [`version ${version}`, `oid sha256:${oid}`, 
`size ${size}`].join('\n'); +} + +const OIDS = { + baselineCsv: 'a'.repeat(64), + modelWeights: 'b'.repeat(64), + calibrationVideo: 'c'.repeat(64), + microscopyStack: 'd'.repeat(64), + sensorParquet: 'e'.repeat(64), + cohortDictionary: 'f'.repeat(64), +}; + +const repository = { + name: 'SCIBASE.AI reproducibility-demo', + releaseTag: 'preprint-v2.1', + releaseDate: '2026-05-31T00:00:00Z', + retentionMinimumDays: 3650, + lfsQuotaBytes: 260 * 1024 * 1024, + pendingLfsBytes: 40 * 1024 * 1024, + files: [ + { + path: 'data/trial-baseline.csv', + storage: 'git-lfs', + pointer: pointer(OIDS.baselineCsv, 12 * 1024 * 1024), + }, + { + path: 'models/cell-segmentation.bin', + storage: 'git-lfs', + pointer: pointer(OIDS.modelWeights, 128 * 1024 * 1024), + }, + { + path: 'results/calibration-video.mov', + storage: 'git-lfs', + pointer: pointer(OIDS.calibrationVideo, 22 * 1024 * 1024), + }, + { + path: 'results/microscopy-stack.tif', + storage: 'git-lfs', + pointer: pointer(OIDS.microscopyStack, 144 * 1024 * 1024), + }, + { + path: 'results/highres-figure.tiff', + storage: 'git-lfs', + pointer: pointer('not-a-valid-sha', 'many-bytes'), + }, + { + path: 'data/supplementary-sensors.parquet', + storage: 'git-lfs', + pointer: pointer(OIDS.sensorParquet, 55 * 1024 * 1024), + }, + { + path: 'data/private-cohort-dictionary.xlsx', + storage: 'git-lfs', + pointer: pointer(OIDS.cohortDictionary, 18 * 1024 * 1024), + }, + { + path: 'manuscript/main.md', + storage: 'git', + contentSha256: '9'.repeat(64), + }, + ], + lfsObjects: [ + { + oid: OIDS.baselineCsv, + sha256: OIDS.baselineCsv, + size: 12 * 1024 * 1024, + storageClass: 'archive-10y', + expiresAt: '2037-06-01T00:00:00Z', + }, + { + oid: OIDS.modelWeights, + sha256: '1'.repeat(64), + size: 128 * 1024 * 1024, + storageClass: 'archive-10y', + expiresAt: '2037-06-01T00:00:00Z', + }, + { + oid: OIDS.calibrationVideo, + sha256: OIDS.calibrationVideo, + size: 15 * 1024 * 1024, + storageClass: 'archive-10y', + expiresAt: 
'2037-06-01T00:00:00Z', + }, + { + oid: OIDS.sensorParquet, + sha256: OIDS.sensorParquet, + size: 55 * 1024 * 1024, + storageClass: 'standard', + expiresAt: '2028-01-01T00:00:00Z', + }, + { + oid: OIDS.cohortDictionary, + sha256: OIDS.cohortDictionary, + size: 18 * 1024 * 1024, + storageClass: 'archive-10y', + expiresAt: '2037-06-01T00:00:00Z', + }, + ], + releaseManifest: [ + { + path: 'data/trial-baseline.csv', + oid: OIDS.baselineCsv, + size: 12 * 1024 * 1024, + doiBundleIncluded: true, + }, + { + path: 'models/cell-segmentation.bin', + oid: OIDS.modelWeights, + size: 128 * 1024 * 1024, + doiBundleIncluded: true, + }, + { + path: 'results/calibration-video.mov', + oid: OIDS.calibrationVideo, + size: 22 * 1024 * 1024, + doiBundleIncluded: true, + }, + { + path: 'results/microscopy-stack.tif', + oid: OIDS.microscopyStack, + size: 144 * 1024 * 1024, + doiBundleIncluded: true, + }, + { + path: 'data/private-cohort-dictionary.xlsx', + oid: OIDS.cohortDictionary, + size: 18 * 1024 * 1024, + doiBundleIncluded: false, + }, + ], +}; + +module.exports = { repository, OIDS, pointer }; diff --git a/lfs-pointer-integrity-guard/test.js b/lfs-pointer-integrity-guard/test.js new file mode 100644 index 00000000..15f54254 --- /dev/null +++ b/lfs-pointer-integrity-guard/test.js @@ -0,0 +1,71 @@ +const assert = require('assert'); +const fs = require('fs'); +const os = require('os'); +const path = require('path'); +const { + LFS_POINTER_VERSION, + evaluateRepository, + parseLfsPointer, + renderMarkdown, + renderSvg, + writeReports, +} = require('./index'); +const { OIDS, pointer, repository } = require('./sample-data'); + +function runTests() { + const parsed = parseLfsPointer(pointer(OIDS.baselineCsv, 1024)); + assert.strictEqual(parsed.valid, true); + assert.strictEqual(parsed.version, LFS_POINTER_VERSION); + assert.strictEqual(parsed.oid, OIDS.baselineCsv); + assert.strictEqual(parsed.size, 1024); + + const malformed = parseLfsPointer('version https://example.invalid\nsize 
nope\noid md5:abc'); + assert.strictEqual(malformed.valid, false); + assert(malformed.errors.includes('version-invalid')); + assert(malformed.errors.includes('oid-missing-sha256-prefix')); + assert(malformed.errors.includes('oid-invalid-sha256')); + assert(malformed.errors.includes('size-invalid')); + + const audit = evaluateRepository(repository); + assert.strictEqual(audit.decision, 'block-release'); + assert.strictEqual(audit.releaseEligible, false); + assert.strictEqual(audit.summary.lfsFilesChecked, 7); + assert(audit.summary.blockers >= 4); + assert(audit.summary.holds >= 3); + assert.strictEqual(audit.summary.passed, 1); + + const codes = new Set(audit.findings.map((finding) => finding.code)); + assert(codes.has('lfs-checksum-mismatch')); + assert(codes.has('lfs-size-mismatch')); + assert(codes.has('lfs-object-missing')); + assert(codes.has('pointer-malformed')); + assert(codes.has('release-manifest-missing')); + assert(codes.has('doi-export-coverage-missing')); + assert(codes.has('lfs-retention-too-short')); + assert(codes.has('lfs-quota-risk')); + + const markdown = renderMarkdown(audit); + assert(markdown.includes('Git LFS Pointer Integrity Guard')); + assert(markdown.includes('block-release')); + assert(markdown.includes('models/cell-segmentation.bin')); + + const svg = renderSvg(audit); + assert(svg.includes('