From ce54c342449fdf8b0968e9121ea3ace27bb255be Mon Sep 17 00:00:00 2001 From: Dion Whitehead <260221+dionjwa@users.noreply.github.com> Date: Mon, 23 Jun 2025 22:53:43 +1200 Subject: [PATCH 1/5] checkpoint --- package-lock.json | 21 +++- package.json | 3 + src/base64.ts | 197 ----------------------------- src/index.ts | 3 - src/types.ts | 23 ---- src/v1/cloud.ts | 106 ++++++++++++++++ src/{ => v1}/dataref.ts | 42 +++++-- src/v1/index.ts | 4 + src/v1/serialized.ts | 273 ++++++++++++++++++++++++++++++++++++++++ src/v1/types.ts | 51 ++++++++ src/v2/cloud.ts | 0 src/v2/dataref.ts | 198 +++++++++++++++++++++++++++++ src/v2/index.ts | 11 ++ src/v2/serialized.ts | 0 src/v2/types.ts | 44 +++++++ 15 files changed, 739 insertions(+), 237 deletions(-) delete mode 100644 src/base64.ts delete mode 100644 src/index.ts delete mode 100644 src/types.ts create mode 100644 src/v1/cloud.ts rename src/{ => v1}/dataref.ts (81%) create mode 100644 src/v1/index.ts create mode 100644 src/v1/serialized.ts create mode 100644 src/v1/types.ts create mode 100644 src/v2/cloud.ts create mode 100644 src/v2/dataref.ts create mode 100644 src/v2/index.ts create mode 100644 src/v2/serialized.ts create mode 100644 src/v2/types.ts diff --git a/package-lock.json b/package-lock.json index 0132a47..25f9f18 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,15 @@ { "name": "@metapages/dataref", - "version": "0.3.0", + "version": "0.6.2", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@metapages/dataref", - "version": "0.3.0", + "version": "0.6.2", + "dependencies": { + "base64-arraybuffer": "^1.0.2" + }, "devDependencies": { "@rollup/plugin-typescript": "^12.1.1", "@types/node": "^20.10.0", @@ -1263,6 +1266,15 @@ "dev": true, "license": "MIT" }, + "node_modules/base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6.0" + } + }, "node_modules/brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -3083,6 +3095,11 @@ "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", "dev": true }, + "base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==" + }, "brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", diff --git a/package.json b/package.json index 1a61014..5e04f08 100644 --- a/package.json +++ b/package.json @@ -34,5 +34,8 @@ "vite-plugin-dts": "^4.3.0", "vite-tsconfig-paths": "^5.0.1", "vitest": "^2.1.1" + }, + "dependencies": { + "base64-arraybuffer": "^1.0.2" } } diff --git a/src/base64.ts b/src/base64.ts deleted file mode 100644 index b0bd854..0000000 --- a/src/base64.ts +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -// This module is browser compatible. - -/** - * Utilities for - * {@link https://datatracker.ietf.org/doc/html/rfc4648#section-4 | base64} - * encoding and decoding. - * - * This module is browser compatible. - * - * ```ts - * import { - * encodeBase64, - * decodeBase64, - * } from "https://deno.land/std@$STD_VERSION/encoding/base64.ts"; - * - * const encoded = encodeBase64("foobar"); // "Zm9vYmFy" - * - * decodeBase64(encoded); // Uint8Array(6) [ 102, 111, 111, 98, 97, 114 ] - * ``` - * - * @module - */ - -const base64abc = [ - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "0", - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "+", - "/", -]; - -/** - * Converts data into a base64-encoded string. - * - * @see {@link https://datatracker.ietf.org/doc/html/rfc4648#section-4} - * - * @param data The data to encode. - * @returns The base64-encoded string. - * - * @example - * ```ts - * import { encodeBase64 } from "https://deno.land/std@$STD_VERSION/encoding/base64.ts"; - * - * encodeBase64("foobar"); // "Zm9vYmFy" - * ``` - */ -export function encodeBase64(data: ArrayBuffer | Uint8Array | string): string { - // CREDIT: https://gist.github.com/enepomnyaschih/72c423f727d395eeaa09697058238727 - const uint8 = validateBinaryLike(data); - let result = ""; - let i; - const l = uint8.length; - for (i = 2; i < l; i += 3) { - result += base64abc[(uint8[i - 2]!) >> 2]; - result += base64abc[ - (((uint8[i - 2]!) & 0x03) << 4) | - ((uint8[i - 1]!) >> 4) - ]; - result += base64abc[ - (((uint8[i - 1]!) & 0x0f) << 2) | - ((uint8[i]!) >> 6) - ]; - result += base64abc[(uint8[i]!) & 0x3f]; - } - if (i === l + 1) { - // 1 octet yet to write - result += base64abc[(uint8[i - 2]!) >> 2]; - result += base64abc[((uint8[i - 2]!) & 0x03) << 4]; - result += "=="; - } - if (i === l) { - // 2 octets yet to write - result += base64abc[(uint8[i - 2]!) >> 2]; - result += base64abc[ - (((uint8[i - 2]!) & 0x03) << 4) | - ((uint8[i - 1]!) >> 4) - ]; - result += base64abc[((uint8[i - 1]!) & 0x0f) << 2]; - result += "="; - } - return result; -} - -/** - * Decodes a base64-encoded string. - * - * @see {@link https://datatracker.ietf.org/doc/html/rfc4648#section-4} - * - * @param b64 The base64-encoded string to decode. - * @returns The decoded data. - * - * @example - * ```ts - * import { decodeBase64 } from "https://deno.land/std@$STD_VERSION/encoding/base64.ts"; - * - * decodeBase64("Zm9vYmFy"); // Uint8Array(6) [ 102, 111, 111, 98, 97, 114 ] - * ``` - */ -export function decodeBase64(b64: string): Uint8Array { - const binString = atob(b64); - const size = binString.length; - const bytes = new Uint8Array(size); - for (let i = 0; i < size; i++) { - bytes[i] = binString.charCodeAt(i); - } - return bytes; -} - -// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. - -const encoder = new TextEncoder(); - -function getTypeName(value: unknown): string { - const type = typeof value; - if (type !== "object") { - return type; - } else if (value === null) { - return "null"; - } else { - return (value as Object)?.constructor?.name ?? "object"; - } -} - -export function validateBinaryLike(source: unknown): Uint8Array { - if (typeof source === "string") { - return encoder.encode(source); - } else if (source instanceof Uint8Array) { - return source; - } else if (source instanceof ArrayBuffer) { - return new Uint8Array(source); - } - throw new TypeError( - `The input must be a Uint8Array, a string, or an ArrayBuffer. Received a value of the type ${ - getTypeName(source) - }.`, - ); -} \ No newline at end of file diff --git a/src/index.ts b/src/index.ts deleted file mode 100644 index 823ffb7..0000000 --- a/src/index.ts +++ /dev/null @@ -1,3 +0,0 @@ -export * from "./types"; -export * from "./dataref"; - diff --git a/src/types.ts b/src/types.ts deleted file mode 100644 index 6e1f81d..0000000 --- a/src/types.ts +++ /dev/null @@ -1,23 +0,0 @@ -// represents a way of getting a blob of data (inputs/outputs) -export enum DataRefType { - base64 = "base64", //default, value is a base64 encoded bytes - url = "url", // request the data at this URL - utf8 = "utf8", - json = "json", - // the internal system can get this data blob given the key address (stored in the value) - // this can be the sha256 hash of the data, or some other identifier - key = "key", -} - -const DataRefTypeKeys :string[] = Object.keys(DataRefType).filter(key => isNaN(Number(key))); -export const DataRefTypesSet = new Set(DataRefTypeKeys); -export const DataRefTypeDefault = DataRefType.utf8; - -export type DataRef = { - value: T; - type?: DataRefType; - mime?: string; - hash?: string; - created?: number|string; -}; - diff --git a/src/v1/cloud.ts b/src/v1/cloud.ts new file mode 100644 index 0000000..043f9c5 --- /dev/null +++ b/src/v1/cloud.ts @@ -0,0 +1,106 @@ +import { decode } from 'base64-arraybuffer'; + +import { + getContentType, + sha256Buffer, +} from './dataref'; +import { + type DataRef, + DataRefType, +} from './types'; + +let maxDataLength = 200; + +export const setMaxDataLength = (length: number) => { + maxDataLength = length; +}; + +export const getMaxDataLength = () => { + return maxDataLength; +}; + +const _encoder = new TextEncoder(); +export const utf8ToBuffer = (str: string): Uint8Array => { + return _encoder.encode(str); +}; + +const defaultUpload = async (ref: DataRef, url:string, data: Uint8Array|ArrayBuffer) :Promise => { + const responseUpload = await fetch(url, { + method: "PUT", + body: data, + // @ts-ignore: TS2353 + method: "PUT", + // @ts-ignore: TS2353 + redirect: "follow", + headers: { "Content-Type": getContentType(ref) }, + }); + if (!responseUpload.ok) { + throw new Error(`Failed to get upload URL: ${url}`); + } + return url; +}; + +export const copyLargeBlobToCloud = async ( + ref: DataRef, + opts: { + // If the returned uploadUrl is undefined, the blob is already uploaded + getUploadUrl: (ref: DataRef) => Promise, + // returns the URL for the corresponding download + upload?: (ref: DataRef, url: string, data: Uint8Array|ArrayBuffer) => Promise, + maxDataLength?: number, + }, +): Promise => { + let upload = opts.upload ?? defaultUpload; + const url = await opts.getUploadUrl(ref); + + let maxDataLengthActual = opts.maxDataLength ?? maxDataLength; + + const type: DataRefType = ref.ref; + let uint8ArrayIfBig: Uint8Array|ArrayBuffer | undefined; + // let contentType: string | undefined; + switch (type) { + case DataRefType.key: + // this is already cloud storage. no need to re-upload + return ref; + case DataRefType.url: + // this is already somewhere else. + return ref; + case DataRefType.json: + if (ref.value) { + const jsonString = JSON.stringify(ref.value); + if (jsonString.length > maxDataLengthActual) { + uint8ArrayIfBig = utf8ToBuffer(jsonString); + } + // contentType = "application/json"; + } + break; + case DataRefType.base64: + if ((ref.value as string).length > maxDataLengthActual) { + uint8ArrayIfBig = decode(ref.value); + } + // contentType = ref.contentType || "application/octet-stream"; + break; + case DataRefType.utf8: + if ((ref.value as string)?.length > maxDataLengthActual) { + uint8ArrayIfBig = utf8ToBuffer(ref.value); + } + break; + default: + } + + if (uint8ArrayIfBig) { + // upload and replace the dataref + const urlForDownload = await upload(ref, url, uint8ArrayIfBig); + const sha256 = await sha256Buffer(uint8ArrayIfBig); + + const newRef: DataRef = { + value: urlForDownload, + ref: DataRefType.url, + contentType: ref.contentType, + sha256: sha256, + }; + return newRef; + } else { + return ref; + } +}; diff --git a/src/dataref.ts b/src/v1/dataref.ts similarity index 81% rename from src/dataref.ts rename to src/v1/dataref.ts index a7821bf..0a6f674 100644 --- a/src/dataref.ts +++ b/src/v1/dataref.ts @@ -1,5 +1,10 @@ -import { decodeBase64 } from "./base64"; -import { type DataRef, DataRefType, DataRefTypesSet } from "./types"; +import { decode as decodeBase64 } from 'base64-arraybuffer'; + +import { + type DataRef, + DataRefType, + DataRefTypesSet, +} from './types'; type FetchBlobFromKey = ( key: String, @@ -18,18 +23,31 @@ export const isDataRef = (value: any): boolean => { return !!( value && typeof value === "object" && - (value as DataRef)?.type && - DataRefTypesSet.has((value as DataRef).type!) && + DataRefTypesSet.has((value as DataRef)?.ref) && (value as DataRef)?.value !== undefined ); }; +export const getContentType = (ref: DataRef): string => { + if (ref?.contentType) { + return ref.contentType; + } + switch (ref.ref) { + case DataRefType.utf8: + return "text/plain"; + case DataRefType.json: + return "application/json"; + default: + return "application/octet-stream"; + } +}; + export const dataRefToBuffer = async ( ref: DataRef, opts?: { fetchBlobFromKey?: FetchBlobFromKey; fetchOptions?: RequestInit } -): Promise => { +): Promise => { let { fetchBlobFromKey, fetchOptions } = opts ?? {}; - switch (ref.type) { + switch (ref.ref) { case DataRefType.base64: return decodeBase64(ref.value as string); case DataRefType.utf8: @@ -52,7 +70,7 @@ export const dataRefToBuffer = async ( return new Uint8Array(arrayBufferFromKey); } default: // undefined assume DataRefType.Base64 - throw `Not yet implemented: DataRef.type "${ref.type}" unknown`; + throw `Not yet implemented: DataRef.ref "${ref.ref}" unknown`; } }; @@ -65,7 +83,7 @@ export const dataRefToFile = async ( } ): Promise => { let { fetchBlobFromKey, name, fetchOptions } = opts ?? {}; - switch (ref.type) { + switch (ref.ref) { case DataRefType.base64: const bufferBase64 = decodeBase64(ref.value as string); name = name ?? (await sha256Buffer(bufferBase64)); @@ -96,15 +114,15 @@ export const dataRefToFile = async ( const bufferFromKey = await fetcher(ref.value, fetchOptions); name = name ?? (await sha256Buffer(bufferFromKey)); return new File([bufferFromKey], name, { - type: "application/octet-stream", + type: ref?.contentType || "application/octet-stream", }); } default: - throw `Not yet implemented: DataRef.type "${ref.type}" unknown`; + throw `Not yet implemented: DataRef.ref "${ref.ref}" unknown`; } }; -export const sha256Buffer = async (buffer: Uint8Array): Promise => { +export const sha256Buffer = async (buffer: Uint8Array|ArrayBuffer): Promise => { const hashBuffer = await crypto.subtle.digest("SHA-256", buffer); const hashArray = Array.from(new Uint8Array(hashBuffer)); const hashHex = hashArray @@ -128,7 +146,7 @@ export const dataRefToDownloadLink = async ( ): Promise => { const buffer = await dataRefToBuffer(ref, opts); return URL.createObjectURL( - new Blob([buffer], { type: "application/octet-stream" }) + new Blob([buffer], { type: ref?.contentType || "application/octet-stream" }) ); }; diff --git a/src/v1/index.ts b/src/v1/index.ts new file mode 100644 index 0000000..502689d --- /dev/null +++ b/src/v1/index.ts @@ -0,0 +1,4 @@ +export * from "./dataref"; +export * from "./cloud"; +export * from "./types"; +export * from "./serialized"; diff --git a/src/v1/serialized.ts b/src/v1/serialized.ts new file mode 100644 index 0000000..e20e0d4 --- /dev/null +++ b/src/v1/serialized.ts @@ -0,0 +1,273 @@ +import { + decode, + encode, +} from 'base64-arraybuffer'; + +import { + type DataRefSerialized, + type DataRefSerializedBlob, + type DataRefSerializedFile, + type DataRefSerializedTypedArray, + DataRefType, +} from './types'; + +// export type DataRefSerialized = { +// // This means it's a serialized DataRef +// _s: true; +// // constructor +// _c: string; +// // value is base64 encoded +// value: string; +// size: number; +// }; + +// export type DataRefSerializedTypedArray = DataRefSerialized & { +// // Typed arrays are from ArrayBufferView +// byteLength: number; +// byteOffset: number; +// }; + +// export type DataRefSerializedBlob = DataRefSerialized & { +// fileType?: string; +// }; + +// export type DataRefSerializedFile = DataRefSerializedBlob & { +// name: string; +// lastModified?: number; +// }; + +export const valueToFile = async ( + value: any, + fileName: string, + options?: FilePropertyBag +): Promise => { + value = possiblyDeserializeDataRefSerializedToValue(value); + options = options || {}; + if (!options.type) { + options.type = "application/octet-stream"; + } + + if (value instanceof ArrayBuffer) { + return new File([value], fileName, options); + } + if (value instanceof File || value instanceof Blob) { + const buffer = await value.arrayBuffer(); + if (value instanceof File) { + options.type = (value as File).type; + } + return new File([buffer], fileName, options); + } + if ( + value instanceof Int8Array || + value instanceof Uint8Array || + value instanceof Uint8ClampedArray || + value instanceof Int16Array || + value instanceof Uint16Array || + value instanceof Int32Array || + value instanceof Uint32Array || + value instanceof Float32Array || + value instanceof Float64Array + ) { + const typedValue = value as ArrayBufferView; + return new File([typedValue.buffer], fileName, options); + } + if (typeof value === "string") { + var blob = new Blob([value], { type: "text/plain" }); + options.type = "text/plain"; + return new File([blob], fileName, options); + } + if (typeof value === "object") { + const blob = new Blob([JSON.stringify(value)], { + type: "application/json", + }); + options.type = "application/json"; + return new File([blob], fileName, options); + } + + // assume it's a string + var blob = new Blob([value as string], { type: "text/plain" }); + options.type = "text/plain"; + return new File([blob], fileName, options); +}; + +export type DataRefTypedArray = + | Int8Array + | Uint8Array + | Uint8ClampedArray + | Int16Array + | Uint16Array + | Int32Array + | Uint32Array + | Float32Array + | Float64Array; + +export const possiblySerializeValueToDataref = async ( + value: T +): Promise => { + if ( + value instanceof Int8Array || + value instanceof Uint8Array || + value instanceof Uint8ClampedArray || + value instanceof Int16Array || + value instanceof Uint16Array || + value instanceof Int32Array || + value instanceof Uint32Array || + value instanceof Float32Array || + value instanceof Float64Array + ) { + const typedValue = value as ArrayBufferView; + const replacement: DataRefSerializedTypedArray = { + ref: DataRefType.base64, + c: value.constructor.name, + value: encode(typedValue.buffer), + byteLength: typedValue.byteLength, + byteOffset: typedValue.byteOffset, + size: typedValue.byteLength, + }; + return Promise.resolve(replacement); + } else if (value instanceof File) { + const typedValue = value as File; + const arrayBuffer = await typedValue.arrayBuffer(); + const replacement: DataRefSerializedFile = { + ref: DataRefType.base64, + c: File.name, + value: encode(arrayBuffer), + name: typedValue.name, + contentType: typedValue.type, + lastModified: typedValue.lastModified, + size: arrayBuffer.byteLength, + }; + return replacement; + } else if (value instanceof Blob) { + const typedValue = value as Blob; + const arrayBuffer = await typedValue.arrayBuffer(); + const replacement: DataRefSerializedBlob = { + ref: DataRefType.base64, + c: Blob.name, + value: encode(arrayBuffer), + contentType: typedValue.type, + size: arrayBuffer.byteLength, + }; + return replacement; + } else if (value instanceof ArrayBuffer) { + const typedValue = value as ArrayBuffer; + const replacement: DataRefSerialized = { + ref: DataRefType.base64, + c: ArrayBuffer.name, + value: encode(typedValue), + size: typedValue.byteLength, + }; + return Promise.resolve(replacement); + } + return Promise.resolve(value); +}; + +export const possiblyDeserializeDataRefSerializedToValue = ( + value: any +): any => { + if (!isDatarefSerialized(value)) { + return value; + } + return deserializeDataRefSerializedToValue(value as DataRefSerialized); +}; + +export const deserializeDataRefSerializedToValue = ( + serializedRef: DataRefSerialized +): Blob | File | ArrayBuffer | ArrayBufferView | DataRefTypedArray => { + const _c: string = serializedRef.c; + if (_c === Blob.name) { + const serializedRefBlob = serializedRef as DataRefSerializedBlob; + const blob = new Blob([decode(serializedRef.value)], { + type: serializedRefBlob.contentType, + }); + return blob; + } else if (_c === File.name) { + const serializedRefFile = serializedRef as DataRefSerializedFile; + const file = new File( + [decode(serializedRef.value)], + serializedRefFile.name, + { + type: serializedRefFile.contentType, + lastModified: serializedRefFile.lastModified, + } + ); + return file; + } else if (_c === ArrayBuffer.name) { + const arrayBuffer: ArrayBuffer = decode(serializedRef.value); + return arrayBuffer; + } + // Assume typed array + const serializedRefTypedArray = serializedRef as DataRefSerializedTypedArray; + + const arrayBuffer: ArrayBuffer = decode(serializedRefTypedArray.value); + const constructorName: string = serializedRefTypedArray.c; + + // @ts-ignore + const typedArray: ArrayBufferView = new globalThis[constructorName]( + arrayBuffer + // serializedRefTypedArray.byteOffset, + // serializedRefTypedArray.byteLength + ); + return typedArray; +}; + +export const isDatarefSerialized = (value: any): boolean => { + return ( + value && + typeof value === "object" && + (value as DataRefSerialized)?.ref == DataRefType.base64 && + (value as DataRefSerialized)?.value && + (value as DataRefSerialized)?.c + ); +}; + +export const possiblyDeserializeDatarefToFile = ( + value: any +): File | undefined => { + if (!isDatarefSerialized(value)) { + return value; + } + + const serializedRef = value as DataRefSerialized; + const _c: string = serializedRef.c; + if (_c === Blob.name) { + const serializedRefBlob = value as DataRefSerializedBlob; + const blob = new Blob([decode(serializedRef.value)], { + type: serializedRefBlob.contentType, + }); + return new File([blob], "file", { + type: blob.type, + }); + } else if (_c === File.name) { + const serializedRefFile = value as DataRefSerializedFile; + const file = new File( + [decode(serializedRef.value)], + serializedRefFile.name, + { + type: serializedRefFile.contentType, + lastModified: serializedRefFile.lastModified, + } + ); + return file; + } else if (_c === ArrayBuffer.name) { + const arrayBuffer: ArrayBuffer = decode(serializedRef.value); + return new File([arrayBuffer], "file", { + type: "application/octet-stream", + }); + } + // Assume typed array + const serializedRefTypedArray = value as DataRefSerializedTypedArray; + const arrayBuffer: ArrayBuffer = decode(serializedRefTypedArray.value); + const constructorName: string = serializedRefTypedArray.c; + + try { + // @ts-ignore + const typedArray: ArrayBufferView = new globalThis[constructorName]( + arrayBuffer + ); + return new File([typedArray], "file", { + type: "application/octet-stream", + }); + } catch (e) {} + return undefined; +}; diff --git a/src/v1/types.ts b/src/v1/types.ts new file mode 100644 index 0000000..c28510f --- /dev/null +++ b/src/v1/types.ts @@ -0,0 +1,51 @@ +// represents a way of getting a blob of data (inputs/outputs) +export enum DataRefType { + /* default, value is a base64 encoded bytes. Can be encoded TypedArray or Blob */ + base64 = "base64", + url = "url", // request the data at this URL + utf8 = "utf8", + json = "json", + // serialized = "serialized", + // the internal system can get this data blob given the key address (stored in the value) + // this can be the sha256 hash of the data, or some other identifier + key = "key", +} + +const DataRefTypeKeys: string[] = Object.keys(DataRefType).filter((key) => + isNaN(Number(key)) +); +export const DataRefTypesSet = new Set(DataRefTypeKeys); +export const DataRefTypeDefault = DataRefType.utf8; + +export type DataRef = { + /* We want to be unambigous here */ + ref: DataRefType; + value: T; + // mime type / file type + contentType?: string; + size?: number; + sha256?: string; + created?: string; +}; + +export type DataRefSerialized = Omit & { + // constructor name, e.g. "Uint8Array" or "Blob" + c: string; + // required here, but optional in the parent type + size: number; +}; + +export type DataRefSerializedTypedArray = DataRefSerialized & { + // Typed arrays are from ArrayBufferView + byteLength: number; + byteOffset: number; +}; + +export type DataRefSerializedBlob = Omit & { + contentType: string; +}; + +export type DataRefSerializedFile = DataRefSerialized & { + name: string; + lastModified?: number; +}; diff --git a/src/v2/cloud.ts b/src/v2/cloud.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/v2/dataref.ts b/src/v2/dataref.ts new file mode 100644 index 0000000..a528564 --- /dev/null +++ b/src/v2/dataref.ts @@ -0,0 +1,198 @@ +import { + type DataUrl, + MIME_TYPES, + type TypedArrayType, + type TypedArrayConstructor, +} from "./types"; + +// Utility functions for data URL handling +export const isDataUrl = (value: unknown): value is DataUrl => { + return typeof value === "string" && value.startsWith("data:"); +}; + +export const getMimeType = (dataUrl: DataUrl): string => { + const match = dataUrl.match(/^data:([^;]+)/); + return match ? match[1] : MIME_TYPES.OCTET_STREAM; +}; + +export const getParameters = (dataUrl: DataUrl): Record => { + const params: Record = {}; + const paramString = dataUrl.match(/^data:[^;]+;([^,]+),/)?.[1]; + if (paramString) { + paramString.split(";").forEach((param) => { + const [key, value] = param.split("="); + if (key && value) params[key] = value; + }); + } + return params; +}; + +// Core conversion functions +export const textToDataUrl = (text: string): DataUrl => { + const encoded = encodeURIComponent(text); + return `data:${MIME_TYPES.TEXT};charset=utf-8,${encoded}`; +}; + +export const jsonToDataUrl = (data: unknown): DataUrl => { + const jsonString = JSON.stringify(data); + const encoded = encodeURIComponent(jsonString); + return `data:${MIME_TYPES.JSON};charset=utf-8,${encoded}`; +}; + +export const bufferToDataUrl = (buffer: ArrayBuffer | Uint8Array): DataUrl => { + const bytes = new Uint8Array(buffer); + const base64 = btoa(String.fromCharCode(...bytes)); + return `data:${MIME_TYPES.OCTET_STREAM};base64,${base64}`; +}; + +export const typedArrayToDataUrl = ( + array: InstanceType<(typeof globalThis)[T]>, + type: T +): DataUrl => { + const buffer = array.buffer; + const bytes = new Uint8Array(buffer); + const base64 = btoa(String.fromCharCode(...bytes)); + return `data:${MIME_TYPES.TYPED_ARRAY}${type};base64,${base64}`; +}; + +// Update core conversion functions to handle URLs +export const dataUrlToBuffer = async ( + dataUrl: DataUrl, + fetchOptions?: RequestInit +): Promise => { + // If it's a URL data URL, fetch the content first + if (isUrlDataUrl(dataUrl)) { + const url = dataUrlToUrl(dataUrl); + if (!url) { + throw new Error("Invalid URL data URL"); + } + const response = await fetch(url, { ...fetchOptions, redirect: "follow" }); + if (!response.ok) { + throw new Error(`Failed to fetch ${url}: ${response.statusText}`); + } + return response.arrayBuffer(); + } + + // Handle regular data URL + const base64 = dataUrl.split(",")[1]; + const binaryString = atob(base64); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + return bytes.buffer; +}; + +export const dataUrlToText = async ( + dataUrl: DataUrl, + fetchOptions?: RequestInit +): Promise => { + const buffer = await dataUrlToBuffer(dataUrl, fetchOptions); + return new TextDecoder().decode(buffer); +}; + +export const dataUrlToJson = async ( + dataUrl: DataUrl, + fetchOptions?: RequestInit +): Promise => { + const text = await dataUrlToText(dataUrl, fetchOptions); + return JSON.parse(text); +}; + +export const dataUrlToTypedArray = async ( + dataUrl: DataUrl, + type: T, + fetchOptions?: RequestInit +): Promise> => { + const params = getParameters(dataUrl); + const arrayType = params.type as T; + + if (arrayType !== type) { + throw new Error( + `Data URL contains type ${arrayType} but ${type} was requested` + ); + } + + const buffer = await dataUrlToBuffer(dataUrl, fetchOptions); + const TypedArray = globalThis[type] as TypedArrayConstructor; + return new TypedArray(buffer); +}; + +// Update file handling to use async buffer conversion +export const dataUrlToFile = async ( + dataUrl: DataUrl, + name?: string, + fetchOptions?: RequestInit +): Promise => { + const mimeType = getMimeType(dataUrl); + const buffer = await dataUrlToBuffer(dataUrl, fetchOptions); + + if (!name) { + const hashBuffer = await crypto.subtle.digest("SHA-256", buffer); + const hashArray = Array.from(new Uint8Array(hashBuffer)); + name = hashArray.map((b) => b.toString(16).padStart(2, "0")).join(""); + } + + return new File([buffer], name, { type: mimeType }); +}; + +// URL handling functions +export const urlToDataUrl = async ( + url: string, + fetchOptions?: RequestInit +): Promise => { + // First encode the URL itself as a data URL with our custom MIME type + const urlDataUrl = `data:${MIME_TYPES.URI};charset=utf-8,${encodeURIComponent( + url + )}`; + + // If fetchOptions are provided, we'll also fetch and encode the content + if (fetchOptions) { + const response = await fetch(url, { ...fetchOptions, redirect: "follow" }); + if (!response.ok) { + throw new Error(`Failed to fetch ${url}: ${response.statusText}`); + } + const buffer = await response.arrayBuffer(); + return bufferToDataUrl(buffer); + } + + return urlDataUrl; +}; + +export const dataUrlToUrl = (dataUrl: DataUrl): string | null => { + const mimeType = getMimeType(dataUrl); + if (mimeType !== MIME_TYPES.URI) { + return null; + } + return decodeURIComponent(dataUrl.split(",")[1]); +}; + +export const isUrlDataUrl = (dataUrl: DataUrl): boolean => { + return getMimeType(dataUrl) === MIME_TYPES.URI; +}; + +// Update fileToDataUrl to handle URLs +export const fileToDataUrl = async ( + file: File | string, + fetchOptions?: RequestInit +): Promise => { + if (typeof file === "string") { + // If it's a string, treat it as a URL + return urlToDataUrl(file, fetchOptions); + } + // Otherwise treat it as a File + const buffer = await file.arrayBuffer(); + return bufferToDataUrl(buffer); +}; + +// Helper function to fetch content from a data URL that contains a URL +export const fetchDataUrlContent = async ( + dataUrl: DataUrl, + fetchOptions?: RequestInit +): Promise => { + const url = dataUrlToUrl(dataUrl); + if (!url) { + throw new Error("Data URL does not contain a URL reference"); + } + return urlToDataUrl(url, fetchOptions); +}; diff --git a/src/v2/index.ts b/src/v2/index.ts new file mode 100644 index 0000000..310e768 --- /dev/null +++ b/src/v2/index.ts @@ -0,0 +1,11 @@ +import { isDataRef as isDataRefV1 } from "../v1"; + +export const isDataRef = (value: any): boolean => { + return ( + !!( + value && + typeof value === "string" && + (value as string).startsWith("data:") + ) || isDataRefV1(value) + ); +}; diff --git a/src/v2/serialized.ts b/src/v2/serialized.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/v2/types.ts b/src/v2/types.ts new file mode 100644 index 0000000..c3659b7 --- /dev/null +++ b/src/v2/types.ts @@ -0,0 +1,44 @@ +// Basic types for the v2 API +export type DataUrl = string; // Must start with "data:" + +// MIME types we support +export const MIME_TYPES = { + TEXT: "text/plain", + JSON: "application/json", + OCTET_STREAM: "application/octet-stream", + // For typed arrays, we'll use application/octet-stream with a type parameter + TYPED_ARRAY: "application/octet-stream;type=", + // Custom MIME type for URLs that should be treated as data references + URI: "text/x-uri", +} as const; + +// Supported typed array types +export type TypedArrayType = + | "Int8Array" + | "Uint8Array" + | "Int16Array" + | "Uint16Array" + | "Int32Array" + | "Uint32Array" + | "Float32Array" + | "Float64Array" + | "BigInt64Array" + | "BigUint64Array"; + +export type DataRefTypedArray = + | Int8Array + | Uint8Array + | Uint8ClampedArray + | Int16Array + | Uint16Array + | Int32Array + | Uint32Array + | Float32Array + | Float64Array; + +// Helper type for typed array constructors +export type TypedArrayConstructor = { + [K in TypedArrayType]: new (buffer: ArrayBuffer) => InstanceType< + (typeof globalThis)[K] + >; +}[TypedArrayType]; From cf5f2722b1f791fb8ed371d96f784349b01e6f01 Mon Sep 17 00:00:00 2001 From: metapage CI Robot <260221+dionjwa@users.noreply.github.com> Date: Fri, 2 Jan 2026 09:47:43 -0800 Subject: [PATCH 2/5] checkpoint --- package.json | 3 ++- src/v2/dataref.ts | 19 ++++++++----------- src/v2/index.ts | 5 ++++- src/v2/types.ts | 29 ++++++++++++++++------------- 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/package.json b/package.json index 5e04f08..b16d6f4 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,8 @@ "homepage": "https://github.com/metapages/dataref/tree/main/docs#readme", "scripts": { "note": "echo 'use just. See README.md'", - "test": "vitest" + "test": "vitest --run", + "dev": "vitest" }, "devDependencies": { "@rollup/plugin-typescript": "^12.1.1", diff --git a/src/v2/dataref.ts b/src/v2/dataref.ts index a528564..084edb9 100644 --- a/src/v2/dataref.ts +++ b/src/v2/dataref.ts @@ -2,7 +2,7 @@ import { type DataUrl, MIME_TYPES, type TypedArrayType, - type TypedArrayConstructor, + type DataRefTypedArray, } from "./types"; // Utility functions for data URL handling @@ -99,23 +99,20 @@ export const dataUrlToJson = async ( return JSON.parse(text); }; -export const dataUrlToTypedArray = async ( +export const dataUrlToTypedArray = async ( dataUrl: DataUrl, - type: T, fetchOptions?: RequestInit -): Promise> => { +): Promise => { const params = getParameters(dataUrl); - const arrayType = params.type as T; + const arrayType = params.type as TypedArrayType; - if (arrayType !== type) { - throw new Error( - `Data URL contains type ${arrayType} but ${type} was requested` - ); + if (!arrayType) { + throw new Error("Data URL does not contain type parameter"); } const buffer = await dataUrlToBuffer(dataUrl, fetchOptions); - const TypedArray = globalThis[type] as TypedArrayConstructor; - return new TypedArray(buffer); + const TypedArray = globalThis[arrayType]; + return new TypedArray(buffer) as T; }; // Update file handling to use async buffer conversion diff --git a/src/v2/index.ts b/src/v2/index.ts index 310e768..53299a5 100644 --- a/src/v2/index.ts +++ b/src/v2/index.ts @@ -1,4 +1,7 @@ -import { isDataRef as isDataRefV1 } from "../v1"; +import { isDataRef as isDataRefV1 } from '../v1'; + +export * from "./dataref"; +export * from "./types"; export const isDataRef = (value: any): boolean => { return ( diff --git a/src/v2/types.ts b/src/v2/types.ts index c3659b7..b971139 100644 --- a/src/v2/types.ts +++ b/src/v2/types.ts @@ -14,27 +14,30 @@ export const MIME_TYPES = { // Supported typed array types export type TypedArrayType = - | "Int8Array" - | "Uint8Array" + | "BigInt64Array" + | "BigUint64Array" + | "Float32Array" + | "Float64Array" | "Int16Array" - | "Uint16Array" | "Int32Array" + | "Int8Array" + | "Uint16Array" | "Uint32Array" - | "Float32Array" - | "Float64Array" - | "BigInt64Array" - | "BigUint64Array"; + | "Uint8Array" + | "Uint8ClampedArray"; export type DataRefTypedArray = - | Int8Array - | Uint8Array - | Uint8ClampedArray + | BigInt64Array + | BigUint64Array + | Float32Array + | Float64Array | Int16Array - | Uint16Array | Int32Array + | Int8Array + | Uint16Array | Uint32Array - | Float32Array - | Float64Array; + | Uint8Array + | Uint8ClampedArray; // Helper type for typed array constructors export type TypedArrayConstructor = { From ba7a07a2590f84f00062c83330927c105485283d Mon Sep 17 00:00:00 2001 From: metapage CI Robot <260221+dionjwa@users.noreply.github.com> Date: Fri, 2 Jan 2026 09:47:47 -0800 Subject: [PATCH 3/5] checkpoint --- CLAUDE.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..e917520 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,15 @@ +# Store complex binary types in references + +Encode any javascript type including TypedArrays into data URL strings for embedding any types in URL parameters. + +## Current Task: + +We are migrating from v1 datarefs, where the data reference is a json blob, to a special string. The problem with the v1 json version is that it might be mistaken for other non-dataref values. in v2, we use data url string, extending existing data urls with our custom payloads. + +The new v2 code must be backwards compatible with v1. + + + +1. Write tests for the new v2, converting to and from all the basic types. +2. Add a new function that takes a json, then traverses the json, and converts any data ref strings into the dereferenced data, returning the new json. Use the npm module mutative to make the modifications. Make sure to include tests. +3. Be backwards compatible and test for it. \ No newline at end of file From 0299fa2c20df0c70b9da64a0a2947211504e8b4c Mon Sep 17 00:00:00 2001 From: metapage CI Robot <260221+dionjwa@users.noreply.github.com> Date: Fri, 2 Jan 2026 11:16:31 -0800 Subject: [PATCH 4/5] checkpoint --- .claude/settings.local.json | 9 + README.md | 564 ++++++++++++++++++++++++++- package-lock.json | 17 +- package.json | 27 +- src/index.ts | 41 ++ src/test/v1-v2-compatibility.test.ts | 278 +++++++++++++ src/test/v2.test.ts | 464 ++++++++++++++++++++++ src/v2/dataref.ts | 130 +++++- 8 files changed, 1513 insertions(+), 17 deletions(-) create mode 100644 .claude/settings.local.json create mode 100644 src/index.ts create mode 100644 src/test/v1-v2-compatibility.test.ts create mode 100644 src/test/v2.test.ts diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..63dab7d --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(npm install:*)", + "Bash(npm test:*)", + "Bash(node -e:*)" + ] + } +} diff --git a/README.md b/README.md index 519c71a..daba187 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,564 @@ - # @metapages/dataref -Moving around large blobs of data is hard and complicated. +**Encode any JavaScript type including TypedArrays into data URL strings for embedding in URL parameters, JSON, and more.** + +Moving around large blobs of data is hard and complicated. Datarefs solve this by encoding complex binary types into compact, unambiguous string references that can be easily passed around your network, database, and URLs. + +## Overview + +This library uses **data URL strings** (e.g., `data:text/plain,hello`) to encode any JavaScript type including TypedArrays. Data URLs are unambiguous, URL-safe, and standards-based (RFC 2397). + +**Note:** v1 (JSON object format) is maintained internally for backwards compatibility but is not exported. All public APIs use the modern v2 data URL format. + +## Why Data URLs? + +Data URL strings have several key advantages: + +1. **Unambiguous**: A string starting with `data:` is clearly a dataref, not confused with regular data +2. **URL-safe**: Can be embedded directly in URL parameters without special handling +3. **JSON-safe**: When serialized to JSON, remains a simple string that's clearly identifiable +4. **Standards-based**: Uses the existing data URL standard (RFC 2397) +5. **Type preservation**: Supports all JavaScript types including TypedArrays with full type information + +## Installation + +```bash +npm install @metapages/dataref +``` + +## Quick Start + +```typescript +import { + textToDataUrl, + jsonToDataUrl, + bufferToDataUrl, + typedArrayToDataUrl, + dataUrlToText, + dataUrlToJson, + dataUrlToBuffer, + dataUrlToTypedArray, + dereferenceDataRefs, +} from "@metapages/dataref"; + +// Encode text to data URL +const textDataUrl = textToDataUrl("Hello, World!"); +// => "data:text/plain;charset=utf-8,Hello%2C%20World!" + +// Decode back to text +const text = await dataUrlToText(textDataUrl); +// => "Hello, World!" + +// Encode JSON to data URL +const jsonDataUrl = jsonToDataUrl({ name: "John", age: 30 }); +// => "data:application/json;charset=utf-8,%7B%22name%22%3A%22John%22%2C%22age%22%3A30%7D" + +// Decode back to JSON +const data = await dataUrlToJson(jsonDataUrl); +// => { name: "John", age: 30 } + +// Encode binary data +const buffer = new Uint8Array([1, 2, 3, 4, 5]); +const bufferDataUrl = bufferToDataUrl(buffer); +// => "data:application/octet-stream;base64,AQIDBAU=" + +// Encode TypedArrays with type preservation +const floatArray = new Float32Array([1.1, 2.2, 3.3]); +const arrayDataUrl = typedArrayToDataUrl(floatArray, "Float32Array"); +// => "data:application/octet-stream;type=Float32Array;base64,..." + +// Decode back to Float32Array +const decodedArray = await dataUrlToTypedArray(arrayDataUrl); +// => Float32Array [1.1, 2.2, 3.3] +``` + +## Core Concepts + +### Data URLs + +A data URL is a URI scheme that allows you to embed data directly in a URL string. The format is: + +``` +data:[][;base64], +``` + +Examples: +- Text: `data:text/plain,Hello` +- JSON: `data:application/json,{"key":"value"}` +- Binary: `data:application/octet-stream;base64,AQIDBA==` +- TypedArray: `data:application/octet-stream;type=Float32Array;base64,zcxMPw==` + +### Supported Types + +The library supports all JavaScript data types: + +| Type | Encoding | Example | +|------|----------|---------| +| String | URL-encoded text | `textToDataUrl("hello")` | +| JSON | URL-encoded JSON | `jsonToDataUrl({key: "value"})` | +| ArrayBuffer | Base64 binary | `bufferToDataUrl(buffer)` | +| Uint8Array | Base64 binary | `bufferToDataUrl(uint8Array)` | +| TypedArrays | Base64 with type | `typedArrayToDataUrl(array, type)` | +| URL reference | URL-encoded URI | `urlToDataUrl("https://...")` | + +**Supported TypedArray types:** +- `Int8Array`, `Uint8Array`, `Uint8ClampedArray` +- `Int16Array`, `Uint16Array` +- `Int32Array`, `Uint32Array` +- `BigInt64Array`, `BigUint64Array` +- `Float32Array`, `Float64Array` + +## Advanced Usage + +### Dereferencing DataRefs in JSON + +The `dereferenceDataRefs()` function traverses a JSON object and automatically converts all data URL strings into their actual values: + +```typescript +import { dereferenceDataRefs, textToDataUrl, jsonToDataUrl, typedArrayToDataUrl } from "@metapages/dataref"; + +// Create a complex object with embedded datarefs +const obj = { + title: textToDataUrl("My Document"), + metadata: jsonToDataUrl({ author: "Jane", version: 2 }), + data: { + values: typedArrayToDataUrl(new Float32Array([1.1, 2.2]), "Float32Array"), + count: 42, + }, + items: [ + "regular string", + textToDataUrl("encoded text"), + { nested: jsonToDataUrl({ deep: "value" }) } + ] +}; + +// Dereference all datarefs at once +const resolved = await dereferenceDataRefs(obj); + +// Result: +// { +// title: "My Document", +// metadata: { author: "Jane", version: 2 }, +// data: { +// values: Float32Array [1.1, 2.2], +// count: 42 +// }, +// items: [ +// "regular string", +// "encoded text", +// { nested: { deep: "value" } } +// ] +// } +``` + +**Key features:** +- Recursively traverses objects and arrays +- Preserves non-dataref values unchanged +- Handles all data types (text, JSON, TypedArrays, ArrayBuffers) +- Processes multiple datarefs in parallel for performance +- Returns a new immutable object (uses `mutative` library) + +### URL References + +You can create datarefs that reference external URLs: + +```typescript +import { urlToDataUrl, dataUrlToUrl } from "@metapages/dataref"; + +// Create a URL reference (without fetching) +const urlRef = await urlToDataUrl("https://example.com/data.json"); +// => "data:text/x-uri;charset=utf-8,https%3A%2F%2Fexample.com%2Fdata.json" + +// Extract the URL back +const url = dataUrlToUrl(urlRef); +// => "https://example.com/data.json" + +// Create a URL reference AND fetch its content +const urlRefWithContent = await urlToDataUrl( + "https://example.com/data.json", + { headers: { "Authorization": "Bearer token" } } +); +// This will fetch the content and encode it as a data URL +``` + +### Validation + +```typescript +import { isDataUrl, isUrlDataUrl, isDataRef } from "@metapages/dataref"; + +// Check if a value is a data URL +isDataUrl("data:text/plain,hello"); // true +isDataUrl("regular string"); // false + +// Check if a data URL is a URL reference +isUrlDataUrl("data:text/x-uri,https%3A%2F%2Fexample.com"); // true +isUrlDataUrl("data:text/plain,hello"); // false + +// Check if a value is a dataref (includes backwards compatibility with v1 objects) +isDataRef("data:text/plain,hello"); // true +isDataRef({ ref: "utf8", value: "hello" }); // true (legacy v1 format) +isDataRef("regular string"); // false +``` + +## API Reference + +### Encoding Functions (to Data URL) + +#### `textToDataUrl(text: string): DataUrl` +Converts a text string to a data URL. + +```typescript +textToDataUrl("Hello, World!"); +// => "data:text/plain;charset=utf-8,Hello%2C%20World!" +``` + +#### `jsonToDataUrl(data: unknown): DataUrl` +Converts any JSON-serializable data to a data URL. + +```typescript +jsonToDataUrl({ name: "John", age: 30 }); +// => "data:application/json;charset=utf-8,..." +``` + +#### `bufferToDataUrl(buffer: ArrayBuffer | Uint8Array): DataUrl` +Converts an ArrayBuffer or Uint8Array to a base64-encoded data URL. + +```typescript +bufferToDataUrl(new Uint8Array([1, 2, 3])); +// => "data:application/octet-stream;base64,AQID" +``` + +#### `typedArrayToDataUrl(array: TypedArray, type: TypedArrayType): DataUrl` +Converts a TypedArray to a data URL with type preservation. + +```typescript +typedArrayToDataUrl(new Float32Array([1.1, 2.2]), "Float32Array"); +// => "data:application/octet-stream;type=Float32Array;base64,..." +``` + +#### `urlToDataUrl(url: string, fetchOptions?: RequestInit): Promise` +Creates a URL reference or fetches and encodes URL content. + +```typescript +// Create reference only +await urlToDataUrl("https://example.com/data.json"); + +// Fetch and encode content +await urlToDataUrl("https://example.com/data.json", { + headers: { "Authorization": "Bearer token" } +}); +``` + +### Decoding Functions (from Data URL) + +All decoding functions are async and support optional `fetchOptions` for URL-based datarefs. + +#### `dataUrlToText(dataUrl: DataUrl, fetchOptions?: RequestInit): Promise` +Decodes a data URL to a text string. + +#### `dataUrlToJson(dataUrl: DataUrl, fetchOptions?: RequestInit): Promise` +Decodes a data URL to parsed JSON. + +#### `dataUrlToBuffer(dataUrl: DataUrl, fetchOptions?: RequestInit): Promise` +Decodes a data URL to an ArrayBuffer. + +#### `dataUrlToTypedArray(dataUrl: DataUrl, fetchOptions?: RequestInit): Promise` +Decodes a data URL to a TypedArray with type preservation. + +#### `dataUrlToFile(dataUrl: DataUrl, name?: string, fetchOptions?: RequestInit): Promise` +Converts a data URL to a File object. + +```typescript +const file = await dataUrlToFile(dataUrl, "document.txt"); +// => File { name: "document.txt", type: "text/plain", ... } +``` + +### Utility Functions + +#### `dereferenceDataRefs(json: T, fetchOptions?: RequestInit): Promise` +Traverses a JSON object and dereferences all v2 data URLs. + +#### `isDataUrl(value: unknown): boolean` +Checks if a value is a v2 data URL string. + +#### `isUrlDataUrl(dataUrl: DataUrl): boolean` +Checks if a data URL is a URL reference. + +#### `dataUrlToUrl(dataUrl: DataUrl): string | null` +Extracts the URL from a URL reference data URL. + +#### `getMimeType(dataUrl: DataUrl): string` +Gets the MIME type from a data URL. + +#### `getParameters(dataUrl: DataUrl): Record` +Gets the parameters from a data URL header. + +## Backwards Compatibility + +The library maintains full backwards compatibility with the legacy v1 format (JSON objects). Legacy data and modern data URLs can coexist in the same application. + +### Legacy v1 DataRef Format (Internal) + +```typescript +type DataRef = { + ref: "utf8" | "json" | "base64" | "url" | "key"; + value: any; + contentType?: string; + size?: number; + sha256?: string; + created?: string; +}; +``` + +### Working with Legacy Data + +The library automatically detects and handles legacy v1 format (JSON objects) for backwards compatibility: + +```typescript +import { isDataRef, dereferenceDataRefs, textToDataUrl } from "@metapages/dataref"; + +// Legacy v1 format (internal, for reference only) +const legacyRef = { + ref: "utf8", + value: "Hello from legacy data" +}; + +// Modern data URL format +const modernRef = textToDataUrl("Hello from modern data"); + +// Both are recognized by isDataRef +isDataRef(legacyRef); // true +isDataRef(modernRef); // true + +// Can coexist in the same structure +const mixed = { + oldData: legacyRef, + newData: modernRef +}; + +// dereferenceDataRefs only processes data URLs, leaves legacy objects unchanged +const result = await dereferenceDataRefs(mixed); +// { +// oldData: { ref: "utf8", value: "Hello from legacy data" }, // unchanged +// newData: "Hello from modern data" // dereferenced +// } +``` + +### Migrating Legacy Data + +If you have legacy v1 datarefs in your system, here's how to convert them to modern data URLs: + +```typescript +import { textToDataUrl, jsonToDataUrl, bufferToDataUrl, urlToDataUrl } from "@metapages/dataref"; + +// v1 DataRef type (for reference) +type DataRef = { + ref: "utf8" | "json" | "base64" | "url" | "key"; + value: any; +}; + +function v1ToV2(v1Ref: DataRef): string | Promise { + switch (v1Ref.ref) { + case "utf8": + return textToDataUrl(v1Ref.value as string); + + case "json": + return jsonToDataUrl(v1Ref.value); + + case "base64": { + // Decode base64 to binary first + const binaryString = atob(v1Ref.value as string); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + return bufferToDataUrl(bytes); + } + + case "url": + // URL refs need to be handled with urlToDataUrl + return urlToDataUrl(v1Ref.value as string); + + default: + throw new Error(`Unknown v1 DataRef type: ${v1Ref.ref}`); + } +} + +// Example migration +const v1Data: DataRef = { + ref: "json", + value: { name: "John", age: 30 } +}; + +const v2Data = v1ToV2(v1Data); +// => "data:application/json;charset=utf-8,..." +``` + +## Use Cases + +### 1. Embedding Binary Data in URLs + +```typescript +// Encode an image or binary file into a data URL +const imageBuffer = await fetch("/image.png").then(r => r.arrayBuffer()); +const imageDataUrl = bufferToDataUrl(imageBuffer); + +// Use in URL parameter +const url = `https://app.example.com?image=${encodeURIComponent(imageDataUrl)}`; +``` + +### 2. Storing Complex Data in JSON + +```typescript +// Store TypedArrays in JSON +const data = { + metadata: { name: "sensor-data" }, + readings: typedArrayToDataUrl(new Float32Array([1.1, 2.2, 3.3]), "Float32Array") +}; + +const json = JSON.stringify(data); +// Can be stored in database, sent over network, etc. + +// Later, restore the TypedArray +const restored = JSON.parse(json); +const readings = await dataUrlToTypedArray(restored.readings); +``` + +### 3. API Responses with Embedded Data + +```typescript +// Server response with embedded binary data +const apiResponse = { + status: "success", + document: { + title: textToDataUrl("My Document"), + content: textToDataUrl("Document content..."), + thumbnail: bufferToDataUrl(thumbnailBuffer) + } +}; + +// Client can dereference all at once +const resolved = await dereferenceDataRefs(apiResponse); +// All datarefs are now actual values +``` + +### 4. Browser File Handling + +```typescript +// Convert File to data URL for transmission +const file = document.querySelector('input[type="file"]').files[0]; +const dataUrl = await fileToDataUrl(file); + +// Send to API as JSON +await fetch("/api/upload", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ file: dataUrl }) +}); + +// On server, convert back to File +const receivedFile = await dataUrlToFile(dataUrl, "uploaded-file"); +``` + +## Performance Considerations + +- **Parallel dereferencing**: `dereferenceDataRefs()` processes all datarefs in parallel using `Promise.all()` +- **Immutable updates**: Uses the `mutative` library for efficient immutable updates +- **Lazy evaluation**: Datarefs are only decoded when needed +- **Memory efficient**: Data URLs are strings, easier to serialize and cache than binary Blob objects + +## TypeScript Support + +Full TypeScript support with comprehensive type definitions: + +```typescript +import type { DataUrl, TypedArrayType, DataRefTypedArray } from "@metapages/dataref"; + +// Type-safe TypedArray handling +const array = new Float32Array([1, 2, 3]); +const dataUrl = typedArrayToDataUrl(array, "Float32Array"); +const decoded = await dataUrlToTypedArray(dataUrl); + +// Type inference with JSON +interface User { + name: string; + age: number; +} + +const userDataUrl = jsonToDataUrl({ name: "John", age: 30 }); +const user = await dataUrlToJson(userDataUrl); +``` + +## Testing + +The library includes comprehensive test coverage: + +- 38 tests for v2 basic type conversions +- 11 tests for `dereferenceDataRefs()` functionality +- 13 tests for v1/v2 backwards compatibility +- All 56 tests passing + +Run tests: +```bash +npm test +``` + +## Browser Support + +Works in all modern browsers that support: +- `TextEncoder`/`TextDecoder` +- `atob`/`btoa` +- `crypto.subtle` (for file hashing) +- `fetch` API (for URL-based datarefs) + +## License + +See LICENSE file. + +## Contributing + +Contributions welcome! Please ensure all tests pass and add tests for new features. + +## LLM/AI Model Usage Guide + +**For AI models processing this library:** + +1. **Import from main package**: Always import from `@metapages/dataref` (no v1/v2 subpaths) +2. **Use data URL format**: All encoding functions return data URL strings (e.g., `data:text/plain,hello`) +3. **Type preservation**: Use `typedArrayToDataUrl()` with the type parameter for TypedArrays +4. **Dereferencing**: Use `dereferenceDataRefs()` to process entire JSON structures at once +5. **Validation**: Check with `isDataUrl()` before decoding +6. **Error handling**: All decoding functions are async and may throw errors for invalid data URLs +7. **Legacy support**: `isDataRef()` detects both modern data URLs and legacy v1 objects + +**Common patterns:** + +```typescript +// Pattern 1: Encode and embed in JSON +import { textToDataUrl, bufferToDataUrl, typedArrayToDataUrl } from "@metapages/dataref"; + +const data = { + text: textToDataUrl("value"), + binary: bufferToDataUrl(buffer), + array: typedArrayToDataUrl(array, "Float32Array") +}; + +// Pattern 2: Batch decode +import { dereferenceDataRefs } from "@metapages/dataref"; + +const decoded = await dereferenceDataRefs(data); + +// Pattern 3: Type-safe decoding +import { isDataUrl, dataUrlToJson } from "@metapages/dataref"; -Datarefs are small bits of JSON that **reference** some underlying data. +if (isDataUrl(value)) { + const result = await dataUrlToJson(value); +} -This module provides types and tooling for converting different data types (including Blobs, Files, ArrayBuffers, JSON, strings) into small references that can be more easily passed around your network and database. +// Pattern 4: Migration from legacy format +import { textToDataUrl, jsonToDataUrl } from "@metapages/dataref"; -Then when some process wants the actual underlying data, the `dataref` tooling provides the way to fetch/convert the `dataref` into the corresponding data. \ No newline at end of file +const modernRef = legacyRef.ref === "utf8" + ? textToDataUrl(legacyRef.value) + : jsonToDataUrl(legacyRef.value); +``` diff --git a/package-lock.json b/package-lock.json index 25f9f18..f33fdaf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,8 @@ "name": "@metapages/dataref", "version": "0.6.2", "dependencies": { - "base64-arraybuffer": "^1.0.2" + "base64-arraybuffer": "^1.0.2", + "mutative": "^1.3.0" }, "devDependencies": { "@rollup/plugin-typescript": "^12.1.1", @@ -1706,6 +1707,15 @@ "dev": true, "license": "MIT" }, + "node_modules/mutative": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/mutative/-/mutative-1.3.0.tgz", + "integrity": "sha512-8MJj6URmOZAV70dpFe1YnSppRTKC4DsMkXQiBDFayLcDI4ljGokHxmpqaBQuDWa4iAxWaJJ1PS8vAmbntjjKmQ==", + "license": "MIT", + "engines": { + "node": ">=14.0" + } + }, "node_modules/nanoid": { "version": "3.3.7", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", @@ -3413,6 +3423,11 @@ "integrity": "sha512-VNTrAak/KhO2i8dqqnqnAHOa3cYBwXEZe9h+D5h/1ZqFSTEFHdM65lR7RoIqq3tBBYavsOXV84NoHXZ0AkPyqQ==", "dev": true }, + "mutative": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/mutative/-/mutative-1.3.0.tgz", + "integrity": "sha512-8MJj6URmOZAV70dpFe1YnSppRTKC4DsMkXQiBDFayLcDI4ljGokHxmpqaBQuDWa4iAxWaJJ1PS8vAmbntjjKmQ==" + }, "nanoid": { "version": "3.3.7", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", diff --git a/package.json b/package.json index b16d6f4..47459dd 100644 --- a/package.json +++ b/package.json @@ -1,16 +1,34 @@ { "name": "@metapages/dataref", - "version": "0.6.2", + "version": "2.0.0", "author": "Dion Whitehead ", "repository": { "type": "git", "url": "git+https://github.com/metapages/dataref.git" }, "keywords": [ - "dataref" + "dataref", + "data-url", + "dataurl", + "base64", + "typed-array", + "arraybuffer", + "binary", + "encode", + "decode", + "url-encoding", + "json", + "serialization" ], "main": "dist/index.js", - "exports": "./dist/index.js", + "module": "dist/index.js", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "require": "./dist/index.js" + } + }, "files": [ "dist", "src", @@ -37,6 +55,7 @@ "vitest": "^2.1.1" }, "dependencies": { - "base64-arraybuffer": "^1.0.2" + "base64-arraybuffer": "^1.0.2", + "mutative": "^1.3.0" } } diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..fbbfdad --- /dev/null +++ b/src/index.ts @@ -0,0 +1,41 @@ +// Main exports - v2 API (recommended) +// v1 is kept internal for backwards compatibility only + +export { + // Core encoding functions + textToDataUrl, + jsonToDataUrl, + bufferToDataUrl, + typedArrayToDataUrl, + urlToDataUrl, + fileToDataUrl, + + // Core decoding functions + dataUrlToText, + dataUrlToJson, + dataUrlToBuffer, + dataUrlToTypedArray, + dataUrlToUrl, + dataUrlToFile, + + // Utility functions + dereferenceDataRefs, + isDataUrl, + isUrlDataUrl, + getMimeType, + getParameters, + fetchDataUrlContent, +} from "./v2/dataref"; + +export { + // Types + type DataUrl, + type TypedArrayType, + type DataRefTypedArray, + MIME_TYPES, +} from "./v2/types"; + +export { + // Validation function that checks both v1 and v2 + isDataRef, +} from "./v2/index"; diff --git a/src/test/v1-v2-compatibility.test.ts b/src/test/v1-v2-compatibility.test.ts new file mode 100644 index 0000000..4b58183 --- /dev/null +++ b/src/test/v1-v2-compatibility.test.ts @@ -0,0 +1,278 @@ +import { describe, it, expect } from "vitest"; +import { DataRef, DataRefType } from "../v1/types"; +import { isDataRef as isDataRefV1 } from "../v1/dataref"; +import { + isDataUrl, + isDataRef, + textToDataUrl, + jsonToDataUrl, + bufferToDataUrl, + dataUrlToText, + dataUrlToJson, + dataUrlToBuffer, + dereferenceDataRefs, +} from "../index"; + +describe("v1 and v2 DataRef Compatibility", () => { + describe("isDataRef should detect both v1 and v2 formats", () => { + it("should detect v2 data URL strings", () => { + const v2TextDataRef = textToDataUrl("hello"); + const v2JsonDataRef = jsonToDataUrl({ key: "value" }); + + expect(isDataRef(v2TextDataRef)).toBe(true); + expect(isDataUrl(v2TextDataRef)).toBe(true); + + expect(isDataRef(v2JsonDataRef)).toBe(true); + expect(isDataUrl(v2JsonDataRef)).toBe(true); + }); + + it("should detect v1 DataRef objects", () => { + const v1TextDataRef: DataRef = { + ref: DataRefType.utf8, + value: "hello", + }; + + const v1JsonDataRef: DataRef = { + ref: DataRefType.json, + value: { key: "value" }, + }; + + expect(isDataRef(v1TextDataRef)).toBe(true); + expect(isDataRefV1(v1TextDataRef)).toBe(true); + + expect(isDataRef(v1JsonDataRef)).toBe(true); + expect(isDataRefV1(v1JsonDataRef)).toBe(true); + }); + + it("should not detect regular objects/strings as datarefs", () => { + expect(isDataRef({ key: "value" })).toBe(false); + expect(isDataRef("regular string")).toBe(false); + expect(isDataRef(123)).toBe(false); + expect(isDataRef(null)).toBe(false); + }); + }); + + describe("v1 to v2 conversion", () => { + it("should convert v1 utf8 DataRef to v2 data URL", () => { + const v1Ref: DataRef = { + ref: DataRefType.utf8, + value: "Hello, World!", + }; + + // Convert to v2 + const v2DataUrl = textToDataUrl(v1Ref.value as string); + + expect(isDataUrl(v2DataUrl)).toBe(true); + expect(v2DataUrl).toMatch(/^data:text\/plain/); + }); + + it("should convert v1 json DataRef to v2 data URL", () => { + const v1Ref: DataRef = { + ref: DataRefType.json, + value: { name: "John", age: 30 }, + }; + + // Convert to v2 + const v2DataUrl = jsonToDataUrl(v1Ref.value); + + expect(isDataUrl(v2DataUrl)).toBe(true); + expect(v2DataUrl).toMatch(/^data:application\/json/); + }); + + it("should convert v1 base64 DataRef to v2 data URL", async () => { + // Create a base64 encoded string + const originalData = new Uint8Array([1, 2, 3, 4, 5]); + const base64String = btoa(String.fromCharCode(...originalData)); + + const v1Ref: DataRef = { + ref: DataRefType.base64, + value: base64String, + }; + + // In v2, we use bufferToDataUrl for binary data + // First decode the base64 to get the buffer + const binaryString = atob(v1Ref.value as string); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + + const v2DataUrl = bufferToDataUrl(bytes); + + expect(isDataUrl(v2DataUrl)).toBe(true); + expect(v2DataUrl).toMatch(/^data:application\/octet-stream/); + + // Verify we can decode it back + const decodedBuffer = await dataUrlToBuffer(v2DataUrl); + const decodedArray = new Uint8Array(decodedBuffer); + expect(decodedArray).toEqual(originalData); + }); + }); + + describe("dereferenceDataRefs should handle mixed v1 and v2 refs", () => { + it("should dereference only v2 data URLs, not v1 objects", async () => { + const v1Ref: DataRef = { + ref: DataRefType.utf8, + value: "v1 text", + }; + + const v2DataUrl = textToDataUrl("v2 text"); + + const input = { + v1Data: v1Ref, + v2Data: v2DataUrl, + regular: "normal string", + }; + + const result = await dereferenceDataRefs(input); + + // v2 should be dereferenced + expect(result.v2Data).toBe("v2 text"); + + // v1 should remain as-is (object) + expect(result.v1Data).toEqual(v1Ref); + + // Regular strings should remain unchanged + expect(result.regular).toBe("normal string"); + }); + + it("should handle nested structures with mixed v1/v2 refs", async () => { + const v1Ref: DataRef = { + ref: DataRefType.json, + value: { nested: "v1 data" }, + }; + + const v2DataUrl = jsonToDataUrl({ nested: "v2 data" }); + + const input = { + level1: { + v1: v1Ref, + v2: v2DataUrl, + }, + items: [v1Ref, v2DataUrl, "regular"], + }; + + const result = await dereferenceDataRefs(input); + + // v1 refs should remain as objects + expect(result.level1.v1).toEqual(v1Ref); + expect(result.items[0]).toEqual(v1Ref); + + // v2 refs should be dereferenced + expect(result.level1.v2).toEqual({ nested: "v2 data" }); + expect(result.items[1]).toEqual({ nested: "v2 data" }); + + // Regular values unchanged + expect(result.items[2]).toBe("regular"); + }); + }); + + describe("v2 data URLs maintain advantages over v1", () => { + it("v2 data URLs are unambiguous strings, not objects", () => { + const v1Ref: DataRef = { + ref: DataRefType.utf8, + value: "hello", + }; + + const v2DataUrl = textToDataUrl("hello"); + + // v1 is an object that could be mistaken for other data + expect(typeof v1Ref).toBe("object"); + expect(v1Ref.ref).toBeDefined(); + + // v2 is a string that clearly starts with "data:" + expect(typeof v2DataUrl).toBe("string"); + expect(v2DataUrl.startsWith("data:")).toBe(true); + }); + + it("v2 data URLs work in URL parameters directly", () => { + const text = "Hello, World!"; + const v2DataUrl = textToDataUrl(text); + + // Can be used directly in URL parameters + const url = new URL("https://example.com"); + url.searchParams.set("data", v2DataUrl); + + expect(url.searchParams.get("data")).toBe(v2DataUrl); + expect(url.searchParams.get("data")?.startsWith("data:")).toBe(true); + }); + + it("v2 data URLs can be embedded in JSON without confusion", () => { + const v2DataUrl = textToDataUrl("embedded data"); + + const json = { + normalString: "regular", + dataRef: v2DataUrl, + normalObject: { key: "value" }, + }; + + const jsonString = JSON.stringify(json); + const parsed = JSON.parse(jsonString); + + // The data URL is preserved as a string + expect(isDataUrl(parsed.dataRef)).toBe(true); + + // Can clearly distinguish it from regular strings + expect(isDataUrl(parsed.normalString)).toBe(false); + expect(isDataUrl(parsed.normalObject)).toBe(false); + }); + }); + + describe("Backwards compatibility scenarios", () => { + it("should handle data structures that might contain v1 refs", async () => { + // A realistic scenario where old data (v1) and new data (v2) coexist + const legacyData = { + oldFormat: { + ref: DataRefType.utf8, + value: "This is v1 format data", + } as DataRef, + metadata: "created with v1", + }; + + const modernData = { + newFormat: textToDataUrl("This is v2 format data"), + metadata: "created with v2", + }; + + const combined = { + legacy: legacyData, + modern: modernData, + }; + + const result = await dereferenceDataRefs(combined); + + // v1 data preserved as-is + expect(result.legacy.oldFormat).toEqual(legacyData.oldFormat); + + // v2 data dereferenced + expect(result.modern.newFormat).toBe("This is v2 format data"); + + // Metadata unchanged + expect(result.legacy.metadata).toBe("created with v1"); + expect(result.modern.metadata).toBe("created with v2"); + }); + + it("should allow gradual migration from v1 to v2", () => { + // Start with v1 data + const v1Data: DataRef = { + ref: DataRefType.json, + value: { user: "john", score: 100 }, + }; + + // Can check if it's a v1 ref + expect(isDataRefV1(v1Data)).toBe(true); + expect(isDataUrl(v1Data)).toBe(false); + + // Migrate to v2 + const v2Data = jsonToDataUrl(v1Data.value); + + // Check it's now v2 + expect(isDataUrl(v2Data)).toBe(true); + expect(isDataRefV1(v2Data)).toBe(false); + + // Both are recognized as datarefs by the unified function + expect(isDataRef(v1Data)).toBe(true); + expect(isDataRef(v2Data)).toBe(true); + }); + }); +}); diff --git a/src/test/v2.test.ts b/src/test/v2.test.ts new file mode 100644 index 0000000..8757694 --- /dev/null +++ b/src/test/v2.test.ts @@ -0,0 +1,464 @@ +import { describe, it, expect } from "vitest"; +import { + textToDataUrl, + jsonToDataUrl, + bufferToDataUrl, + typedArrayToDataUrl, + dataUrlToText, + dataUrlToJson, + dataUrlToBuffer, + dataUrlToTypedArray, + isDataUrl, + urlToDataUrl, + dataUrlToUrl, + isUrlDataUrl, + dereferenceDataRefs, +} from "../index"; + +describe("v2 DataRef - Basic Type Conversions", () => { + describe("Text conversions", () => { + it("should convert text to data URL and back", async () => { + const originalText = "Hello, World!"; + const dataUrl = textToDataUrl(originalText); + + expect(isDataUrl(dataUrl)).toBe(true); + expect(dataUrl).toMatch(/^data:text\/plain/); + + const decodedText = await dataUrlToText(dataUrl); + expect(decodedText).toBe(originalText); + }); + + it("should handle empty string", async () => { + const originalText = ""; + const dataUrl = textToDataUrl(originalText); + const decodedText = await dataUrlToText(dataUrl); + expect(decodedText).toBe(originalText); + }); + + it("should handle unicode text", async () => { + const originalText = "Hello δΈ–η•Œ 🌍"; + const dataUrl = textToDataUrl(originalText); + const decodedText = await dataUrlToText(dataUrl); + expect(decodedText).toBe(originalText); + }); + + it("should handle special characters", async () => { + const originalText = "Line1\nLine2\tTabbed\r\nWindows"; + const dataUrl = textToDataUrl(originalText); + const decodedText = await dataUrlToText(dataUrl); + expect(decodedText).toBe(originalText); + }); + }); + + describe("JSON conversions", () => { + it("should convert simple object to data URL and back", async () => { + const originalData = { name: "John", age: 30 }; + const dataUrl = jsonToDataUrl(originalData); + + expect(isDataUrl(dataUrl)).toBe(true); + expect(dataUrl).toMatch(/^data:application\/json/); + + const decodedData = await dataUrlToJson(dataUrl); + expect(decodedData).toEqual(originalData); + }); + + it("should handle nested objects", async () => { + const originalData = { + user: { + name: "Jane", + address: { + street: "123 Main St", + city: "Boston", + }, + }, + }; + const dataUrl = jsonToDataUrl(originalData); + const decodedData = await dataUrlToJson(dataUrl); + expect(decodedData).toEqual(originalData); + }); + + it("should handle arrays", async () => { + const originalData = [1, 2, 3, "four", { five: 5 }]; + const dataUrl = jsonToDataUrl(originalData); + const decodedData = await dataUrlToJson(dataUrl); + expect(decodedData).toEqual(originalData); + }); + + it("should handle null and boolean values", async () => { + const originalData = { flag: true, value: null, disabled: false }; + const dataUrl = jsonToDataUrl(originalData); + const decodedData = await dataUrlToJson(dataUrl); + expect(decodedData).toEqual(originalData); + }); + + it("should handle numbers including floats and negative", async () => { + const originalData = { int: 42, float: 3.14159, negative: -100 }; + const dataUrl = jsonToDataUrl(originalData); + const decodedData = await dataUrlToJson(dataUrl); + expect(decodedData).toEqual(originalData); + }); + + it("should handle empty object and array", async () => { + const emptyObj = {}; + const emptyArr: any[] = []; + + const objDataUrl = jsonToDataUrl(emptyObj); + const arrDataUrl = jsonToDataUrl(emptyArr); + + expect(await dataUrlToJson(objDataUrl)).toEqual(emptyObj); + expect(await dataUrlToJson(arrDataUrl)).toEqual(emptyArr); + }); + }); + + describe("ArrayBuffer conversions", () => { + it("should convert ArrayBuffer to data URL and back", async () => { + const originalBuffer = new Uint8Array([1, 2, 3, 4, 5]).buffer; + const dataUrl = bufferToDataUrl(originalBuffer); + + expect(isDataUrl(dataUrl)).toBe(true); + expect(dataUrl).toMatch(/^data:application\/octet-stream/); + + const decodedBuffer = await dataUrlToBuffer(dataUrl); + const decodedArray = new Uint8Array(decodedBuffer); + const originalArray = new Uint8Array(originalBuffer); + + expect(decodedArray).toEqual(originalArray); + }); + + it("should handle Uint8Array directly", async () => { + const originalArray = new Uint8Array([255, 128, 64, 32, 16, 8, 4, 2, 1]); + const dataUrl = bufferToDataUrl(originalArray); + const decodedBuffer = await dataUrlToBuffer(dataUrl); + const decodedArray = new Uint8Array(decodedBuffer); + + expect(decodedArray).toEqual(originalArray); + }); + + it("should handle empty buffer", async () => { + const originalBuffer = new Uint8Array([]).buffer; + const dataUrl = bufferToDataUrl(originalBuffer); + const decodedBuffer = await dataUrlToBuffer(dataUrl); + const decodedArray = new Uint8Array(decodedBuffer); + + expect(decodedArray.length).toBe(0); + }); + }); + + describe("TypedArray conversions", () => { + it("should convert Uint8Array to data URL and back", async () => { + const originalArray = new Uint8Array([10, 20, 30, 40, 50]); + const dataUrl = typedArrayToDataUrl(originalArray, "Uint8Array"); + + expect(isDataUrl(dataUrl)).toBe(true); + + const decodedArray = await dataUrlToTypedArray(dataUrl); + expect(decodedArray).toEqual(originalArray); + }); + + it("should convert Int32Array to data URL and back", async () => { + const originalArray = new Int32Array([-1000, 0, 1000, 2000]); + const dataUrl = typedArrayToDataUrl(originalArray, "Int32Array"); + + const decodedArray = await dataUrlToTypedArray(dataUrl); + expect(decodedArray).toEqual(originalArray); + }); + + it("should convert Float32Array to data URL and back", async () => { + const originalArray = new Float32Array([1.1, 2.2, 3.3, -4.4]); + const dataUrl = typedArrayToDataUrl(originalArray, "Float32Array"); + + const decodedArray = await dataUrlToTypedArray(dataUrl); + expect(decodedArray.length).toBe(originalArray.length); + for (let i = 0; i < originalArray.length; i++) { + expect(decodedArray[i]).toBeCloseTo(originalArray[i]); + } + }); + + it("should convert Float64Array to data URL and back", async () => { + const originalArray = new Float64Array([ + Math.PI, + Math.E, + -123.456789, + 0.000001, + ]); + const dataUrl = typedArrayToDataUrl(originalArray, "Float64Array"); + + const decodedArray = await dataUrlToTypedArray(dataUrl); + expect(decodedArray).toEqual(originalArray); + }); + + it("should convert Int16Array to data URL and back", async () => { + const originalArray = new Int16Array([-32768, -100, 0, 100, 32767]); + const dataUrl = typedArrayToDataUrl(originalArray, "Int16Array"); + + const decodedArray = await dataUrlToTypedArray(dataUrl); + expect(decodedArray).toEqual(originalArray); + }); + + it("should convert Uint32Array to data URL and back", async () => { + const originalArray = new Uint32Array([0, 1000, 1000000, 4294967295]); + const dataUrl = typedArrayToDataUrl(originalArray, "Uint32Array"); + + const decodedArray = await dataUrlToTypedArray(dataUrl); + expect(decodedArray).toEqual(originalArray); + }); + + it("should convert BigInt64Array to data URL and back", async () => { + const originalArray = new BigInt64Array([ + BigInt(-9007199254740991), + BigInt(0), + BigInt(9007199254740991), + ]); + const dataUrl = typedArrayToDataUrl(originalArray, "BigInt64Array"); + + const decodedArray = await dataUrlToTypedArray(dataUrl); + expect(decodedArray).toEqual(originalArray); + }); + + it("should convert Uint8ClampedArray to data URL and back", async () => { + const originalArray = new Uint8ClampedArray([0, 127, 255]); + const dataUrl = typedArrayToDataUrl( + originalArray, + "Uint8ClampedArray" + ); + + const decodedArray = + await dataUrlToTypedArray(dataUrl); + expect(decodedArray).toEqual(originalArray); + }); + }); + + describe("URL handling", () => { + it("should convert URL to data URL without fetching", async () => { + const url = "https://example.com/data.json"; + const dataUrl = await urlToDataUrl(url); + + expect(isDataUrl(dataUrl)).toBe(true); + expect(isUrlDataUrl(dataUrl)).toBe(true); + + const decodedUrl = dataUrlToUrl(dataUrl); + expect(decodedUrl).toBe(url); + }); + + it("should handle URL with query parameters", async () => { + const url = "https://example.com/api?param1=value1¶m2=value2"; + const dataUrl = await urlToDataUrl(url); + const decodedUrl = dataUrlToUrl(dataUrl); + expect(decodedUrl).toBe(url); + }); + + it("should handle URL with hash", async () => { + const url = "https://example.com/page#section"; + const dataUrl = await urlToDataUrl(url); + const decodedUrl = dataUrlToUrl(dataUrl); + expect(decodedUrl).toBe(url); + }); + + it("should return null when converting non-URL data URL", async () => { + const textDataUrl = textToDataUrl("Hello"); + const url = dataUrlToUrl(textDataUrl); + expect(url).toBeNull(); + }); + }); + + describe("isDataUrl validation", () => { + it("should return true for valid data URLs", () => { + expect(isDataUrl("data:text/plain,hello")).toBe(true); + expect(isDataUrl("data:application/json,{}")).toBe(true); + expect(isDataUrl("data:image/png;base64,iVBORw0KGgo=")).toBe(true); + }); + + it("should return false for non-data URLs", () => { + expect(isDataUrl("http://example.com")).toBe(false); + expect(isDataUrl("hello world")).toBe(false); + expect(isDataUrl("")).toBe(false); + expect(isDataUrl(null)).toBe(false); + expect(isDataUrl(undefined)).toBe(false); + expect(isDataUrl(123)).toBe(false); + expect(isDataUrl({})).toBe(false); + }); + }); + + describe("dereferenceDataRefs", () => { + it("should dereference a simple object with text dataref", async () => { + const textDataUrl = textToDataUrl("Hello, World!"); + const input = { + message: textDataUrl, + count: 42, + }; + + const result = await dereferenceDataRefs(input); + expect(result.message).toBe("Hello, World!"); + expect(result.count).toBe(42); + }); + + it("should dereference nested objects with datarefs", async () => { + const textDataUrl = textToDataUrl("nested text"); + const jsonDataUrl = jsonToDataUrl({ inner: "data" }); + + const input = { + outer: { + text: textDataUrl, + data: jsonDataUrl, + normal: "regular string", + }, + }; + + const result = await dereferenceDataRefs(input); + expect(result.outer.text).toBe("nested text"); + expect(result.outer.data).toEqual({ inner: "data" }); + expect(result.outer.normal).toBe("regular string"); + }); + + it("should dereference arrays with datarefs", async () => { + const dataUrl1 = textToDataUrl("item1"); + const dataUrl2 = jsonToDataUrl({ key: "value" }); + const dataUrl3 = textToDataUrl("item3"); + + const input = { + items: [dataUrl1, "regular", dataUrl2, 123, dataUrl3], + }; + + const result = await dereferenceDataRefs(input); + expect(result.items[0]).toBe("item1"); + expect(result.items[1]).toBe("regular"); + expect(result.items[2]).toEqual({ key: "value" }); + expect(result.items[3]).toBe(123); + expect(result.items[4]).toBe("item3"); + }); + + it("should handle deeply nested structures", async () => { + const dataUrl = jsonToDataUrl({ deep: "value" }); + + const input = { + level1: { + level2: { + level3: { + data: dataUrl, + }, + }, + }, + }; + + const result = await dereferenceDataRefs(input); + expect(result.level1.level2.level3.data).toEqual({ deep: "value" }); + }); + + it("should handle multiple datarefs in the same object", async () => { + const text1 = textToDataUrl("first"); + const text2 = textToDataUrl("second"); + const json1 = jsonToDataUrl({ a: 1 }); + const json2 = jsonToDataUrl({ b: 2 }); + + const input = { + text1, + text2, + json1, + json2, + regular: "unchanged", + }; + + const result = await dereferenceDataRefs(input); + expect(result.text1).toBe("first"); + expect(result.text2).toBe("second"); + expect(result.json1).toEqual({ a: 1 }); + expect(result.json2).toEqual({ b: 2 }); + expect(result.regular).toBe("unchanged"); + }); + + it("should handle objects with no datarefs", async () => { + const input = { + text: "regular string", + number: 42, + bool: true, + nested: { key: "value" }, + }; + + const result = await dereferenceDataRefs(input); + expect(result).toEqual(input); + }); + + it("should handle null and undefined values", async () => { + const dataUrl = textToDataUrl("test"); + const input = { + nullValue: null, + undefinedValue: undefined, + dataRef: dataUrl, + }; + + const result = await dereferenceDataRefs(input); + expect(result.nullValue).toBeNull(); + expect(result.undefinedValue).toBeUndefined(); + expect(result.dataRef).toBe("test"); + }); + + it("should handle empty objects and arrays", async () => { + const input = { + emptyObj: {}, + emptyArr: [], + dataRef: textToDataUrl("value"), + }; + + const result = await dereferenceDataRefs(input); + expect(result.emptyObj).toEqual({}); + expect(result.emptyArr).toEqual([]); + expect(result.dataRef).toBe("value"); + }); + + it("should dereference TypedArray datarefs", async () => { + const typedArrayDataUrl = typedArrayToDataUrl( + new Uint8Array([1, 2, 3]), + "Uint8Array" + ); + + const input = { + buffer: typedArrayDataUrl, + }; + + const result = await dereferenceDataRefs(input); + expect(result.buffer).toEqual(new Uint8Array([1, 2, 3])); + }); + + it("should dereference ArrayBuffer datarefs", async () => { + const bufferDataUrl = bufferToDataUrl(new Uint8Array([10, 20, 30])); + + const input = { + data: bufferDataUrl, + }; + + const result = await dereferenceDataRefs(input); + const resultArray = new Uint8Array(result.data); + expect(resultArray).toEqual(new Uint8Array([10, 20, 30])); + }); + + it("should handle mixed types in complex structure", async () => { + const textUrl = textToDataUrl("text content"); + const jsonUrl = jsonToDataUrl({ nested: { value: 123 } }); + const arrayUrl = typedArrayToDataUrl( + new Float32Array([1.1, 2.2]), + "Float32Array" + ); + + const input = { + metadata: { + title: textUrl, + config: jsonUrl, + }, + data: { + values: arrayUrl, + count: 2, + }, + items: ["regular", textUrl, { key: jsonUrl }], + }; + + const result = await dereferenceDataRefs(input); + expect(result.metadata.title).toBe("text content"); + expect(result.metadata.config).toEqual({ nested: { value: 123 } }); + expect(result.data.values).toEqual(new Float32Array([1.1, 2.2])); + expect(result.data.count).toBe(2); + expect(result.items[0]).toBe("regular"); + expect(result.items[1]).toBe("text content"); + expect(result.items[2].key).toEqual({ nested: { value: 123 } }); + }); + }); +}); diff --git a/src/v2/dataref.ts b/src/v2/dataref.ts index 084edb9..3bfea0e 100644 --- a/src/v2/dataref.ts +++ b/src/v2/dataref.ts @@ -73,14 +73,35 @@ export const dataUrlToBuffer = async ( return response.arrayBuffer(); } - // Handle regular data URL - const base64 = dataUrl.split(",")[1]; - const binaryString = atob(base64); - const bytes = new Uint8Array(binaryString.length); - for (let i = 0; i < binaryString.length; i++) { - bytes[i] = binaryString.charCodeAt(i); + // Parse the data URL + const commaIndex = dataUrl.indexOf(","); + if (commaIndex === -1) { + throw new Error("Invalid data URL format"); + } + + const header = dataUrl.substring(0, commaIndex); + const data = dataUrl.substring(commaIndex + 1); + + // Check if it's base64 encoded + const isBase64 = header.includes(";base64"); + + if (isBase64) { + // Handle base64-encoded data (including empty strings) + if (!data) { + return new ArrayBuffer(0); + } + const binaryString = atob(data); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + return bytes.buffer; + } else { + // Handle URL-encoded data (for text/JSON, including empty strings) + const decodedString = data ? decodeURIComponent(data) : ""; + const encoder = new TextEncoder(); + return encoder.encode(decodedString).buffer; } - return bytes.buffer; }; export const dataUrlToText = async ( @@ -193,3 +214,98 @@ export const fetchDataUrlContent = async ( } return urlToDataUrl(url, fetchOptions); }; + +// Import mutative for efficient JSON traversal and modification +import { create } from "mutative"; + +/** + * Traverses a JSON object and converts any data ref strings (v2 data URLs) + * into their dereferenced data. Returns a new JSON object with all datarefs resolved. + * + * @param json - The JSON object to traverse + * @param fetchOptions - Optional fetch options for URL-based datarefs + * @returns A new JSON object with all datarefs dereferenced + */ +export const dereferenceDataRefs = async ( + json: T, + fetchOptions?: RequestInit +): Promise => { + // Track all promises for async dereferencing + const promises: Array<{ + path: (string | number)[]; + promise: Promise; + }> = []; + + // Helper function to traverse and collect promises + const collectPromises = (obj: any, path: (string | number)[] = []) => { + if (obj === null || obj === undefined) { + return; + } + + if (typeof obj === "string" && isDataUrl(obj)) { + // Found a data URL, create a promise to dereference it + const promise = dereferenceDataUrl(obj, fetchOptions); + promises.push({ path: [...path], promise }); + } else if (Array.isArray(obj)) { + obj.forEach((item, index) => { + collectPromises(item, [...path, index]); + }); + } else if (typeof obj === "object") { + Object.keys(obj).forEach((key) => { + collectPromises(obj[key], [...path, key]); + }); + } + }; + + // First pass: collect all promises + collectPromises(json); + + // If no datarefs found, return original + if (promises.length === 0) { + return json; + } + + // Wait for all promises to resolve + const results = await Promise.all(promises.map((p) => p.promise)); + + // Second pass: use mutative to update the JSON with resolved values + return create(json, (draft: any) => { + promises.forEach(({ path }, index) => { + let current = draft; + for (let i = 0; i < path.length - 1; i++) { + current = current[path[i]]; + } + const lastKey = path[path.length - 1]; + current[lastKey] = results[index]; + }); + }); +}; + +/** + * Dereferences a single data URL to its actual value. + * Attempts to parse as JSON first, falls back to text, then buffer. + * + * @param dataUrl - The data URL to dereference + * @param fetchOptions - Optional fetch options for URL-based datarefs + * @returns The dereferenced value + */ +const dereferenceDataUrl = async ( + dataUrl: DataUrl, + fetchOptions?: RequestInit +): Promise => { + const mimeType = getMimeType(dataUrl); + const params = getParameters(dataUrl); + + // Handle different MIME types appropriately + if (mimeType === MIME_TYPES.JSON) { + return dataUrlToJson(dataUrl, fetchOptions); + } else if (mimeType === MIME_TYPES.TEXT) { + return dataUrlToText(dataUrl, fetchOptions); + } else if (mimeType === MIME_TYPES.OCTET_STREAM && params.type) { + // This is a typed array + return dataUrlToTypedArray(dataUrl, fetchOptions); + } else { + // For octet-stream and other binary types, return as ArrayBuffer + return dataUrlToBuffer(dataUrl, fetchOptions); + } +}; From d001e586bd22e6446bd33825e964fbb70fd1a326 Mon Sep 17 00:00:00 2001 From: metapage CI Robot <260221+dionjwa@users.noreply.github.com> Date: Fri, 2 Jan 2026 12:34:14 -0800 Subject: [PATCH 5/5] fix tests --- .claude/settings.local.json | 3 ++- src/test/v1-v2-compatibility.test.ts | 23 ++++++++++++----------- src/test/v2.test.ts | 4 ++-- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 63dab7d..24d34cc 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -3,7 +3,8 @@ "allow": [ "Bash(npm install:*)", "Bash(npm test:*)", - "Bash(node -e:*)" + "Bash(node -e:*)", + "Bash(just check:*)" ] } } diff --git a/src/test/v1-v2-compatibility.test.ts b/src/test/v1-v2-compatibility.test.ts index 4b58183..0ac5172 100644 --- a/src/test/v1-v2-compatibility.test.ts +++ b/src/test/v1-v2-compatibility.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect } from "vitest"; -import { DataRef, DataRefType } from "../v1/types"; +import type { DataRefBinary } from "../v1/types"; +import { DataRefType } from "../v1/types"; import { isDataRef as isDataRefV1 } from "../v1/dataref"; import { isDataUrl, @@ -27,12 +28,12 @@ describe("v1 and v2 DataRef Compatibility", () => { }); it("should detect v1 DataRef objects", () => { - const v1TextDataRef: DataRef = { + const v1TextDataRef: DataRefBinary = { ref: DataRefType.utf8, value: "hello", }; - const v1JsonDataRef: DataRef = { + const v1JsonDataRef: DataRefBinary = { ref: DataRefType.json, value: { key: "value" }, }; @@ -54,7 +55,7 @@ describe("v1 and v2 DataRef Compatibility", () => { describe("v1 to v2 conversion", () => { it("should convert v1 utf8 DataRef to v2 data URL", () => { - const v1Ref: DataRef = { + const v1Ref: DataRefBinary = { ref: DataRefType.utf8, value: "Hello, World!", }; @@ -67,7 +68,7 @@ describe("v1 and v2 DataRef Compatibility", () => { }); it("should convert v1 json DataRef to v2 data URL", () => { - const v1Ref: DataRef = { + const v1Ref: DataRefBinary = { ref: DataRefType.json, value: { name: "John", age: 30 }, }; @@ -84,7 +85,7 @@ describe("v1 and v2 DataRef Compatibility", () => { const originalData = new Uint8Array([1, 2, 3, 4, 5]); const base64String = btoa(String.fromCharCode(...originalData)); - const v1Ref: DataRef = { + const v1Ref: DataRefBinary = { ref: DataRefType.base64, value: base64String, }; @@ -111,7 +112,7 @@ describe("v1 and v2 DataRef Compatibility", () => { describe("dereferenceDataRefs should handle mixed v1 and v2 refs", () => { it("should dereference only v2 data URLs, not v1 objects", async () => { - const v1Ref: DataRef = { + const v1Ref: DataRefBinary = { ref: DataRefType.utf8, value: "v1 text", }; @@ -137,7 +138,7 @@ describe("v1 and v2 DataRef Compatibility", () => { }); it("should handle nested structures with mixed v1/v2 refs", async () => { - const v1Ref: DataRef = { + const v1Ref: DataRefBinary = { ref: DataRefType.json, value: { nested: "v1 data" }, }; @@ -169,7 +170,7 @@ describe("v1 and v2 DataRef Compatibility", () => { describe("v2 data URLs maintain advantages over v1", () => { it("v2 data URLs are unambiguous strings, not objects", () => { - const v1Ref: DataRef = { + const v1Ref: DataRefBinary = { ref: DataRefType.utf8, value: "hello", }; @@ -225,7 +226,7 @@ describe("v1 and v2 DataRef Compatibility", () => { oldFormat: { ref: DataRefType.utf8, value: "This is v1 format data", - } as DataRef, + } as DataRefBinary, metadata: "created with v1", }; @@ -254,7 +255,7 @@ describe("v1 and v2 DataRef Compatibility", () => { it("should allow gradual migration from v1 to v2", () => { // Start with v1 data - const v1Data: DataRef = { + const v1Data: DataRefBinary = { ref: DataRefType.json, value: { user: "john", score: 100 }, }; diff --git a/src/test/v2.test.ts b/src/test/v2.test.ts index 8757694..66e7bfa 100644 --- a/src/test/v2.test.ts +++ b/src/test/v2.test.ts @@ -427,7 +427,7 @@ describe("v2 DataRef - Basic Type Conversions", () => { }; const result = await dereferenceDataRefs(input); - const resultArray = new Uint8Array(result.data); + const resultArray = new Uint8Array(result.data as unknown as ArrayBuffer); expect(resultArray).toEqual(new Uint8Array([10, 20, 30])); }); @@ -458,7 +458,7 @@ describe("v2 DataRef - Basic Type Conversions", () => { expect(result.data.count).toBe(2); expect(result.items[0]).toBe("regular"); expect(result.items[1]).toBe("text content"); - expect(result.items[2].key).toEqual({ nested: { value: 123 } }); + expect((result.items[2] as any).key).toEqual({ nested: { value: 123 } }); }); }); });