diff --git a/go/canonicalize_test.go b/go/canonicalize_test.go index 4778f17..561c718 100644 --- a/go/canonicalize_test.go +++ b/go/canonicalize_test.go @@ -1,6 +1,24 @@ package canonicalize import ( + "context" + "crypto" + "crypto/ecdsa" + "crypto/ed25519" + "crypto/elliptic" + "crypto/rand" + "crypto/rsa" + "crypto/sha256" + "crypto/x509" + "encoding/asn1" + "encoding/base64" + "encoding/json" + "encoding/pem" + "fmt" + "math/big" + "net/http" + "net/http/httptest" + "strings" "testing" ) @@ -11,23 +29,23 @@ func TestNormalize(t *testing.T) { inputB string wantSame bool }{ - {"Curly double quotes → straight", "\u201CHello\u201D", "\"Hello\"", true}, - {"Precomposed vs combining (NFKC)", "caf\u00E9", "cafe\u0301", true}, - {"fi ligature (NFKC)", "\uFB01nd", "find", true}, - {"Em dash → hyphen-minus", "word \u2014 word", "word - word", true}, - {"Guillemets → double quotes", "\u00ABBonjour\u00BB", "\"Bonjour\"", true}, - {"CJK corner brackets → double quotes", "\u300C\u6771\u4EAC\u300D", "\"\u6771\u4EAC\"", true}, - {"ZWNJ is semantic (Persian)", "\u0645\u06CC\u200C\u062E\u0648\u0627\u0647\u0645", "\u0645\u06CC\u062E\u0648\u0627\u0647\u0645", false}, - {"Arabic tatweel stripped", "\u0643\u062A\u0640\u0640\u0640\u0627\u0628", "\u0643\u062A\u0627\u0628", true}, - {"Fullwidth ASCII (NFKC)", "\uFF21\uFF11", "A1", true}, - {"Circled digit (NFKC)", "\u2460", "1", true}, - {"ZWSP stripped", "word\u200Bword", "wordword", true}, - {"ZWNJ preserved (different)", "word\u200Cword", "wordword", false}, - {"Ellipsis → three dots", "Hello\u2026", "Hello...", true}, - {"Curly single quotes → straight", "\u2018Hello\u2019", "'Hello'", true}, - {"Low-9 quotes → straight", "\u201AGerman\u201C", "\"German\"", true}, - {"No-break space → space", "a\u00A0b", "a b", true}, - {"Ideographic space → space", "a\u3000b", "a b", true}, + {"Curly double quotes → straight", "“Hello”", "\"Hello\"", true}, + {"Precomposed vs combining (NFKC)", "café", "café", true}, + {"fi ligature (NFKC)", 
"find", "find", true}, + {"Em dash → hyphen-minus", "word — word", "word - word", true}, + {"Guillemets → double quotes", "«Bonjour»", "\"Bonjour\"", true}, + {"CJK corner brackets → double quotes", "「東京」", "\"東京\"", true}, + {"ZWNJ is semantic (Persian)", "میخواهم", "میخواهم", false}, + {"Arabic tatweel stripped", "كتـــاب", "كتاب", true}, + {"Fullwidth ASCII (NFKC)", "A1", "A1", true}, + {"Circled digit (NFKC)", "①", "1", true}, + {"ZWSP stripped", "wordword", "wordword", true}, + {"ZWNJ preserved (different)", "wordword", "wordword", false}, + {"Ellipsis → three dots", "Hello…", "Hello...", true}, + {"Curly single quotes → straight", "‘Hello’", "'Hello'", true}, + {"Low-9 quotes → straight", "‚German“", "\"German\"", true}, + {"No-break space → space", "a b", "a b", true}, + {"Ideographic space → space", "a b", "a b", true}, {"Whitespace collapse", "a \t b", "a b", true}, } @@ -43,3 +61,491 @@ func TestNormalize(t *testing.T) { }) } } + +// ----- ExtractCanonicalText ----- + +func TestExtractCanonicalText(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + { + name: "block boundaries become whitespace", + in: "
Hello
World
", + want: "Hello World", + }, + { + name: "inline elements do not introduce spaces", + in: "hello world
", + want: "hello world", + }, + { + name: "scripts and styles dropped with content", + in: "before
after
", + want: "before after", + }, + { + name: "meta inside signed-section is metadata, not content", + in: `fish & chips
", + want: "fish & chips", + }, + { + name: "numeric entities decoded", + in: "café
", + want: "café", + }, + { + name: "hex entities decoded", + in: "—
", + want: "-", // em dash → hyphen via Phase 4 + }, + { + name: "br is a void element → whitespace", + in: "line1
line2
hello world
" canonicalizes to "hello world" not "hello world ". +const BLOCK_ELEMENTS = + "address|article|aside|blockquote|canvas|dd|div|dl|dt|fieldset|figcaption|figure|footer|form|h[1-6]|header|hr|li|main|nav|noscript|ol|output|p|pre|section|table|tfoot|thead|tr|td|th|ul|video"; +const BLOCK_OPEN_RE = new RegExp(`<(${BLOCK_ELEMENTS})\\b[^>]*>`, "gi"); +const BLOCK_CLOSE_RE = new RegExp(`(${BLOCK_ELEMENTS})\\s*>`, "gi"); + +// Any remaining HTML tag (inline elements we strip without adding whitespace). +const ANY_TAG_RE = /<\/?[a-z][a-z0-9-]*\b[^>]*>/gi; + +// HTML entity decoding table (common entities; numeric entities handled separately). +const NAMED_ENTITIES = { + "&": "&", + "<": "<", + ">": ">", + """: '"', + "'": "'", + " ": "\u00A0", + "–": "\u2013", + "—": "\u2014", + "‘": "\u2018", + "’": "\u2019", + "“": "\u201C", + "”": "\u201D", + "…": "\u2026", + "©": "\u00A9", + "®": "\u00AE", + "™": "\u2122", +}; + +function decodeEntities(text) { + // Named entities + text = text.replace(/&[a-z]+;/gi, (match) => { + const key = match.toLowerCase(); + return NAMED_ENTITIES[key] ?? match; + }); + // Numeric decimal entities + text = text.replace(/(\d+);/g, (_, code) => + String.fromCodePoint(parseInt(code, 10)), + ); + // Numeric hex entities + text = text.replace(/([0-9a-f]+);/gi, (_, code) => + String.fromCodePoint(parseInt(code, 16)), + ); + return text; +} + +/** + * Extract canonical text from an HTML fragment for signing or verification. + * + * This is the HTML → canonical text extraction defined in the HTMLTrust + * specification §2.1. Given an HTML fragment (typically the inner contents + * of a `A
B
` canonicalizes to `A B`, not `AB`. + * 3. Strips all remaining inline markup, preserving only text content. + * 4. Decodes HTML entities. + * 5. Applies the full text normalization pipeline (`normalizeText`). + * + * The output is a pure text string. Markup, attributes, link destinations, + * and media sources are NOT covered by the hash. This is a deliberate + * scoping choice (see spec §2.1 "Text-only scope" and the open design + * question on attribute coverage). + * + * This implementation is regex-based and is sufficient for signed content + * as typically produced by CMS platforms (blog posts, articles, news + * stories). For pathological or adversarial input, a real DOM parser + * should be used instead; the library API is compatible. + * + * @param {string} html - HTML fragment to canonicalize + * @param {object} [options] - Options passed through to normalizeText + * @returns {string} Canonical text, ready to be hashed + */ +export function extractCanonicalText(html, options = {}) { + if (typeof html !== "string") { + throw new TypeError("extractCanonicalText expects a string"); + } + + // Step 1: Strip excluded elements and their contents. + let text = html.replace(EXCLUDED_ELEMENTS_RE, " "); + text = text.replace(VOID_ELEMENTS_RE, " "); + + // Step 2: Convert block boundaries to whitespace. + text = text.replace(BLOCK_OPEN_RE, " "); + text = text.replace(BLOCK_CLOSE_RE, " "); + + // Step 3: Strip all remaining (inline) tags. + text = text.replace(ANY_TAG_RE, ""); + + // Step 4: Decode HTML entities. + text = decodeEntities(text); + + // Step 5: Apply full canonicalization pipeline. + return normalizeText(text, options).trim(); +} + +/** + * Compute a canonical claims hash from a list of claim entries. + * + * Claims are serialized as a sorted list of "name=value" pairs, joined by + * newlines, then hashed. Sorting ensures the order of elements in + * the HTML source does not affect the hash. 
The caller is responsible for + * computing the actual hash from the returned canonical string. + * + * @param {Record` content that must retain whitespace).
+ * @return string Canonical text, ready to be hashed.
+ */
+    public static function extractCanonicalText(string $html, bool $preserveWhitespace = false): string
+    {
+        // Step 1: Strip excluded elements (with their contents) and void
+        // elements. Both are replaced by a space so that the words on either
+        // side of them never fuse together.
+        $text = preg_replace(self::EXCLUDED_ELEMENTS_PATTERN, ' ', $html);
+        $text = preg_replace(self::VOID_ELEMENTS_PATTERN, ' ', $text);
+
+        // Step 2: Convert block boundaries to whitespace.
+        // The closing pattern must be anchored on "</": without it the closing
+        // tag is only partially consumed (a stray "</" is left in the text) and
+        // ordinary text such as "p>" would be swallowed as if it were a tag.
+        $blockOpen  = '#<(' . self::BLOCK_ELEMENT_NAMES . ')\b[^>]*>#i';
+        $blockClose = '#</(' . self::BLOCK_ELEMENT_NAMES . ')\s*>#i';
+        $text = preg_replace($blockOpen, ' ', $text);
+        $text = preg_replace($blockClose, ' ', $text);
+
+        // Step 3: Strip any remaining (inline) tags without adding whitespace,
+        // so inline markup inside a word does not split the word.
+        $text = preg_replace(self::ANY_TAG_PATTERN, '', $text);
+
+        // Step 4: Decode HTML entities (after tag stripping, so decoded "<"
+        // characters are never mistaken for markup).
+        $text = self::decodeEntities($text);
+
+        // Step 5: Apply full text normalization, then trim.
+        return trim(self::normalizeText($text, $preserveWhitespace));
+    }
+
+    /**
+     * Compute a canonical claims string from a name->value map.
+     *
+     * Claims are serialized as a sorted list of "name=value" pairs, joined
+     * by newlines. Both names and values are cast to string and pushed
+     * through normalizeText so that visually-equivalent representations
+     * (e.g. NFKC variants, curly vs straight quotes) hash identically.
+     *
+     * Entries are ordered by canonical name and, when two names normalize to
+     * the same string, by canonical value. The tie-break keeps the output
+     * independent of the order in which the claims were supplied.
+     *
+     * Mirrors javascript/index.js:canonicalizeClaims.
+     *
+     * @param array $claims Map of claim name => claim value.
+     * @return string Canonical serialized string ready to be hashed
+     *                (empty string for an empty map).
+     */
+    public static function canonicalizeClaims(array $claims): string
+    {
+        $entries = [];
+        foreach ($claims as $name => $value) {
+            $entries[] = [
+                self::normalizeText((string) $name),
+                self::normalizeText((string) $value),
+            ];
+        }
+
+        // Sort by canonicalized name (lexicographic, byte order — matches JS
+        // string comparison for ASCII names). Distinct input names can
+        // collapse to the same canonical name, so fall back to comparing the
+        // values; otherwise the relative order of such entries would depend
+        // on input order (and on sort stability, which PHP < 8.0 lacks).
+        usort($entries, static function (array $a, array $b): int {
+            return strcmp($a[0], $b[0]) ?: strcmp($a[1], $b[1]);
+        });
+
+        $lines = [];
+        foreach ($entries as [$name, $value]) {
+            $lines[] = $name . '=' . $value;
+        }
+        return implode("\n", $lines);
+    }
}
diff --git a/php/src/Keys/DidWebResolver.php b/php/src/Keys/DidWebResolver.php
new file mode 100644
index 0000000..f48a4d7
--- /dev/null
+++ b/php/src/Keys/DidWebResolver.php
@@ -0,0 +1,132 @@
+fetcher = $fetcher ?? HttpFetcher::default();
+ }
+
+    public function supports(string $keyid): bool
+    {
+        // A keyid belongs to this resolver exactly when it starts with the
+        // (case-sensitive) did:web method prefix.
+        $prefix = 'did:web:';
+        return substr($keyid, 0, strlen($prefix)) === $prefix;
+    }
+
+    /**
+     * Resolve a did:web keyid by fetching its DID document and picking a
+     * verificationMethod that carries a "publicKeyPem".
+     *
+     * When the keyid has a fragment (did:web:example.com#keys-1), the method
+     * whose "id" equals the full keyid or the bare "#fragment" is preferred.
+     * If no method id matches (or the keyid has no fragment) the first
+     * PEM-bearing method is used, which is the historical behaviour.
+     *
+     * @return ResolvedKey|null Null when the keyid is unsupported, the
+     *                          document cannot be fetched or parsed, or no
+     *                          method exposes a PEM key.
+     */
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        if (!$this->supports($keyid)) {
+            return null;
+        }
+
+        $url = self::didWebToUrl($keyid);
+        if ($url === null) {
+            return null;
+        }
+
+        $response = ($this->fetcher)($url);
+        if ($response === null) {
+            return null;
+        }
+
+        $doc = json_decode($response['body'], true);
+        if (!is_array($doc)) {
+            return null;
+        }
+
+        $methods = $doc['verificationMethod'] ?? null;
+        if (!is_array($methods)) {
+            return null;
+        }
+
+        // "#keys-1" (including the '#'), or '' when no usable fragment exists.
+        $hashPos = strpos($keyid, '#');
+        $fragment = $hashPos === false ? '' : (string) substr($keyid, $hashPos);
+        if (strlen($fragment) <= 1) {
+            $fragment = '';
+        }
+
+        $fallback = null;
+        foreach ($methods as $method) {
+            if (!is_array($method)) {
+                continue;
+            }
+            $pem = $method['publicKeyPem'] ?? null;
+            if (!is_string($pem) || $pem === '') {
+                continue;
+            }
+
+            $candidate = new ResolvedKey($pem, self::guessAlgorithm($method), $keyid);
+            if ($fragment === '') {
+                // No specific method requested: first usable key wins.
+                return $candidate;
+            }
+
+            // Method ids appear both absolute ("did:web:x#k") and relative ("#k").
+            $id = $method['id'] ?? null;
+            if ($id === $keyid || $id === $fragment) {
+                return $candidate;
+            }
+            if ($fallback === null) {
+                $fallback = $candidate;
+            }
+        }
+
+        return $fallback;
+    }
+
+    /**
+     * Translate a did:web:DOMAIN[:PATH:SEGMENTS] keyid to the canonical
+     * fetch URL. Per spec:
+     *   - did:web:example.com           -> https://example.com/.well-known/did.json
+     *   - did:web:example.com:user:1    -> https://example.com/user/1/did.json
+     *   - did:web:example.com%3A8443    -> https://example.com:8443/.well-known/did.json
+     *
+     * The keyid comes from untrusted content, so the decoded domain must be a
+     * plain host[:port] and every path segment must be a single, non-dot
+     * segment; anything else yields null rather than a URL pointing somewhere
+     * unexpected.
+     *
+     * @param string $keyid A keyid beginning with "did:web:" (optionally
+     *                      followed by a "#fragment", which is ignored here).
+     * @return string|null The HTTPS URL of the DID document, or null when
+     *                     the identifier is empty or malformed.
+     */
+    private static function didWebToUrl(string $keyid): ?string
+    {
+        $rest = substr($keyid, 8);
+        if ($rest === '' || $rest === false) {
+            return null;
+        }
+
+        // Strip any fragment (e.g. did:web:example.com#keys-1) — the fragment
+        // identifies a verificationMethod, but the document URL is the same.
+        $hash = strpos($rest, '#');
+        if ($hash !== false) {
+            $rest = substr($rest, 0, $hash);
+        }
+
+        $parts = explode(':', $rest);
+        $domain = array_shift($parts);
+        if ($domain === null || $domain === '') {
+            return null;
+        }
+        // did:web percent-encodes ports as %3A; decode for URL building.
+        $domain = rawurldecode($domain);
+
+        // After decoding, only host[:port] is acceptable. This rejects
+        // smuggled "/", "@", "?", "#" and whitespace that would change which
+        // server (or which path) the request is sent to.
+        if (preg_match('/^[A-Za-z0-9](?:[A-Za-z0-9.-]*[A-Za-z0-9])?(?::[0-9]{1,5})?$/D', $domain) !== 1) {
+            return null;
+        }
+
+        if (count($parts) === 0) {
+            return 'https://' . $domain . '/.well-known/did.json';
+        }
+
+        $segments = [];
+        foreach ($parts as $part) {
+            $segment = rawurldecode($part);
+            // Empty, dot and slash-bearing segments would alter the path
+            // structure (traversal / extra levels) once placed in the URL.
+            if ($segment === '' || $segment === '.' || $segment === '..'
+                || strpbrk($segment, "/\\") !== false) {
+                return null;
+            }
+            // Re-encode so reserved characters stay inside their segment.
+            $segments[] = rawurlencode($segment);
+        }
+        return 'https://' . $domain . '/' . implode('/', $segments) . '/did.json';
+    }
+
+    /**
+     * Derive an algorithm name from a verificationMethod entry.
+     *
+     * The lower-cased "type" string is searched for known family markers
+     * first; failing that, a non-empty string "algorithm" field is used
+     * (lower-cased); failing that, the result is "ed25519".
+     *
+     * @param array $method One decoded verificationMethod entry.
+     * @return string "ed25519", "ecdsa", "rsa", or the lower-cased
+     *                "algorithm" field.
+     */
+    private static function guessAlgorithm(array $method): string
+    {
+        $type = '';
+        if (isset($method['type']) && is_string($method['type'])) {
+            $type = strtolower($method['type']);
+        }
+
+        // Marker => family, checked in this order; the first hit wins.
+        $markers = [
+            'ed25519' => 'ed25519',
+            'ecdsa'   => 'ecdsa',
+            'secp'    => 'ecdsa',
+            'rsa'     => 'rsa',
+        ];
+        foreach ($markers as $needle => $family) {
+            if (strpos($type, $needle) !== false) {
+                return $family;
+            }
+        }
+
+        $explicit = $method['algorithm'] ?? null;
+        if (is_string($explicit) && $explicit !== '') {
+            return strtolower($explicit);
+        }
+
+        return 'ed25519';
+    }
+}
diff --git a/php/src/Keys/DirectUrlResolver.php b/php/src/Keys/DirectUrlResolver.php
new file mode 100644
index 0000000..6e7bf41
--- /dev/null
+++ b/php/src/Keys/DirectUrlResolver.php
@@ -0,0 +1,70 @@
+", "algorithm": "ed25519" }
+ * - raw PEM if the response Content-Type indicates a PEM file
+ * (application/x-pem-file or text/plain with a -----BEGIN PUBLIC KEY-----
+ * prelude).
+ *
+ * @package HTMLTrust\Canonicalization\Keys
+ */
+
+namespace HTMLTrust\Canonicalization\Keys;
+
+final class DirectUrlResolver implements KeyResolver
+{
+    /** @var callable(string): ?array{body: string, contentType: string} */
+    private $fetcher;
+
+    /**
+     * @param callable|null $fetcher URL fetcher; HttpFetcher::default() is
+     *                               used when none is given.
+     */
+    public function __construct(?callable $fetcher = null)
+    {
+        $this->fetcher = $fetcher ?? HttpFetcher::default();
+    }
+
+    /**
+     * True when the keyid is itself an http:// or https:// URL.
+     */
+    public function supports(string $keyid): bool
+    {
+        foreach (['http://', 'https://'] as $scheme) {
+            if (strncmp($keyid, $scheme, strlen($scheme)) === 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Fetch the keyid URL and interpret the response as either a raw PEM
+     * document or a JSON object holding the key.
+     *
+     * @return ResolvedKey|null Null when the keyid is not a URL, the fetch
+     *                          fails, or the body holds no usable key.
+     */
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        if (!$this->supports($keyid)) {
+            return null;
+        }
+
+        $reply = ($this->fetcher)($keyid);
+        if ($reply === null) {
+            return null;
+        }
+
+        $payload = $reply['body'];
+        $mime = strtolower($reply['contentType'] ?? '');
+
+        // Raw PEM: trusted when the Content-Type mentions PEM, or when the
+        // body itself opens with a PEM header (static hosts often mislabel).
+        $typeSaysPem = strpos($mime, 'pem') !== false || strpos($mime, 'x-pem') !== false;
+        $bodyStartsWithPem = strpos(ltrim($payload), '-----BEGIN') === 0;
+        if ($typeSaysPem || $bodyStartsWithPem) {
+            // A bare PEM carries no algorithm label; ed25519 is assumed.
+            return new ResolvedKey($payload, 'ed25519', $keyid);
+        }
+
+        // Otherwise the body must be a JSON object.
+        $json = json_decode($payload, true);
+        if (!is_array($json)) {
+            return null;
+        }
+
+        $pem = $json['publicKey'] ?? $json['publicKeyPem'] ?? null;
+        if (!is_string($pem) || $pem === '') {
+            return null;
+        }
+
+        $algorithm = 'ed25519';
+        if (isset($json['algorithm']) && is_string($json['algorithm']) && $json['algorithm'] !== '') {
+            $algorithm = strtolower($json['algorithm']);
+        }
+
+        return new ResolvedKey($pem, $algorithm, $keyid);
+    }
+}
diff --git a/php/src/Keys/HttpFetcher.php b/php/src/Keys/HttpFetcher.php
new file mode 100644
index 0000000..95722d6
--- /dev/null
+++ b/php/src/Keys/HttpFetcher.php
@@ -0,0 +1,116 @@
+ string, 'contentType' => string]; }
+ *
+ * which makes them trivially mockable in tests. This class supplies the
+ * production callable, preferring curl when available and falling back to
+ * a stream-context file_get_contents.
+ *
+ * @package HTMLTrust\Canonicalization\Keys
+ */
+
+namespace HTMLTrust\Canonicalization\Keys;
+
+final class HttpFetcher
+{
+    /** Accept header sent with every HTTP(S) request. */
+    private const ACCEPT_HEADER = 'Accept: application/json, application/did+json, application/x-pem-file, */*';
+
+    /**
+     * Returns a callable suitable for injection into a KeyResolver:
+     *
+     *     $fetcher  = HttpFetcher::default();
+     *     $resolver = new DidWebResolver($fetcher);
+     *
+     * The callable returns null on failure, or
+     *     ['body' => string, 'contentType' => string]
+     * on success.
+     *
+     * Accepted URL schemes:
+     *   - file://  read directly from disk (useful for tests/dev fixtures)
+     *   - http://, https://  fetched via curl when available, otherwise via
+     *     a stream-context file_get_contents
+     * Any other scheme is refused (null), so a key URL can never be used to
+     * reach other curl protocols or PHP stream wrappers.
+     */
+    public static function default(): callable
+    {
+        return static function (string $url): ?array {
+            // Local file:// path — useful for tests and dev fixtures.
+            if (strncmp($url, 'file://', 7) === 0) {
+                return self::fetchLocalFile((string) substr($url, 7));
+            }
+
+            // Everything else must be plain HTTP(S).
+            if (preg_match('#^https?://#i', $url) !== 1) {
+                return null;
+            }
+
+            // Prefer curl when available — better timeout semantics and
+            // easier header inspection.
+            if (function_exists('curl_init')) {
+                return self::fetchWithCurl($url);
+            }
+
+            return self::fetchWithStreams($url);
+        };
+    }
+
+    /**
+     * Read a local file; the content type is guessed from its extension.
+     */
+    private static function fetchLocalFile(string $path): ?array
+    {
+        if (!is_readable($path)) {
+            return null;
+        }
+        $body = @file_get_contents($path);
+        if ($body === false) {
+            return null;
+        }
+        return ['body' => $body, 'contentType' => self::guessContentTypeFromPath($path)];
+    }
+
+    /**
+     * Fetch an HTTP(S) URL with curl. Only 2xx responses count as success.
+     */
+    private static function fetchWithCurl(string $url): ?array
+    {
+        $handle = curl_init();
+        if ($handle === false) {
+            return null;
+        }
+        $httpOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;
+        curl_setopt_array($handle, [
+            CURLOPT_URL => $url,
+            CURLOPT_RETURNTRANSFER => true,
+            CURLOPT_FOLLOWLOCATION => true,
+            CURLOPT_MAXREDIRS => 5,
+            // Redirects are followed, so pin both the initial request and
+            // every redirect target to HTTP(S); otherwise a hostile server
+            // could bounce the request to another protocol.
+            CURLOPT_PROTOCOLS => $httpOnly,
+            CURLOPT_REDIR_PROTOCOLS => $httpOnly,
+            CURLOPT_CONNECTTIMEOUT => 5,
+            CURLOPT_TIMEOUT => 10,
+            CURLOPT_SSL_VERIFYPEER => true,
+            CURLOPT_SSL_VERIFYHOST => 2,
+            CURLOPT_HTTPHEADER => [self::ACCEPT_HEADER],
+        ]);
+        $body = curl_exec($handle);
+        $code = (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
+        $type = (string) curl_getinfo($handle, CURLINFO_CONTENT_TYPE);
+        curl_close($handle);
+
+        if ($body === false || $code < 200 || $code >= 300) {
+            return null;
+        }
+        return ['body' => (string) $body, 'contentType' => $type];
+    }
+
+    /**
+     * Fetch an HTTP(S) URL with file_get_contents and a stream context.
+     */
+    private static function fetchWithStreams(string $url): ?array
+    {
+        $context = stream_context_create([
+            'http' => [
+                'timeout' => 10,
+                'header' => self::ACCEPT_HEADER . "\r\n",
+            ],
+            'ssl' => [
+                'verify_peer' => true,
+                'verify_peer_name' => true,
+            ],
+        ]);
+        $body = @file_get_contents($url, false, $context);
+        if ($body === false) {
+            return null;
+        }
+
+        $contentType = '';
+        // $http_response_header is populated by file_get_contents. When
+        // redirects were followed it holds the headers of every hop in
+        // order, so keep the LAST Content-Type: that is the one belonging
+        // to the response whose body was returned.
+        if (isset($http_response_header) && is_array($http_response_header)) {
+            foreach ($http_response_header as $line) {
+                if (stripos($line, 'content-type:') === 0) {
+                    $contentType = trim(substr($line, strlen('content-type:')));
+                }
+            }
+        }
+        return ['body' => $body, 'contentType' => $contentType];
+    }
+
+    /**
+     * Map a file extension to a content type: "json" and "pem" are known,
+     * anything else yields an empty string.
+     */
+    private static function guessContentTypeFromPath(string $path): string
+    {
+        $ext = strtolower((string) pathinfo($path, PATHINFO_EXTENSION));
+        switch ($ext) {
+            case 'json': return 'application/json';
+            case 'pem':  return 'application/x-pem-file';
+            default:     return '';
+        }
+    }
+}
diff --git a/php/src/Keys/KeyResolution.php b/php/src/Keys/KeyResolution.php
new file mode 100644
index 0000000..496cad5
--- /dev/null
+++ b/php/src/Keys/KeyResolution.php
@@ -0,0 +1,38 @@
+ $resolvers
+ */
+    public static function resolveKey(string $keyid, array $resolvers): ?ResolvedKey
+    {
+        // An empty keyid can never resolve; do not bother the resolvers.
+        if ($keyid === '') {
+            return null;
+        }
+
+        foreach ($resolvers as $candidate) {
+            // Entries that are not KeyResolver instances, or that decline
+            // this keyid, are passed over silently.
+            $applicable = $candidate instanceof KeyResolver && $candidate->supports($keyid);
+            if (!$applicable) {
+                continue;
+            }
+
+            // The first resolver to produce a key ends the search; a null
+            // result lets the next resolver in the list try.
+            $key = $candidate->resolve($keyid);
+            if ($key !== null) {
+                return $key;
+            }
+        }
+
+        return null;
+    }
+}
diff --git a/php/src/Keys/KeyResolver.php b/php/src/Keys/KeyResolver.php
new file mode 100644
index 0000000..5d03b95
--- /dev/null
+++ b/php/src/Keys/KeyResolver.php
@@ -0,0 +1,28 @@
+publicKeyPem = $publicKeyPem;
+ $this->algorithm = $algorithm;
+ $this->keyid = $keyid;
+ }
+}
diff --git a/php/src/Keys/TrustDirectoryResolver.php b/php/src/Keys/TrustDirectoryResolver.php
new file mode 100644
index 0000000..fa056a6
--- /dev/null
+++ b/php/src/Keys/TrustDirectoryResolver.php
@@ -0,0 +1,79 @@
+", "algorithm": "ed25519" }
+ * (also accepts "publicKeyPem" as a synonym, matching DID conventions)
+ *
+ * @package HTMLTrust\Canonicalization\Keys
+ */
+
+namespace HTMLTrust\Canonicalization\Keys;
+
+final class TrustDirectoryResolver implements KeyResolver
+{
+    /** @var array Ordered list of base URLs (strings only). */
+    private $baseUrls;
+
+    /** @var callable(string): ?array{body: string, contentType: string} */
+    private $fetcher;
+
+    /**
+     * @param array         $baseUrls Ordered list of trust-directory base
+     *                                URLs; each is tried in turn. Non-string
+     *                                entries are discarded.
+     * @param callable|null $fetcher  URL fetcher; HttpFetcher::default() is
+     *                                used when none is given.
+     */
+    public function __construct(array $baseUrls, ?callable $fetcher = null)
+    {
+        $urls = [];
+        foreach ($baseUrls as $entry) {
+            if (is_string($entry)) {
+                $urls[] = $entry;
+            }
+        }
+        $this->baseUrls = $urls;
+        $this->fetcher = $fetcher ?? HttpFetcher::default();
+    }
+
+    /**
+     * Accepts every non-empty keyid that is neither a DID ("did:" prefix)
+     * nor an http:// / https:// URL — i.e. whatever the more specific
+     * resolvers would not claim.
+     */
+    public function supports(string $keyid): bool
+    {
+        $isDid = strncmp($keyid, 'did:', 4) === 0;
+        $isUrl = strncmp($keyid, 'http://', 7) === 0 || strncmp($keyid, 'https://', 8) === 0;
+
+        return $keyid !== '' && !$isDid && !$isUrl;
+    }
+
+    /**
+     * Ask each directory for BASE/keys/KEYID and return the first response
+     * that is a JSON object with a non-empty "publicKey" or "publicKeyPem".
+     *
+     * @return ResolvedKey|null Null when the keyid is unsupported or no
+     *                          directory yields a usable key.
+     */
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        if (!$this->supports($keyid)) {
+            return null;
+        }
+
+        $encodedId = rawurlencode($keyid);
+        foreach ($this->baseUrls as $base) {
+            $reply = ($this->fetcher)(rtrim($base, '/') . '/keys/' . $encodedId);
+            if ($reply === null) {
+                continue;
+            }
+
+            $record = json_decode($reply['body'], true);
+            if (!is_array($record)) {
+                continue;
+            }
+
+            $pem = $record['publicKey'] ?? $record['publicKeyPem'] ?? null;
+            if (!is_string($pem) || $pem === '') {
+                continue;
+            }
+
+            // Missing, non-string or empty algorithm falls back to ed25519.
+            $algorithm = 'ed25519';
+            if (isset($record['algorithm']) && is_string($record['algorithm']) && $record['algorithm'] !== '') {
+                $algorithm = strtolower($record['algorithm']);
+            }
+
+            return new ResolvedKey($pem, $algorithm, $keyid);
+        }
+
+        return null;
+    }
+}
diff --git a/php/src/Signature.php b/php/src/Signature.php
new file mode 100644
index 0000000..b2ba300
--- /dev/null
+++ b/php/src/Signature.php
@@ -0,0 +1,308 @@
+ $endorsement
+ * @param array $resolvers
+ */
+    public static function verifyEndorsement(array $endorsement, array $resolvers): bool
+    {
+        // All four fields must be present as non-empty strings; anything
+        // else is an unverifiable endorsement, not an error.
+        foreach (['endorser', 'endorsement', 'signature', 'timestamp'] as $required) {
+            if (!isset($endorsement[$required]) || !is_string($endorsement[$required]) || $endorsement[$required] === '') {
+                return false;
+            }
+        }
+
+        $endorser  = $endorsement['endorser'];
+        $payload   = $endorsement['endorsement'];
+        $signature = $endorsement['signature'];
+        $timestamp = $endorsement['timestamp'];
+
+        $resolved = KeyResolution::resolveKey($endorser, $resolvers);
+        if ($resolved === null) {
+            return false;
+        }
+
+        // Algorithm precedence:
+        //   1. the algorithm declared in the endorsement (wire format wins,
+        //      mirroring the JS reference);
+        //   2. otherwise the resolved key's own hint, so an ECDSA/RSA key is
+        //      not blindly verified as ed25519 when the field was omitted;
+        //   3. otherwise ed25519.
+        // The hint is read through `??` so an absent or inaccessible
+        // property simply counts as "no hint".
+        $algorithm = 'ed25519';
+        $hint = $resolved->algorithm ?? null;
+        if (isset($endorsement['algorithm']) && is_string($endorsement['algorithm']) && $endorsement['algorithm'] !== '') {
+            $algorithm = $endorsement['algorithm'];
+        } elseif (is_string($hint) && $hint !== '') {
+            $algorithm = $hint;
+        }
+
+        $message = self::buildEndorsementBinding($payload, $timestamp);
+
+        try {
+            return self::verifySignature($message, $signature, $resolved->publicKeyPem, $algorithm);
+        } catch (InvalidArgumentException $e) {
+            // Invalid arguments reported by verifySignature count as a
+            // failed verification rather than a caller-visible error.
+            return false;
+        }
+    }
+
+ // ------------------------------------------------------------------
+ // Internal helpers
+ // ------------------------------------------------------------------
+
+    /**
+     * Strictly decode Base64 text whose trailing "=" padding may have been
+     * omitted. Surrounding whitespace is ignored.
+     *
+     * @return string|null The decoded bytes, or null for empty or malformed
+     *                     input.
+     */
+    private static function base64DecodeFlexible(string $input): ?string
+    {
+        $candidate = trim($input);
+        if ($candidate === '') {
+            return null;
+        }
+
+        // Restore padding so the length is a multiple of 4.
+        switch (strlen($candidate) % 4) {
+            case 1:
+                // A single leftover character cannot encode a whole byte.
+                return null;
+            case 2:
+                $candidate .= '==';
+                break;
+            case 3:
+                $candidate .= '=';
+                break;
+        }
+
+        $bytes = base64_decode($candidate, true);
+        if ($bytes === false) {
+            return null;
+        }
+        return $bytes;
+    }
+
+    /**
+     * Check a detached Ed25519 signature with libsodium.
+     *
+     * The public key may be a PEM SubjectPublicKeyInfo or a raw 32-byte key.
+     * A key or signature of the wrong size, and any error raised by
+     * libsodium, yield false.
+     *
+     * @throws RuntimeException When libsodium is not available.
+     */
+    private static function verifyEd25519(string $message, string $signature, string $publicKey): bool
+    {
+        if (!function_exists('sodium_crypto_sign_verify_detached')) {
+            throw new RuntimeException('libsodium is required to verify ed25519 signatures');
+        }
+
+        $key = self::extractEd25519RawKey($publicKey);
+        $keyUsable = $key !== null && strlen($key) === SODIUM_CRYPTO_SIGN_PUBLICKEYBYTES;
+        $signatureUsable = strlen($signature) === SODIUM_CRYPTO_SIGN_BYTES;
+        if (!$keyUsable || !$signatureUsable) {
+            return false;
+        }
+
+        try {
+            return sodium_crypto_sign_verify_detached($signature, $message, $key);
+        } catch (\Throwable $failure) {
+            // sodium_* throws SodiumException on malformed inputs; anything
+            // thrown here is treated as a failed verification.
+            return false;
+        }
+    }
+
+    /**
+     * Extract the raw 32-byte Ed25519 public key from either:
+     *   - a raw 32-byte string (returned unchanged), or
+     *   - a PEM document (`-----BEGIN ... -----`) whose decoded body is
+     *     an Ed25519 SubjectPublicKeyInfo: exactly 44 bytes, made of the
+     *     fixed 12-byte SPKI header followed by the 32-byte key.
+     *
+     * A PEM body that decodes to exactly 32 bytes is also accepted as a
+     * bare key. Any other PEM content (for example an RSA or EC public key)
+     * is rejected instead of having its trailing 32 bytes reinterpreted as
+     * an Ed25519 key.
+     *
+     * @return string|null The 32-byte key, or null when none can be extracted.
+     */
+    private static function extractEd25519RawKey(string $publicKey): ?string
+    {
+        $keyLen = SODIUM_CRYPTO_SIGN_PUBLICKEYBYTES;
+
+        // Not PEM: only an exactly-sized raw key is acceptable.
+        if (strpos($publicKey, '-----BEGIN') === false) {
+            return strlen($publicKey) === $keyLen ? $publicKey : null;
+        }
+
+        // Strip header/footer and whitespace, then base64-decode.
+        $body = preg_replace('/-----BEGIN [^-]+-----|-----END [^-]+-----|\s+/', '', $publicKey);
+        if ($body === null || $body === '') {
+            return null;
+        }
+        $der = base64_decode($body, true);
+        if ($der === false) {
+            return null;
+        }
+
+        // SEQUENCE { AlgorithmIdentifier { id-Ed25519 }, BIT STRING (32 bytes) }
+        // 30 2A 30 05 06 03 2B 65 70 03 21 00 <32-byte key>
+        $spkiPrefix = "\x30\x2a\x30\x05\x06\x03\x2b\x65\x70\x03\x21\x00";
+        $prefixLen = strlen($spkiPrefix);
+
+        if (strlen($der) === $prefixLen + $keyLen && strncmp($der, $spkiPrefix, $prefixLen) === 0) {
+            return substr($der, $prefixLen);
+        }
+
+        // PEM armor around a bare 32-byte key (accepted historically).
+        if (strlen($der) === $keyLen) {
+            return $der;
+        }
+
+        // Some other key type or a truncated/extended structure.
+        return null;
+    }
+
+    /**
+     * Check an ECDSA or RSA signature over the message with OpenSSL, using
+     * SHA-256 as the digest.
+     *
+     * @return bool True only when openssl_verify reports a valid signature;
+     *              false for an unloadable key, a mismatch, or an OpenSSL
+     *              error.
+     * @throws RuntimeException When ext-openssl is not available.
+     */
+    private static function verifyOpenssl(string $message, string $signature, string $publicKeyPem): bool
+    {
+        if (!function_exists('openssl_verify')) {
+            throw new RuntimeException('ext-openssl is required for ecdsa/rsa verification');
+        }
+
+        $handle = openssl_pkey_get_public($publicKeyPem);
+        if ($handle === false) {
+            return false;
+        }
+
+        $verified = openssl_verify($message, $signature, $handle, OPENSSL_ALGO_SHA256) === 1;
+
+        // Before PHP 8.0 the key is a resource that has to be released by
+        // hand; from 8.0 on it is an OpenSSLAsymmetricKey object that is
+        // garbage-collected.
+        $needsManualFree = PHP_VERSION_ID < 80000 && is_resource($handle);
+        if ($needsManualFree) {
+            // @phpstan-ignore-next-line — only present on PHP < 8.0
+            openssl_free_key($handle);
+        }
+
+        return $verified;
+    }
+
+    /**
+     * Wrap a raw 32-byte Ed25519 public key in a PEM SubjectPublicKeyInfo
+     * document, bridging libsodium-generated keys to the PEM-based
+     * verification path (handy for tests and tooling).
+     *
+     * @param string $rawKey Exactly 32 bytes.
+     * @return string PEM text: BEGIN line, base64 body wrapped at 64
+     *                columns, END line, each newline-terminated.
+     * @throws InvalidArgumentException When the key is not 32 bytes long.
+     */
+    public static function ed25519RawToPem(string $rawKey): string
+    {
+        if (strlen($rawKey) !== 32) {
+            throw new InvalidArgumentException('ed25519 raw public key must be 32 bytes');
+        }
+
+        // DER layout: SEQUENCE(42) { SEQUENCE(5) { OID 1.3.101.112 },
+        // BIT STRING(33) { 0 unused bits, <32-byte key> } }
+        $sequenceHeader = "\x30\x2a";
+        $algorithmId    = "\x30\x05\x06\x03\x2b\x65\x70";
+        $bitStringHead  = "\x03\x21\x00";
+        $encoded = base64_encode($sequenceHeader . $algorithmId . $bitStringHead . $rawKey);
+
+        // chunk_split terminates every chunk — including the last — with "\n".
+        $pemLines = [
+            "-----BEGIN PUBLIC KEY-----\n",
+            chunk_split($encoded, 64, "\n"),
+            "-----END PUBLIC KEY-----\n",
+        ];
+        return implode('', $pemLines);
+    }
+}
diff --git a/php/tests/CanonicalizeClaimsTest.php b/php/tests/CanonicalizeClaimsTest.php
new file mode 100644
index 0000000..512f50f
--- /dev/null
+++ b/php/tests/CanonicalizeClaimsTest.php
@@ -0,0 +1,45 @@
+ '1', 'a' => '2', 'm' => '3'];
+ $this->assertSame("a=2\nm=3\nz=1", Canonicalize::canonicalizeClaims($claims));
+ }
+
+    public function testNormalizesNamesAndValues(): void
+    {
+        // The value is wrapped in curly double quotes (U+201C / U+201D);
+        // the canonical form is expected to carry straight quotes instead.
+        $canonical = Canonicalize::canonicalizeClaims([
+            'title' => "\u{201C}Hello\u{201D}",
+        ]);
+
+        $this->assertSame('title="Hello"', $canonical);
+    }
+
+    public function testStringifiesNonStringValues(): void
+    {
+        // Non-string values go through PHP's string cast:
+        // 42 becomes "42" and true becomes "1".
+        $canonical = Canonicalize::canonicalizeClaims([
+            'count' => 42,
+            'flag'  => true,
+        ]);
+
+        $this->assertSame("count=42\nflag=1", $canonical);
+    }
+
+    public function testEmptyClaimsProducesEmptyString(): void
+    {
+        // No claims means no lines, hence an empty canonical string.
+        $canonical = Canonicalize::canonicalizeClaims([]);
+
+        $this->assertSame('', $canonical);
+    }
+
+    public function testStableUnderInputOrdering(): void
+    {
+        // The same three claims, supplied in two different orders, must
+        // serialize to the same canonical string.
+        $firstOrder  = ['b' => '1', 'a' => '2', 'c' => '3'];
+        $secondOrder = ['c' => '3', 'a' => '2', 'b' => '1'];
+
+        $this->assertSame(
+            Canonicalize::canonicalizeClaims($firstOrder),
+            Canonicalize::canonicalizeClaims($secondOrder)
+        );
+    }
+}
diff --git a/php/tests/EndorsementTest.php b/php/tests/EndorsementTest.php
new file mode 100644
index 0000000..c136cee
--- /dev/null
+++ b/php/tests/EndorsementTest.php
@@ -0,0 +1,144 @@
+skipIfNoSodium();
+ [$endorser, $pem, $secret] = $this->makeEndorser();
+
+ $endorsement = [
+ 'endorser' => $endorser,
+ 'endorsement' => 'sha256:CONTENT',
+ 'timestamp' => '2025-05-01T00:00Z',
+ 'algorithm' => 'ed25519',
+ ];
+ $message = $endorsement['endorsement'] . ':' . $endorsement['timestamp'];
+ $endorsement['signature'] = base64_encode(sodium_crypto_sign_detached($message, $secret));
+
+ $resolver = new InMemoryResolver([$endorser => new ResolvedKey($pem, 'ed25519', $endorser)]);
+ $this->assertTrue(Signature::verifyEndorsement($endorsement, [$resolver]));
+ }
+
+    public function testVerifyEndorsementDefaultsToEd25519(): void
+    {
+        $this->skipIfNoSodium();
+        [$endorser, $pem, $secret] = $this->makeEndorser();
+
+        // The signed message is "<endorsement>:<timestamp>".
+        $payload = 'sha256:CONTENT';
+        $timestamp = '2025-05-01T00:00Z';
+        $signature = sodium_crypto_sign_detached($payload . ':' . $timestamp, $secret);
+
+        // Deliberately no 'algorithm' key — verification must default to ed25519.
+        $endorsement = [
+            'endorser' => $endorser,
+            'endorsement' => $payload,
+            'timestamp' => $timestamp,
+            'signature' => base64_encode($signature),
+        ];
+
+        $resolver = new InMemoryResolver([$endorser => new ResolvedKey($pem, 'ed25519', $endorser)]);
+        $this->assertTrue(Signature::verifyEndorsement($endorsement, [$resolver]));
+    }
+
+    public function testVerifyEndorsementFailsForTamperedTimestamp(): void
+    {
+        $this->skipIfNoSodium();
+        [$endorser, $pem, $secret] = $this->makeEndorser();
+
+        // Sign a message dated 2025-05-01 ...
+        $signature = sodium_crypto_sign_detached('sha256:CONTENT:2025-05-01T00:00Z', $secret);
+
+        // ... but present the endorsement with a different timestamp.
+        $endorsement = [
+            'endorser' => $endorser,
+            'endorsement' => 'sha256:CONTENT',
+            'timestamp' => '2025-05-02T00:00Z',
+            'signature' => base64_encode($signature),
+        ];
+
+        $known = [$endorser => new ResolvedKey($pem, 'ed25519', $endorser)];
+        $verified = Signature::verifyEndorsement($endorsement, [new InMemoryResolver($known)]);
+
+        $this->assertFalse($verified);
+    }
+
+    public function testVerifyEndorsementFailsForUnknownEndorser(): void
+    {
+        $this->skipIfNoSodium();
+        [$endorser, , $secret] = $this->makeEndorser();
+
+        // A correctly signed endorsement ...
+        $signature = sodium_crypto_sign_detached('sha256:CONTENT:2025-05-01T00:00Z', $secret);
+        $endorsement = [
+            'endorser' => $endorser,
+            'endorsement' => 'sha256:CONTENT',
+            'timestamp' => '2025-05-01T00:00Z',
+            'signature' => base64_encode($signature),
+        ];
+
+        // ... checked against a resolver that knows no keys at all.
+        $emptyResolver = new InMemoryResolver([]);
+
+        $this->assertFalse(Signature::verifyEndorsement($endorsement, [$emptyResolver]));
+    }
+
+    public function testVerifyEndorsementFailsOnMissingFields(): void
+    {
+        // Neither 'signature' nor 'timestamp' is supplied.
+        $incomplete = [
+            'endorser' => 'did:web:example.com',
+            'endorsement' => 'sha256:CONTENT',
+        ];
+
+        $verified = Signature::verifyEndorsement($incomplete, [new InMemoryResolver([])]);
+
+        $this->assertFalse($verified);
+    }
+
+ // ------------------------------------------------------------------
+
+    private function skipIfNoSodium(): void
+    {
+        // Key generation and signing in these tests need libsodium.
+        if (function_exists('sodium_crypto_sign_keypair')) {
+            return;
+        }
+        $this->markTestSkipped('libsodium not available');
+    }
+
+    /**
+     * Generate a fresh Ed25519 keypair for a fixed endorser keyid.
+     *
+     * @return array{0: string, 1: string, 2: string} [endorser keyid, public PEM, secret raw]
+     */
+    private function makeEndorser(): array
+    {
+        $pair = sodium_crypto_sign_keypair();
+
+        return [
+            'did:web:endorser.example',
+            Signature::ed25519RawToPem(sodium_crypto_sign_publickey($pair)),
+            sodium_crypto_sign_secretkey($pair),
+        ];
+    }
+}
+
+/**
+ * KeyResolver for tests: answers from a fixed keyid => ResolvedKey map and
+ * never performs any I/O.
+ */
+class InMemoryResolver implements KeyResolver
+{
+    /** @var array<string, ResolvedKey> */
+    private $keys;
+
+    /**
+     * @param array<string, ResolvedKey> $keys Keys to serve, indexed by keyid.
+     */
+    public function __construct(array $keys)
+    {
+        $this->keys = $keys;
+    }
+
+    /**
+     * True only for keyids present in the map.
+     */
+    public function supports(string $keyid): bool
+    {
+        return ($this->keys[$keyid] ?? null) !== null;
+    }
+
+    /**
+     * The stored key for the keyid, or null when it is unknown.
+     */
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        if (!isset($this->keys[$keyid])) {
+            return null;
+        }
+        return $this->keys[$keyid];
+    }
+}
diff --git a/php/tests/ExtractCanonicalTextTest.php b/php/tests/ExtractCanonicalTextTest.php
new file mode 100644
index 0000000..838daf2
--- /dev/null
+++ b/php/tests/ExtractCanonicalTextTest.php
@@ -0,0 +1,86 @@
+<?php
+/**
+ * Tests for HTML -> canonical text extraction.
+ */
+
+namespace HTMLTrust\Canonicalization\Tests;
+
+use PHPUnit\Framework\TestCase;
+use HTMLTrust\Canonicalization\Canonicalize;
+
+/**
+ * PHPUnit cases for Canonicalize::extractCanonicalText().
+ *
+ * NOTE(review): several $html fixtures below contain no markup in this copy and
+ * span more than one physical line; the tags appear to have been lost. Confirm
+ * the literals against the original commit before relying on these tests.
+ */
+class ExtractCanonicalTextTest extends TestCase
+{
+ // Expects 'Hello World' once script/style elements and their contents are removed.
+ // NOTE(review): the fixture shows no script or style element here — verify.
+ public function testStripsScriptStyleAndContents(): void
+ {
+ $html = 'Hello
World
';
+ $this->assertSame('Hello World', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects only 'Body' to survive extraction.
+ public function testStripsMetaInsideSignedSection(): void
+ {
+ // Inside a signed-section, <meta> carries claim metadata, not content.
+ $html = 'Body
';
+ $this->assertSame('Body', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects adjacent blocks to be joined by exactly one space ('A B').
+ public function testBlockBoundariesBecomeSpaces(): void
+ {
+ $html = 'A
B
';
+ $this->assertSame('A B', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects 'hello world' with no extra separator introduced by inline tags.
+ // NOTE(review): the comment inside this method is split across lines in this copy,
+ // presumably because the markup it quoted was lost — verify.
+ public function testInlineTagsDoNotAddSpaces(): void
+ {
+ // hello world
should canonicalize to "hello world"
+ // — no separator inside the inline boundary.
+ $html = 'hello world
';
+ $this->assertSame('hello world', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects named entities to be decoded and then normalized to 'AT&T - "hello"'.
+ // NOTE(review): the fixture already contains the decoded characters — verify.
+ public function testDecodesNamedEntities(): void
+ {
+ $html = 'AT&T — “hello”
';
+ // mdash and curly quotes get normalized away by the text pipeline.
+ $this->assertSame('AT&T - "hello"', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects numeric character references to decode to 'ABC'.
+ // NOTE(review): the references in the comment and fixture appear already decoded — verify.
+ public function testDecodesNumericEntities(): void
+ {
+ // A -> A, B -> B
+ $html = 'ABC
';
+ $this->assertSame('ABC', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects U+201C / U+201D in the input to come out as straight double quotes.
+ public function testNormalizationPipelineApplied(): void
+ {
+ // Curly quotes from HTML attribute-free content should be straightened.
+ $html = "\u{201C}Hello\u{201D}
";
+ $this->assertSame('"Hello"', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects nested inline markup to leave the sentence text unchanged.
+ public function testHandlesNestedInlineMarkup(): void
+ {
+ $html = 'This is very important.
';
+ $this->assertSame('This is very important.', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects link text to be kept while the link markup is dropped.
+ public function testStripsLinksButPreservesText(): void
+ {
+ $html = 'See our site now.
';
+ $this->assertSame('See our site now.', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects 'Before After': the image contributes no text, only a separator.
+ // NOTE(review): the comment inside this method is split across lines in this copy — verify.
+ public function testStripsImagesEntirely(): void
+ {
+ $html = 'Before
After
';
+ // Void
stripped (becomes a space). Then inline text concatenates,
+ // and whitespace collapses.
+ $this->assertSame('Before After', Canonicalize::extractCanonicalText($html));
+ }
+
+ // Expects an empty result for empty input.
+ // NOTE(review): both calls pass '' in this copy; going by the test name the second
+ // presumably passed markup-only input — verify.
+ public function testEmptyAndAllMarkup(): void
+ {
+ $this->assertSame('', Canonicalize::extractCanonicalText(''));
+ $this->assertSame('', Canonicalize::extractCanonicalText(''));
+ }
+}
diff --git a/php/tests/Keys/DidWebResolverTest.php b/php/tests/Keys/DidWebResolverTest.php
new file mode 100644
index 0000000..96ce3ff
--- /dev/null
+++ b/php/tests/Keys/DidWebResolverTest.php
@@ -0,0 +1,188 @@
+assertTrue($resolver->supports('did:web:example.com'));
+ $this->assertFalse($resolver->supports('did:key:z123'));
+ $this->assertFalse($resolver->supports('https://example.com/key'));
+ }
+
+    /**
+     * A bare-domain did:web id is fetched from /.well-known/did.json and the
+     * document's verification method comes back as an ed25519 key.
+     */
+    public function testResolvesBasicDomain(): void
+    {
+        $document = [
+            'id' => 'did:web:example.com',
+            'verificationMethod' => [
+                [
+                    'id' => 'did:web:example.com#keys-1',
+                    'type' => 'Ed25519VerificationKey2020',
+                    'publicKeyPem' => "-----BEGIN PUBLIC KEY-----\nFAKE\n-----END PUBLIC KEY-----\n",
+                ],
+            ],
+        ];
+        $requestedUrl = null;
+        $fetcher = static function (string $url) use (&$requestedUrl, $document): ?array {
+            $requestedUrl = $url; // remember which URL the resolver asked for
+            return ['body' => json_encode($document), 'contentType' => 'application/did+json'];
+        };
+
+        $key = (new DidWebResolver($fetcher))->resolve('did:web:example.com');
+
+        $this->assertNotNull($key);
+        $this->assertSame('https://example.com/.well-known/did.json', $requestedUrl);
+        $this->assertSame('ed25519', $key->algorithm);
+        $this->assertSame('did:web:example.com', $key->keyid);
+        $this->assertStringContainsString('FAKE', $key->publicKeyPem);
+    }
+
+    /**
+     * Colon-separated segments after the domain become URL path segments, with
+     * did.json appended directly under them.
+     */
+    public function testResolvesWithPathSegments(): void
+    {
+        $seenUrl = null;
+        $payload = json_encode([
+            'verificationMethod' => [
+                ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'PEM'],
+            ],
+        ]);
+        $fetcher = static function (string $url) use (&$seenUrl, $payload): ?array {
+            $seenUrl = $url;
+            return ['body' => $payload, 'contentType' => 'application/json'];
+        };
+
+        $didResolver = new DidWebResolver($fetcher);
+        $key = $didResolver->resolve('did:web:example.com:user:alice');
+
+        $this->assertNotNull($key);
+        $this->assertSame('https://example.com/user/alice/did.json', $seenUrl);
+    }
+
+    /**
+     * A "#fragment" suffix on the DID must not change the URL that is fetched.
+     */
+    public function testIgnoresFragment(): void
+    {
+        $seenUrl = null;
+        $payload = json_encode([
+            'verificationMethod' => [
+                ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'PEM'],
+            ],
+        ]);
+        $fetcher = static function (string $url) use (&$seenUrl, $payload): ?array {
+            $seenUrl = $url;
+            // The content type is deliberately empty in this case.
+            return ['body' => $payload, 'contentType' => ''];
+        };
+
+        (new DidWebResolver($fetcher))->resolve('did:web:example.com#keys-1');
+
+        $this->assertSame('https://example.com/.well-known/did.json', $seenUrl);
+    }
+
+    /**
+     * A fetcher that yields null makes resolve() yield null as well.
+     */
+    public function testReturnsNullOnFetchFailure(): void
+    {
+        $alwaysFails = static function (string $url): ?array {
+            return null;
+        };
+
+        $outcome = (new DidWebResolver($alwaysFails))->resolve('did:web:example.com');
+
+        $this->assertNull($outcome);
+    }
+
+    /**
+     * A body that is not JSON makes resolve() yield null.
+     */
+    public function testReturnsNullOnInvalidJson(): void
+    {
+        $garbage = ['body' => 'not json', 'contentType' => 'application/json'];
+        $didResolver = new DidWebResolver(static function (string $url) use ($garbage): ?array {
+            return $garbage;
+        });
+
+        $this->assertNull($didResolver->resolve('did:web:example.com'));
+    }
+
+    /**
+     * A document whose only verification method has no publicKeyPem resolves to null.
+     */
+    public function testReturnsNullWhenNoVerificationMethodHasPem(): void
+    {
+        $document = ['verificationMethod' => [['type' => 'X']]];
+        $response = [
+            'body' => json_encode($document),
+            'contentType' => 'application/json',
+        ];
+        $didResolver = new DidWebResolver(static function (string $url) use ($response): ?array {
+            return $response;
+        });
+
+        $this->assertNull($didResolver->resolve('did:web:example.com'));
+    }
+
+    /**
+     * Among several verification methods, the first one carrying a publicKeyPem is used.
+     */
+    public function testPicksFirstVerificationMethodWithPem(): void
+    {
+        $methods = [
+            ['type' => 'X'],                                                  // no pem: passed over
+            ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'A'],  // expected pick
+            ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'B'],
+        ];
+        $response = [
+            'body' => json_encode(['verificationMethod' => $methods]),
+            'contentType' => 'application/json',
+        ];
+        $fetcher = static function (string $url) use ($response): ?array {
+            return $response;
+        };
+
+        $key = (new DidWebResolver($fetcher))->resolve('did:web:example.com');
+
+        $this->assertNotNull($key);
+        $this->assertSame('A', $key->publicKeyPem);
+    }
+
+    /**
+     * A verification method of type EcdsaSecp256r1VerificationKey2019 is reported as 'ecdsa'.
+     */
+    public function testInfersEcdsaFromMethodType(): void
+    {
+        $document = [
+            'verificationMethod' => [
+                ['type' => 'EcdsaSecp256r1VerificationKey2019', 'publicKeyPem' => 'PEM'],
+            ],
+        ];
+        $response = ['body' => json_encode($document), 'contentType' => 'application/json'];
+        $fetcher = static function (string $url) use ($response): ?array {
+            return $response;
+        };
+
+        $key = (new DidWebResolver($fetcher))->resolve('did:web:example.com');
+
+        $this->assertNotNull($key);
+        $this->assertSame('ecdsa', $key->algorithm);
+    }
+
+    /**
+     * Resolves a DID document through a fetcher that reads a real file from disk.
+     *
+     * The fixture is written to a uniquely named directory under the system temp
+     * dir. Cleanup runs in a finally block so a failing assertion does not leave
+     * the directory behind, and fixture setup failures are reported as assertion
+     * failures instead of being ignored.
+     */
+    public function testReadsFromFileFixture(): void
+    {
+        $fixtureDir = sys_get_temp_dir() . '/htmltrust-didweb-' . bin2hex(random_bytes(4));
+        $wellKnownDir = $fixtureDir . '/.well-known';
+        $fixturePath = $wellKnownDir . '/did.json';
+
+        try {
+            $this->assertTrue(mkdir($wellKnownDir, 0700, true), 'could not create fixture directory');
+
+            $written = file_put_contents($fixturePath, json_encode([
+                'verificationMethod' => [
+                    ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'FROM_FILE'],
+                ],
+            ]));
+            $this->assertNotFalse($written, 'could not write fixture file');
+
+            // Serve the on-disk fixture only for the URL the resolver is expected to request.
+            $fetcher = static function (string $url) use ($fixturePath): ?array {
+                if ($url === 'https://example.com/.well-known/did.json') {
+                    return ['body' => file_get_contents($fixturePath), 'contentType' => 'application/json'];
+                }
+                return null;
+            };
+            $resolver = new DidWebResolver($fetcher);
+            $resolved = $resolver->resolve('did:web:example.com');
+            $this->assertNotNull($resolved);
+            $this->assertSame('FROM_FILE', $resolved->publicKeyPem);
+        } finally {
+            // Each step is guarded so cleanup cannot raise on a partially created fixture.
+            if (is_file($fixturePath)) {
+                unlink($fixturePath);
+            }
+            if (is_dir($wellKnownDir)) {
+                rmdir($wellKnownDir);
+            }
+            if (is_dir($fixtureDir)) {
+                rmdir($fixtureDir);
+            }
+        }
+    }
+}
diff --git a/php/tests/Keys/DirectUrlResolverTest.php b/php/tests/Keys/DirectUrlResolverTest.php
new file mode 100644
index 0000000..5e9da46
--- /dev/null
+++ b/php/tests/Keys/DirectUrlResolverTest.php
@@ -0,0 +1,132 @@
+assertTrue($resolver->supports('https://example.com/key.json'));
+ $this->assertTrue($resolver->supports('http://example.com/key.json'));
+ $this->assertFalse($resolver->supports('did:web:example.com'));
+ $this->assertFalse($resolver->supports('opaque-id'));
+ }
+
+    /**
+     * A JSON key document yields its publicKey and algorithm, with the URL as keyid.
+     */
+    public function testResolvesJsonDocument(): void
+    {
+        $keyUrl = 'https://example.com/key.json';
+        $response = [
+            'body' => json_encode(['publicKey' => 'PEM-BODY', 'algorithm' => 'rsa']),
+            'contentType' => 'application/json',
+        ];
+        $fetcher = static function (string $url) use ($response): ?array {
+            return $response;
+        };
+
+        $key = (new DirectUrlResolver($fetcher))->resolve($keyUrl);
+
+        $this->assertNotNull($key);
+        $this->assertSame('PEM-BODY', $key->publicKeyPem);
+        $this->assertSame('rsa', $key->algorithm);
+        $this->assertSame($keyUrl, $key->keyid);
+    }
+
+    /**
+     * A JSON document without an algorithm entry is reported as 'ed25519'.
+     */
+    public function testDefaultsAlgorithmToEd25519(): void
+    {
+        $response = [
+            'body' => json_encode(['publicKey' => 'PEM']),
+            'contentType' => 'application/json',
+        ];
+        $urlResolver = new DirectUrlResolver(static function (string $url) use ($response): ?array {
+            return $response;
+        });
+
+        $key = $urlResolver->resolve('https://example.com/key.json');
+
+        $this->assertNotNull($key);
+        $this->assertSame('ed25519', $key->algorithm);
+    }
+
+    /**
+     * The key may be supplied under 'publicKeyPem' instead of 'publicKey'.
+     */
+    public function testAcceptsPublicKeyPemSynonym(): void
+    {
+        $response = [
+            'body' => json_encode(['publicKeyPem' => 'PEM-BODY']),
+            'contentType' => 'application/json',
+        ];
+        $urlResolver = new DirectUrlResolver(static function (string $url) use ($response): ?array {
+            return $response;
+        });
+
+        $key = $urlResolver->resolve('https://example.com/key.json');
+
+        $this->assertNotNull($key);
+        $this->assertSame('PEM-BODY', $key->publicKeyPem);
+    }
+
+    /**
+     * A response labelled application/x-pem-file is taken as a raw PEM key
+     * and reported as ed25519.
+     */
+    public function testRecognizesRawPemByContentType(): void
+    {
+        $pemBody = "-----BEGIN PUBLIC KEY-----\nABC\n-----END PUBLIC KEY-----\n";
+        $fetcher = static function (string $url) use ($pemBody): ?array {
+            return ['body' => $pemBody, 'contentType' => 'application/x-pem-file'];
+        };
+
+        $key = (new DirectUrlResolver($fetcher))->resolve('https://example.com/key.pem');
+
+        $this->assertNotNull($key);
+        $this->assertStringContainsString('BEGIN PUBLIC KEY', $key->publicKeyPem);
+        $this->assertSame('ed25519', $key->algorithm);
+    }
+
+    /**
+     * A PEM body still resolves when the response is labelled text/plain.
+     */
+    public function testRecognizesRawPemByBodyPrelude(): void
+    {
+        $pemBody = "-----BEGIN PUBLIC KEY-----\nABC\n-----END PUBLIC KEY-----\n";
+        $fetcher = static function (string $url) use ($pemBody): ?array {
+            // The content type is wrong on purpose.
+            return ['body' => $pemBody, 'contentType' => 'text/plain'];
+        };
+
+        $key = (new DirectUrlResolver($fetcher))->resolve('https://example.com/key.pem');
+
+        $this->assertNotNull($key);
+    }
+
+    /**
+     * A fetcher that yields null makes resolve() yield null as well.
+     */
+    public function testReturnsNullOnFetchFailure(): void
+    {
+        $alwaysFails = static function (string $url): ?array {
+            return null;
+        };
+
+        $outcome = (new DirectUrlResolver($alwaysFails))->resolve('https://example.com/key.json');
+
+        $this->assertNull($outcome);
+    }
+
+    /**
+     * A did:web identifier resolves to null here even though the fetcher would answer.
+     */
+    public function testReturnsNullForUnsupportedScheme(): void
+    {
+        $urlResolver = new DirectUrlResolver(static function (string $url): ?array {
+            return ['body' => '{}', 'contentType' => 'application/json'];
+        });
+
+        $outcome = $urlResolver->resolve('did:web:example.com');
+
+        $this->assertNull($outcome);
+    }
+
+    /**
+     * A JSON-labelled body that does not parse makes resolve() yield null.
+     */
+    public function testReturnsNullOnMalformedJson(): void
+    {
+        $garbage = ['body' => 'not json', 'contentType' => 'application/json'];
+        $urlResolver = new DirectUrlResolver(static function (string $url) use ($garbage): ?array {
+            return $garbage;
+        });
+
+        $this->assertNull($urlResolver->resolve('https://example.com/key.json'));
+    }
+
+    /**
+     * Valid JSON that carries no key entry makes resolve() yield null.
+     */
+    public function testReturnsNullWhenJsonHasNoKey(): void
+    {
+        $keyless = ['body' => '{"unrelated":1}', 'contentType' => 'application/json'];
+        $urlResolver = new DirectUrlResolver(static function (string $url) use ($keyless): ?array {
+            return $keyless;
+        });
+
+        $this->assertNull($urlResolver->resolve('https://example.com/key.json'));
+    }
+}
diff --git a/php/tests/Keys/KeyResolutionTest.php b/php/tests/Keys/KeyResolutionTest.php
new file mode 100644
index 0000000..196ff90
--- /dev/null
+++ b/php/tests/Keys/KeyResolutionTest.php
@@ -0,0 +1,92 @@
+assertNull(KeyResolution::resolveKey('theirs', [$a, $b]));
+ $this->assertSame(0, $a->resolveCalls);
+ $this->assertSame(0, $b->resolveCalls);
+ }
+
+    /**
+     * When the first supporting resolver yields null, the next one is consulted.
+     */
+    public function testSkipsToNextResolverWhenFirstFails(): void
+    {
+        // Both resolvers claim 'x'; only the second actually produces a key.
+        $failing = new RecordingResolver(['x'], null);
+        $working = new RecordingResolver(['x'], 'PEM-FROM-B');
+
+        $key = KeyResolution::resolveKey('x', [$failing, $working]);
+
+        $this->assertNotNull($key);
+        $this->assertSame('PEM-FROM-B', $key->publicKeyPem);
+        $this->assertSame(1, $failing->resolveCalls);
+        $this->assertSame(1, $working->resolveCalls);
+    }
+
+    /**
+     * When two resolvers both support and can resolve a keyid, the first one's
+     * key is returned and the second is never asked.
+     */
+    public function testFirstSupportingAndResolvingResolverWins(): void
+    {
+        $a = new RecordingResolver(['x'], 'A');
+        $b = new RecordingResolver(['x'], 'B');
+        $resolved = KeyResolution::resolveKey('x', [$a, $b]);
+        // Guard the property access below so a null result is a test failure, not a PHP error.
+        $this->assertNotNull($resolved);
+        $this->assertSame('A', $resolved->publicKeyPem);
+        $this->assertSame(0, $b->resolveCalls); // short-circuit
+    }
+
+    /**
+     * Entries in the resolver list that are not KeyResolver instances are passed over.
+     */
+    public function testIgnoresNonResolvers(): void
+    {
+        $real = new RecordingResolver(['x'], 'A');
+        // A string and an integer precede the only genuine resolver.
+        $candidates = ['nonsense', 42, $real];
+
+        $key = KeyResolution::resolveKey('x', $candidates);
+
+        $this->assertNotNull($key);
+        $this->assertSame('A', $key->publicKeyPem);
+    }
+
+    /**
+     * An empty keyid resolves to null.
+     */
+    public function testEmptyKeyidReturnsNull(): void
+    {
+        $resolvers = [new RecordingResolver(['x'], 'A')];
+
+        $outcome = KeyResolution::resolveKey('', $resolvers);
+
+        $this->assertNull($outcome);
+    }
+}
+
+/**
+ * KeyResolver test double that claims support for a fixed set of keyids and
+ * counts how often resolve() is invoked. Each resolve() call yields the
+ * configured PEM wrapped in an ed25519 ResolvedKey, or null when no PEM was
+ * configured.
+ */
+class RecordingResolver implements KeyResolver
+{
+    /** @var array keyids reported as supported */
+    private $supportedKeyids;
+    /** @var ?string PEM handed back by resolve(); null makes resolve() yield null */
+    private $pemOrNull;
+    /** @var int number of resolve() invocations so far */
+    public $resolveCalls = 0;
+
+    public function __construct(array $supportedKeyids, ?string $pemOrNull)
+    {
+        $this->pemOrNull = $pemOrNull;
+        $this->supportedKeyids = $supportedKeyids;
+    }
+
+    public function supports(string $keyid): bool
+    {
+        // Strict membership test: no type juggling between keyids.
+        return in_array($keyid, $this->supportedKeyids, true);
+    }
+
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        ++$this->resolveCalls;
+
+        return $this->pemOrNull !== null
+            ? new ResolvedKey($this->pemOrNull, 'ed25519', $keyid)
+            : null;
+    }
+}
diff --git a/php/tests/Keys/TrustDirectoryResolverTest.php b/php/tests/Keys/TrustDirectoryResolverTest.php
new file mode 100644
index 0000000..edd31fc
--- /dev/null
+++ b/php/tests/Keys/TrustDirectoryResolverTest.php
@@ -0,0 +1,121 @@
+assertTrue($resolver->supports('abc123'));
+ $this->assertFalse($resolver->supports('did:web:example.com'));
+ $this->assertFalse($resolver->supports('https://example.com/key'));
+ $this->assertFalse($resolver->supports(''));
+ }
+
+    /**
+     * Directory bases are queried in the order given until one answers, and a
+     * trailing slash on a base does not produce a double slash in the request URL.
+     */
+    public function testQueriesEachBaseInOrder(): void
+    {
+        $calls = [];
+        $fetcher = static function (string $url) use (&$calls): ?array {
+            $calls[] = $url;
+            // The first base yields nothing; only the second base answers.
+            if (strpos($url, 'second.example') !== false) {
+                return [
+                    'body' => json_encode(['publicKey' => 'PEM', 'algorithm' => 'ed25519']),
+                    'contentType' => 'application/json',
+                ];
+            }
+            return null;
+        };
+
+        $resolver = new TrustDirectoryResolver(
+            ['https://first.example/v1', 'https://second.example/v1/'],
+            $fetcher
+        );
+        $resolved = $resolver->resolve('abc123');
+
+        $this->assertNotNull($resolved);
+        $this->assertSame('PEM', $resolved->publicKeyPem);
+        // assertCount reports the array contents on failure, unlike assertSame on count().
+        $this->assertCount(2, $calls);
+        $this->assertSame('https://first.example/v1/keys/abc123', $calls[0]);
+        // Trailing slash on the second base should be normalized.
+        $this->assertSame('https://second.example/v1/keys/abc123', $calls[1]);
+    }
+
+    /**
+     * If the fetcher yields null for every base, resolve() yields null.
+     */
+    public function testReturnsNullWhenAllBasesFail(): void
+    {
+        $bases = ['https://a.example', 'https://b.example'];
+        $alwaysFails = static function (string $url): ?array {
+            return null;
+        };
+
+        $outcome = (new TrustDirectoryResolver($bases, $alwaysFails))->resolve('abc123');
+
+        $this->assertNull($outcome);
+    }
+
+    /**
+     * Spaces and slashes in the keyid are percent-encoded in the request URL.
+     */
+    public function testUrlEncodesKeyid(): void
+    {
+        $seenUrl = null;
+        $payload = json_encode(['publicKey' => 'PEM']);
+        $fetcher = static function (string $url) use (&$seenUrl, $payload): ?array {
+            $seenUrl = $url;
+            return ['body' => $payload, 'contentType' => 'application/json'];
+        };
+
+        $directory = new TrustDirectoryResolver(['https://dir.example/v1'], $fetcher);
+        $directory->resolve('id with spaces/and slashes');
+
+        $expectedUrl = 'https://dir.example/v1/keys/id%20with%20spaces%2Fand%20slashes';
+        $this->assertSame($expectedUrl, $seenUrl);
+    }
+
+    /**
+     * Once a base returns a usable key, the remaining bases are not queried.
+     */
+    public function testStopsAtFirstSuccess(): void
+    {
+        $calls = [];
+        $fetcher = static function (string $url) use (&$calls): ?array {
+            $calls[] = $url;
+            // Every base would answer, so any second request means the resolver did not stop.
+            return [
+                'body' => json_encode(['publicKey' => 'PEM']),
+                'contentType' => 'application/json',
+            ];
+        };
+        $resolver = new TrustDirectoryResolver(
+            ['https://first.example', 'https://second.example'],
+            $fetcher
+        );
+        $resolved = $resolver->resolve('abc123');
+        $this->assertNotNull($resolved);
+        // assertCount reports the recorded URLs on failure; the second base is never queried.
+        $this->assertCount(1, $calls);
+    }
+
+    /**
+     * A base that answers with unparseable JSON is passed over in favour of the next base.
+     */
+    public function testIgnoresInvalidJsonBaseAndContinues(): void
+    {
+        $broken = ['body' => 'not json', 'contentType' => 'application/json'];
+        $valid = [
+            'body' => json_encode(['publicKey' => 'PEM']),
+            'contentType' => 'application/json',
+        ];
+        $fetcher = static function (string $url) use ($broken, $valid): ?array {
+            // Any URL containing "first" gets the broken body; everything else is valid.
+            return strpos($url, 'first') !== false ? $broken : $valid;
+        };
+
+        $directory = new TrustDirectoryResolver(
+            ['https://first.example', 'https://second.example'],
+            $fetcher
+        );
+        $key = $directory->resolve('abc123');
+
+        $this->assertNotNull($key);
+        $this->assertSame('PEM', $key->publicKeyPem);
+    }
+}
diff --git a/php/tests/SignatureTest.php b/php/tests/SignatureTest.php
new file mode 100644
index 0000000..2c00e6a
--- /dev/null
+++ b/php/tests/SignatureTest.php
@@ -0,0 +1,260 @@
+assertSame(
+ 'sha256:ABC:sha256:DEF:example.com:2025-05-01T00:00Z',
+ Signature::buildSignatureBinding('sha256:ABC', 'sha256:DEF', 'example.com', '2025-05-01T00:00Z')
+ );
+ }
+
+    /**
+     * Every argument set supplied by the provider must be rejected with an
+     * InvalidArgumentException.
+     *
+     * @dataProvider emptyFieldProvider
+     */
+    public function testBuildSignatureBindingRejectsEmptyFields(string $contentHash, string $claimsHash, string $domain, string $signedAt): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        $fields = [$contentHash, $claimsHash, $domain, $signedAt];
+        Signature::buildSignatureBinding(...$fields);
+    }
+
+    /**
+     * Argument sets for buildSignatureBinding() in which exactly one field is empty.
+     *
+     * Declared static because PHPUnit 10 deprecates non-static data providers and
+     * later versions reject them; static providers also work on older releases.
+     *
+     * @return array<string, array{0: string, 1: string, 2: string, 3: string}>
+     */
+    public static function emptyFieldProvider(): array
+    {
+        return [
+            'empty contentHash' => ['', 'b', 'c', 'd'],
+            'empty claimsHash' => ['a', '', 'c', 'd'],
+            'empty domain' => ['a', 'b', '', 'd'],
+            'empty signedAt' => ['a', 'b', 'c', ''],
+        ];
+    }
+
+ // ------------------------------------------------------------------
+ // buildEndorsementBinding
+ // ------------------------------------------------------------------
+
+    /**
+     * The endorsement binding is the content hash and timestamp joined by a colon.
+     */
+    public function testBuildEndorsementBinding(): void
+    {
+        $binding = Signature::buildEndorsementBinding('sha256:XYZ', '2025-05-01T00:00Z');
+
+        $this->assertSame('sha256:XYZ:2025-05-01T00:00Z', $binding);
+    }
+
+    /**
+     * An empty content hash is rejected with an InvalidArgumentException.
+     */
+    public function testBuildEndorsementBindingRejectsEmpty(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        $emptyHash = '';
+        Signature::buildEndorsementBinding($emptyHash, '2025-05-01');
+    }
+
+ // ------------------------------------------------------------------
+ // verifySignature: ed25519 round trip via libsodium
+ // ------------------------------------------------------------------
+
+    /**
+     * A detached libsodium signature, base64-encoded with padding, verifies
+     * against the matching PEM key.
+     */
+    public function testVerifyEd25519RoundTripPaddedSignature(): void
+    {
+        $this->skipIfNoSodium();
+
+        $pair = $this->makeEd25519KeypairPem();
+        $publicPem = $pair[0];
+        $secretKey = $pair[1];
+
+        $payload = 'sha256:ABC:sha256:DEF:example.com:2025-05-01T00:00Z';
+        // base64_encode() keeps the '=' padding.
+        $encoded = base64_encode(sodium_crypto_sign_detached($payload, $secretKey));
+
+        $this->assertTrue(Signature::verifySignature($payload, $encoded, $publicPem, 'ed25519'));
+    }
+
+    /**
+     * The same round trip succeeds when the '=' padding is stripped from the signature.
+     */
+    public function testVerifyEd25519RoundTripUnpaddedSignature(): void
+    {
+        $this->skipIfNoSodium();
+
+        $pair = $this->makeEd25519KeypairPem();
+        $publicPem = $pair[0];
+        $secretKey = $pair[1];
+
+        $payload = 'hello';
+        $padded = base64_encode(sodium_crypto_sign_detached($payload, $secretKey));
+        $stripped = rtrim($padded, '=');
+
+        $this->assertTrue(Signature::verifySignature($payload, $stripped, $publicPem, 'ed25519'));
+    }
+
+    /**
+     * The algorithm name is accepted in upper and mixed case.
+     */
+    public function testVerifyEd25519IsCaseInsensitive(): void
+    {
+        $this->skipIfNoSodium();
+
+        $pair = $this->makeEd25519KeypairPem();
+        $publicPem = $pair[0];
+        $secretKey = $pair[1];
+
+        $payload = 'hello';
+        $encoded = base64_encode(sodium_crypto_sign_detached($payload, $secretKey));
+
+        foreach (['ED25519', 'Ed25519'] as $spelling) {
+            $this->assertTrue(Signature::verifySignature($payload, $encoded, $publicPem, $spelling));
+        }
+    }
+
+    /**
+     * A signature made over one message does not verify against a different message.
+     */
+    public function testVerifyEd25519RejectsTamperedMessage(): void
+    {
+        $this->skipIfNoSodium();
+
+        $pair = $this->makeEd25519KeypairPem();
+        $publicPem = $pair[0];
+        $secretKey = $pair[1];
+
+        $encoded = base64_encode(sodium_crypto_sign_detached('original', $secretKey));
+        $verdict = Signature::verifySignature('tampered', $encoded, $publicPem, 'ed25519');
+
+        $this->assertFalse($verdict);
+    }
+
+    /**
+     * A signature made with one keypair does not verify under another keypair's public key.
+     */
+    public function testVerifyEd25519RejectsBadKey(): void
+    {
+        $this->skipIfNoSodium();
+
+        // Two independent keypairs: sign with the first, verify with the second.
+        $signerSecret = $this->makeEd25519KeypairPem()[1];
+        $otherPem = $this->makeEd25519KeypairPem()[0];
+
+        $encoded = base64_encode(sodium_crypto_sign_detached('hello', $signerSecret));
+
+        $this->assertFalse(Signature::verifySignature('hello', $encoded, $otherPem, 'ed25519'));
+    }
+
+    /**
+     * verifySignature() accepts the public key as returned by
+     * sodium_crypto_sign_publickey(), without PEM wrapping.
+     */
+    public function testVerifyEd25519AcceptsRawKeyBytes(): void
+    {
+        $this->skipIfNoSodium();
+
+        $pair = sodium_crypto_sign_keypair();
+        $rawPublic = sodium_crypto_sign_publickey($pair);
+        $rawSecret = sodium_crypto_sign_secretkey($pair);
+
+        $payload = 'raw-key-test';
+        $encoded = base64_encode(sodium_crypto_sign_detached($payload, $rawSecret));
+
+        // The key goes in exactly as libsodium produced it.
+        $this->assertTrue(Signature::verifySignature($payload, $encoded, $rawPublic, 'ed25519'));
+    }
+
+    /**
+     * A signature string that cannot be valid base64 yields false.
+     */
+    public function testVerifyRejectsMalformedBase64(): void
+    {
+        $this->skipIfNoSodium();
+
+        $publicPem = $this->makeEd25519KeypairPem()[0];
+        // A single character (length 1 mod 4) can never be valid base64.
+        $verdict = Signature::verifySignature('msg', 'A', $publicPem, 'ed25519');
+
+        $this->assertFalse($verdict);
+    }
+
+    /**
+     * An unrecognised algorithm name raises an InvalidArgumentException.
+     */
+    public function testVerifyUnknownAlgorithmThrows(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        $anySignature = base64_encode('xx');
+        Signature::verifySignature('msg', $anySignature, 'irrelevant', 'frobnicate');
+    }
+
+ // ------------------------------------------------------------------
+ // verifySignature: ECDSA round trip via openssl
+ // ------------------------------------------------------------------
+
+    /**
+     * Signs with an OpenSSL prime256v1 key using SHA-256, then checks that the
+     * signature verifies for the signed message and fails for a different one.
+     * Skipped when OpenSSL is missing or cannot generate the key.
+     */
+    public function testVerifyEcdsaRoundTrip(): void
+    {
+        if (!function_exists('openssl_pkey_new')) {
+            $this->markTestSkipped('openssl extension not available');
+        }
+
+        $keyOptions = [
+            'private_key_type' => OPENSSL_KEYTYPE_EC,
+            'curve_name' => 'prime256v1',
+        ];
+        $privateKey = openssl_pkey_new($keyOptions);
+        if ($privateKey === false) {
+            $this->markTestSkipped('this OpenSSL build cannot generate prime256v1 keypairs');
+        }
+        $publicPem = openssl_pkey_get_details($privateKey)['key'];
+
+        $payload = 'ecdsa-test';
+        $rawSignature = '';
+        $this->assertTrue(openssl_sign($payload, $rawSignature, $privateKey, OPENSSL_ALGO_SHA256));
+        $encoded = base64_encode($rawSignature);
+
+        $this->assertTrue(Signature::verifySignature($payload, $encoded, $publicPem, 'ecdsa'));
+        $this->assertFalse(Signature::verifySignature('tampered', $encoded, $publicPem, 'ecdsa'));
+    }
+
+ // ------------------------------------------------------------------
+ // verifySignature: RSA round trip via openssl
+ // ------------------------------------------------------------------
+
+    /**
+     * Signs with a 2048-bit OpenSSL RSA key using SHA-256, then checks that the
+     * signature verifies for the signed message and fails for an altered one.
+     * Skipped when OpenSSL is missing or cannot generate the key.
+     */
+    public function testVerifyRsaRoundTrip(): void
+    {
+        if (!function_exists('openssl_pkey_new')) {
+            $this->markTestSkipped('openssl extension not available');
+        }
+
+        $keyOptions = [
+            'private_key_type' => OPENSSL_KEYTYPE_RSA,
+            'private_key_bits' => 2048,
+        ];
+        $privateKey = openssl_pkey_new($keyOptions);
+        if ($privateKey === false) {
+            $this->markTestSkipped('OpenSSL keypair generation unavailable');
+        }
+        $publicPem = openssl_pkey_get_details($privateKey)['key'];
+
+        $payload = 'rsa-test';
+        $rawSignature = '';
+        $this->assertTrue(openssl_sign($payload, $rawSignature, $privateKey, OPENSSL_ALGO_SHA256));
+        $encoded = base64_encode($rawSignature);
+
+        $this->assertTrue(Signature::verifySignature($payload, $encoded, $publicPem, 'rsa'));
+        $this->assertFalse(Signature::verifySignature($payload . 'x', $encoded, $publicPem, 'rsa'));
+    }
+
+ // ------------------------------------------------------------------
+ // ed25519RawToPem helper
+ // ------------------------------------------------------------------
+
+    /**
+     * ed25519RawToPem() output carries the PUBLIC KEY armor lines and is usable
+     * by verifySignature().
+     */
+    public function testEd25519RawToPemStructure(): void
+    {
+        $this->skipIfNoSodium();
+
+        $pair = sodium_crypto_sign_keypair();
+        $wrapped = Signature::ed25519RawToPem(sodium_crypto_sign_publickey($pair));
+
+        $this->assertStringContainsString('-----BEGIN PUBLIC KEY-----', $wrapped);
+        $this->assertStringContainsString('-----END PUBLIC KEY-----', $wrapped);
+
+        // Sign with the secret half, then verify through the PEM just produced.
+        $secretKey = sodium_crypto_sign_secretkey($pair);
+        $encoded = base64_encode(sodium_crypto_sign_detached('roundtrip', $secretKey));
+        $this->assertTrue(Signature::verifySignature('roundtrip', $encoded, $wrapped, 'ed25519'));
+    }
+
+    /**
+     * A five-byte input to ed25519RawToPem() raises an InvalidArgumentException.
+     */
+    public function testEd25519RawToPemRejectsWrongLength(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        $tooShort = 'short';
+        Signature::ed25519RawToPem($tooShort);
+    }
+
+ // ------------------------------------------------------------------
+ // helpers
+ // ------------------------------------------------------------------
+
+    /**
+     * Marks the running test as skipped when sodium_crypto_sign_keypair() is not defined.
+     */
+    private function skipIfNoSodium(): void
+    {
+        if (function_exists('sodium_crypto_sign_keypair')) {
+            return;
+        }
+        $this->markTestSkipped('libsodium not available');
+    }
+
+    /**
+     * Creates a new Ed25519 keypair and returns the public half PEM-wrapped via
+     * Signature::ed25519RawToPem() together with the secret key.
+     *
+     * @return array{0: string, 1: string} [PEM publicKey, raw secretKey]
+     */
+    private function makeEd25519KeypairPem(): array
+    {
+        $pair = sodium_crypto_sign_keypair();
+        $publicPem = Signature::ed25519RawToPem(sodium_crypto_sign_publickey($pair));
+
+        return [$publicPem, sodium_crypto_sign_secretkey($pair)];
+    }
+}
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..4e8acfd
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,46 @@
+# HTMLTrust Canonicalization -- Python
+
+Python binding for the HTMLTrust canonical text normalization library. It must produce byte-identical output to the JavaScript, Go, PHP, and Rust implementations for every test vector in the shared conformance suite.
+
+## Status
+
+Scaffolded -- implementation pending.
+
+## Scope
+
+This package provides two functions:
+
+1. **`normalize_text(text: str) -> str`** -- applies the 8-phase canonicalization defined in [`../spec.md`](../spec.md) to a UTF-8 string. Mirrors the existing JavaScript/Go/PHP signatures.
+2. **`extract_canonical_text(html: str) -> str`** -- parses an HTML fragment, walks the DOM, emits text nodes in document order with single-space separators between block elements, and applies `normalize_text` to the result. This is the HTML -> canonical text extraction defined in the paper's §2.1.
+
+Both are pure functions: no network, no file I/O, deterministic output for the same input.
+
+## Planned dependencies
+
+- `unicodedata` (stdlib) for NFKC normalization
+- `beautifulsoup4` or `lxml` for HTML parsing in `extract_canonical_text`
+- No other runtime dependencies
+
+## Conformance
+
+The package MUST pass every vector in `../conformance/vectors.json` (to be defined). A test runner at `tests/test_conformance.py` should load the shared vectors and assert byte-identical output.
+
+## Installation (planned)
+
+```bash
+pip install htmltrust-canonicalization
+# or for development:
+cd python && pip install -e .
+```
+
+## Usage (planned)
+
+```python
+from htmltrust_canonicalization import normalize_text, extract_canonical_text
+
+canonical = normalize_text('He said, "Hello\u2026"')
+# -> 'He said, "Hello..."'
+
+from_html = extract_canonical_text('<p>Hello world!</p>')
+# -> 'Hello world!'
+```
diff --git a/rust/README.md b/rust/README.md
new file mode 100644
index 0000000..075de31
--- /dev/null
+++ b/rust/README.md
@@ -0,0 +1,46 @@
+# HTMLTrust Canonicalization -- Rust
+
+Rust crate for the HTMLTrust canonical text normalization library. It must produce byte-identical output to the JavaScript, Go, PHP, and Python implementations for every test vector in the shared conformance suite.
+
+## Status
+
+Scaffolded -- implementation pending.
+
+## Scope
+
+This crate provides two functions:
+
+1. **`normalize_text(text: &str) -> String`** -- applies the 8-phase canonicalization defined in [`../spec.md`](../spec.md) to a UTF-8 string.
+2. **`extract_canonical_text(html: &str) -> String`** -- parses an HTML fragment, walks the DOM, emits text nodes in document order with single-space separators between block elements, and applies `normalize_text` to the result.
+
+Both are pure functions: no network, no file I/O, deterministic output for the same input.
+
+## Planned dependencies
+
+- `unicode-normalization` for NFKC
+- `scraper` or `html5ever` for HTML parsing in `extract_canonical_text`
+- Minimal `regex` for the whitespace and punctuation phases
+- No other runtime dependencies
+
+## Conformance
+
+The crate MUST pass every vector in `../conformance/vectors.json`. A test at `tests/conformance.rs` should load the shared vectors and assert byte-identical output.
+
+## Installation (planned)
+
+```toml
+[dependencies]
+htmltrust-canonicalization = "0.1"
+```
+
+## Usage (planned)
+
+```rust
+use htmltrust_canonicalization::{normalize_text, extract_canonical_text};
+
+let canonical = normalize_text("He said, \"Hello\u{2026}\"");
+// -> "He said, \"Hello...\""
+
+let from_html = extract_canonical_text("<p>Hello world!</p>");
+// -> "Hello world!"
+```