diff --git a/go/canonicalize_test.go b/go/canonicalize_test.go index 4778f17..561c718 100644 --- a/go/canonicalize_test.go +++ b/go/canonicalize_test.go @@ -1,6 +1,24 @@ package canonicalize import ( + "context" + "crypto" + "crypto/ecdsa" + "crypto/ed25519" + "crypto/elliptic" + "crypto/rand" + "crypto/rsa" + "crypto/sha256" + "crypto/x509" + "encoding/asn1" + "encoding/base64" + "encoding/json" + "encoding/pem" + "fmt" + "math/big" + "net/http" + "net/http/httptest" + "strings" "testing" ) @@ -11,23 +29,23 @@ func TestNormalize(t *testing.T) { inputB string wantSame bool }{ - {"Curly double quotes → straight", "\u201CHello\u201D", "\"Hello\"", true}, - {"Precomposed vs combining (NFKC)", "caf\u00E9", "cafe\u0301", true}, - {"fi ligature (NFKC)", "\uFB01nd", "find", true}, - {"Em dash → hyphen-minus", "word \u2014 word", "word - word", true}, - {"Guillemets → double quotes", "\u00ABBonjour\u00BB", "\"Bonjour\"", true}, - {"CJK corner brackets → double quotes", "\u300C\u6771\u4EAC\u300D", "\"\u6771\u4EAC\"", true}, - {"ZWNJ is semantic (Persian)", "\u0645\u06CC\u200C\u062E\u0648\u0627\u0647\u0645", "\u0645\u06CC\u062E\u0648\u0627\u0647\u0645", false}, - {"Arabic tatweel stripped", "\u0643\u062A\u0640\u0640\u0640\u0627\u0628", "\u0643\u062A\u0627\u0628", true}, - {"Fullwidth ASCII (NFKC)", "\uFF21\uFF11", "A1", true}, - {"Circled digit (NFKC)", "\u2460", "1", true}, - {"ZWSP stripped", "word\u200Bword", "wordword", true}, - {"ZWNJ preserved (different)", "word\u200Cword", "wordword", false}, - {"Ellipsis → three dots", "Hello\u2026", "Hello...", true}, - {"Curly single quotes → straight", "\u2018Hello\u2019", "'Hello'", true}, - {"Low-9 quotes → straight", "\u201AGerman\u201C", "\"German\"", true}, - {"No-break space → space", "a\u00A0b", "a b", true}, - {"Ideographic space → space", "a\u3000b", "a b", true}, + {"Curly double quotes → straight", "“Hello”", "\"Hello\"", true}, + {"Precomposed vs combining (NFKC)", "café", "café", true}, + {"fi ligature (NFKC)", 
"find", "find", true}, + {"Em dash → hyphen-minus", "word — word", "word - word", true}, + {"Guillemets → double quotes", "«Bonjour»", "\"Bonjour\"", true}, + {"CJK corner brackets → double quotes", "「東京」", "\"東京\"", true}, + {"ZWNJ is semantic (Persian)", "می‌خواهم", "میخواهم", false}, + {"Arabic tatweel stripped", "كتـــاب", "كتاب", true}, + {"Fullwidth ASCII (NFKC)", "A1", "A1", true}, + {"Circled digit (NFKC)", "①", "1", true}, + {"ZWSP stripped", "word​word", "wordword", true}, + {"ZWNJ preserved (different)", "word‌word", "wordword", false}, + {"Ellipsis → three dots", "Hello…", "Hello...", true}, + {"Curly single quotes → straight", "‘Hello’", "'Hello'", true}, + {"Low-9 quotes → straight", "‚German“", "\"German\"", true}, + {"No-break space → space", "a b", "a b", true}, + {"Ideographic space → space", "a b", "a b", true}, {"Whitespace collapse", "a \t b", "a b", true}, } @@ -43,3 +61,491 @@ func TestNormalize(t *testing.T) { }) } } + +// ----- ExtractCanonicalText ----- + +func TestExtractCanonicalText(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + { + name: "block boundaries become whitespace", + in: "

Hello

World

", + want: "Hello World", + }, + { + name: "inline elements do not introduce spaces", + in: "

hello world

", + want: "hello world", + }, + { + name: "scripts and styles dropped with content", + in: "

before

after

", + want: "before after", + }, + { + name: "meta inside signed-section is metadata, not content", + in: `Body`, + want: "Body", + }, + { + name: "named entities decoded", + in: "

fish & chips

", + want: "fish & chips", + }, + { + name: "numeric entities decoded", + in: "

café

", + want: "café", + }, + { + name: "hex entities decoded", + in: "

", + want: "-", // em dash → hyphen via Phase 4 + }, + { + name: "br is a void element → whitespace", + in: "

line1
line2

", + want: "line1 line2", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ExtractCanonicalText(tt.in) + if err != nil { + t.Fatalf("ExtractCanonicalText(%q) returned error: %v", tt.in, err) + } + if got != tt.want { + t.Errorf("ExtractCanonicalText(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} + +// ----- CanonicalizeClaims ----- + +func TestCanonicalizeClaims(t *testing.T) { + got := CanonicalizeClaims(map[string]string{ + "signed-at": "2025-01-01T00:00:00Z", + "author": "alice", + "domain": "example.com", + }) + want := "author=alice\ndomain=example.com\nsigned-at=2025-01-01T00:00:00Z" + if got != want { + t.Errorf("CanonicalizeClaims = %q, want %q", got, want) + } +} + +func TestCanonicalizeClaimsNormalizesValues(t *testing.T) { + got := CanonicalizeClaims(map[string]string{ + "title": "“Hello”", + }) + want := `title="Hello"` + if got != want { + t.Errorf("CanonicalizeClaims = %q, want %q", got, want) + } +} + +// ----- BuildSignatureBinding ----- + +func TestBuildSignatureBinding(t *testing.T) { + got, err := BuildSignatureBinding("sha256:abc", "sha256:def", "example.com", "2025-01-01T00:00:00Z") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + want := "sha256:abc:sha256:def:example.com:2025-01-01T00:00:00Z" + if got != want { + t.Errorf("BuildSignatureBinding = %q, want %q", got, want) + } +} + +func TestBuildSignatureBindingErrors(t *testing.T) { + cases := [][]string{ + {"", "b", "c", "d"}, + {"a", "", "c", "d"}, + {"a", "b", "", "d"}, + {"a", "b", "c", ""}, + } + for _, c := range cases { + if _, err := BuildSignatureBinding(c[0], c[1], c[2], c[3]); err == nil { + t.Errorf("expected error for inputs %v", c) + } + } +} + +// ----- VerifySignature ----- + +func encodePKIX(t *testing.T, pub any) string { + t.Helper() + b, err := x509.MarshalPKIXPublicKey(pub) + if err != nil { + t.Fatalf("MarshalPKIXPublicKey: %v", err) + } + return string(pem.EncodeToMemory(&pem.Block{Type: "PUBLIC KEY", 
Bytes: b})) +} + +func TestVerifySignatureEd25519(t *testing.T) { + pub, priv, err := ed25519.GenerateKey(rand.Reader) + if err != nil { + t.Fatalf("GenerateKey: %v", err) + } + msg := "the quick brown fox" + sig := ed25519.Sign(priv, []byte(msg)) + + pemStr := encodePKIX(t, pub) + + ok, err := VerifySignature(msg, base64.StdEncoding.EncodeToString(sig), pemStr, "ed25519") + if err != nil { + t.Fatalf("VerifySignature returned error: %v", err) + } + if !ok { + t.Errorf("expected ed25519 signature to verify") + } + + // Unpadded base64 should also work. + ok, err = VerifySignature(msg, base64.RawStdEncoding.EncodeToString(sig), pemStr, "ED25519") + if err != nil { + t.Fatalf("VerifySignature(raw) returned error: %v", err) + } + if !ok { + t.Errorf("expected ed25519 signature to verify with unpadded base64 + uppercase algo") + } + + // Tampered message must fail. + ok, _ = VerifySignature("tampered", base64.StdEncoding.EncodeToString(sig), pemStr, "ed25519") + if ok { + t.Errorf("expected tampered ed25519 signature to fail") + } +} + +func TestVerifySignatureRSA(t *testing.T) { + priv, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + t.Fatalf("rsa.GenerateKey: %v", err) + } + msg := "rsa payload" + digest := sha256.Sum256([]byte(msg)) + sig, err := rsa.SignPKCS1v15(rand.Reader, priv, crypto.SHA256, digest[:]) + if err != nil { + t.Fatalf("SignPKCS1v15: %v", err) + } + + pemStr := encodePKIX(t, &priv.PublicKey) + ok, err := VerifySignature(msg, base64.StdEncoding.EncodeToString(sig), pemStr, "RSA") + if err != nil { + t.Fatalf("VerifySignature returned error: %v", err) + } + if !ok { + t.Errorf("expected rsa signature to verify") + } + + ok, _ = VerifySignature("tampered", base64.StdEncoding.EncodeToString(sig), pemStr, "rsa") + if ok { + t.Errorf("expected tampered rsa signature to fail") + } +} + +func TestVerifySignatureECDSA(t *testing.T) { + priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("ecdsa.GenerateKey: 
%v", err) + } + msg := "ecdsa payload" + digest := sha256.Sum256([]byte(msg)) + r, s, err := ecdsa.Sign(rand.Reader, priv, digest[:]) + if err != nil { + t.Fatalf("ecdsa.Sign: %v", err) + } + sigBytes, err := asn1.Marshal(struct{ R, S *big.Int }{r, s}) + if err != nil { + t.Fatalf("asn1.Marshal: %v", err) + } + pemStr := encodePKIX(t, &priv.PublicKey) + ok, err := VerifySignature(msg, base64.StdEncoding.EncodeToString(sigBytes), pemStr, "ecdsa") + if err != nil { + t.Fatalf("VerifySignature returned error: %v", err) + } + if !ok { + t.Errorf("expected ecdsa signature to verify") + } +} + +func TestVerifySignatureUnsupportedAlgorithm(t *testing.T) { + pub, _, err := ed25519.GenerateKey(rand.Reader) + if err != nil { + t.Fatalf("GenerateKey: %v", err) + } + pemStr := encodePKIX(t, pub) + if _, err := VerifySignature("x", "AAAA", pemStr, "weird"); err == nil { + t.Errorf("expected error for unsupported algorithm") + } +} + +// ----- Resolver tests ----- + +func newEd25519PEM(t *testing.T) (string, ed25519.PublicKey, ed25519.PrivateKey) { + t.Helper() + pub, priv, err := ed25519.GenerateKey(rand.Reader) + if err != nil { + t.Fatalf("GenerateKey: %v", err) + } + return encodePKIX(t, pub), pub, priv +} + +func TestDidWebResolver(t *testing.T) { + pemStr, _, _ := newEd25519PEM(t) + + mux := http.NewServeMux() + mux.HandleFunc("/.well-known/did.json", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "verificationMethod": []map[string]any{ + { + "id": "did:web:example#key1", + "type": "Ed25519VerificationKey2020", + "publicKeyPem": pemStr, + }, + }, + }) + }) + srv := httptest.NewTLSServer(mux) + defer srv.Close() + + // Rewrite the request to point at our test server, regardless of host. 
+ client := srv.Client() + client.Transport = rewriteTransport{base: srv.Client().Transport, target: srv.URL} + + r := DidWebResolver{HTTPClient: client} + got, err := r.Resolve(context.Background(), "did:web:example.test") + if err != nil { + t.Fatalf("Resolve: %v", err) + } + if got == nil { + t.Fatal("expected ResolvedKey, got nil") + } + if !strings.Contains(got.PublicKeyPEM, "BEGIN PUBLIC KEY") { + t.Errorf("expected PEM in PublicKeyPEM, got %q", got.PublicKeyPEM) + } + if got.Algorithm != "ed25519" { + t.Errorf("expected algorithm ed25519, got %q", got.Algorithm) + } +} + +func TestDidWebResolverDeclinesNonDid(t *testing.T) { + r := DidWebResolver{} + got, err := r.Resolve(context.Background(), "https://example.com/key.json") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != nil { + t.Errorf("expected nil for non-did keyid, got %+v", got) + } +} + +func TestDirectURLResolverJSON(t *testing.T) { + pemStr, _, _ := newEd25519PEM(t) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{ + "publicKey": pemStr, + "algorithm": "ed25519", + }) + })) + defer srv.Close() + + r := DirectURLResolver{HTTPClient: srv.Client()} + got, err := r.Resolve(context.Background(), srv.URL+"/key.json") + if err != nil { + t.Fatalf("Resolve: %v", err) + } + if got == nil || got.Algorithm != "ed25519" { + t.Fatalf("unexpected key: %+v", got) + } +} + +func TestDirectURLResolverPEM(t *testing.T) { + pemStr, _, _ := newEd25519PEM(t) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/x-pem-file") + _, _ = w.Write([]byte(pemStr)) + })) + defer srv.Close() + + r := DirectURLResolver{HTTPClient: srv.Client()} + got, err := r.Resolve(context.Background(), srv.URL+"/key.pem") + if err != nil { + t.Fatalf("Resolve: %v", err) + } + if got == nil 
|| !strings.Contains(got.PublicKeyPEM, "BEGIN PUBLIC KEY") { + t.Fatalf("unexpected key: %+v", got) + } +} + +func TestDirectURLResolverDeclinesNonHTTP(t *testing.T) { + r := DirectURLResolver{} + got, err := r.Resolve(context.Background(), "did:web:example.test") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != nil { + t.Errorf("expected nil, got %+v", got) + } +} + +func TestTrustDirectoryResolver(t *testing.T) { + pemStr, _, _ := newEd25519PEM(t) + + // First base 404s, second base returns the key. + bad := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.NotFound(w, r) + })) + defer bad.Close() + good := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/keys/abc123" { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{ + "publicKey": pemStr, + "algorithm": "ed25519", + }) + })) + defer good.Close() + + r := TrustDirectoryResolver{ + BaseURLs: []string{bad.URL, good.URL}, + HTTPClient: good.Client(), + } + got, err := r.Resolve(context.Background(), "abc123") + if err != nil { + t.Fatalf("Resolve: %v", err) + } + if got == nil { + t.Fatal("expected key, got nil") + } + if got.Keyid != "abc123" { + t.Errorf("expected Keyid=abc123, got %q", got.Keyid) + } +} + +// ----- ResolveKey ----- + +func TestResolveKeyChain(t *testing.T) { + pemStr, _, _ := newEd25519PEM(t) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{ + "publicKey": pemStr, + "algorithm": "ed25519", + }) + })) + defer srv.Close() + + resolvers := []KeyResolver{ + DidWebResolver{}, + DirectURLResolver{HTTPClient: srv.Client()}, + } + got, err := ResolveKey(context.Background(), srv.URL+"/key.json", resolvers) + if err != nil { + t.Fatalf("ResolveKey: %v", 
err) + } + if got == nil || got.Algorithm != "ed25519" { + t.Fatalf("unexpected key: %+v", got) + } +} + +func TestResolveKeyNoMatch(t *testing.T) { + if _, err := ResolveKey(context.Background(), "did:fake:nope", []KeyResolver{DidWebResolver{}}); err == nil { + t.Errorf("expected error when no resolver matches") + } +} + +// ----- VerifyEndorsement ----- + +func TestVerifyEndorsement(t *testing.T) { + pemStr, _, priv := newEd25519PEM(t) + + endorsement := Endorsement{ + Endorser: "", // filled in below once we know the URL + Endorsement: "sha256:contenthash", + Timestamp: "2025-05-01T00:00:00Z", + Algorithm: "ed25519", + } + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{ + "publicKey": pemStr, + "algorithm": "ed25519", + }) + })) + defer srv.Close() + endorsement.Endorser = srv.URL + "/key.json" + + msg := endorsement.Endorsement + ":" + endorsement.Timestamp + sig := ed25519.Sign(priv, []byte(msg)) + endorsement.Signature = base64.StdEncoding.EncodeToString(sig) + + resolvers := []KeyResolver{DirectURLResolver{HTTPClient: srv.Client()}} + ok, err := VerifyEndorsement(context.Background(), endorsement, resolvers) + if err != nil { + t.Fatalf("VerifyEndorsement: %v", err) + } + if !ok { + t.Errorf("expected endorsement to verify") + } + + // Tamper with the timestamp; should now fail. 
+ tampered := endorsement + tampered.Timestamp = "2025-05-02T00:00:00Z" + ok, _ = VerifyEndorsement(context.Background(), tampered, resolvers) + if ok { + t.Errorf("expected tampered endorsement to fail") + } +} + +func TestVerifyEndorsementMissingFields(t *testing.T) { + cases := []Endorsement{ + {Endorser: "", Endorsement: "x", Signature: "x", Timestamp: "x"}, + {Endorser: "x", Endorsement: "", Signature: "x", Timestamp: "x"}, + {Endorser: "x", Endorsement: "x", Signature: "", Timestamp: "x"}, + {Endorser: "x", Endorsement: "x", Signature: "x", Timestamp: ""}, + } + for i, c := range cases { + if _, err := VerifyEndorsement(context.Background(), c, nil); err == nil { + t.Errorf("case %d: expected error", i) + } + } +} + +// ----- helpers ----- + +// rewriteTransport is a minimal RoundTripper that rewrites all incoming +// requests to point at `target` (host + scheme), preserving path and query. +// Used so DidWebResolver can be exercised without DNS gymnastics. +type rewriteTransport struct { + base http.RoundTripper + target string +} + +func (t rewriteTransport) RoundTrip(req *http.Request) (*http.Response, error) { + // Build a new URL: target + original path + raw query. + newURL := fmt.Sprintf("%s%s", strings.TrimRight(t.target, "/"), req.URL.Path) + if req.URL.RawQuery != "" { + newURL += "?" + req.URL.RawQuery + } + r2, err := http.NewRequestWithContext(req.Context(), req.Method, newURL, req.Body) + if err != nil { + return nil, err + } + r2.Header = req.Header.Clone() + base := t.base + if base == nil { + base = http.DefaultTransport + } + return base.RoundTrip(r2) +} diff --git a/go/endorsement.go b/go/endorsement.go new file mode 100644 index 0000000..43b2953 --- /dev/null +++ b/go/endorsement.go @@ -0,0 +1,50 @@ +package canonicalize + +import ( + "context" + "errors" +) + +// Endorsement is a third-party signed JSON attestation about a specific +// content hash, as defined in HTMLTrust spec §2.5. 
+type Endorsement struct { + Endorser string `json:"endorser"` + Endorsement string `json:"endorsement"` // the targeted content-hash, e.g. "sha256:..." + Signature string `json:"signature"` + Timestamp string `json:"timestamp"` + Algorithm string `json:"algorithm,omitempty"` // defaults to "ed25519" +} + +// VerifyEndorsement resolves the endorser's keyid and verifies the +// endorsement's signature over the canonical binding "{endorsement}:{timestamp}". +// If the endorsement does not specify an algorithm, ed25519 is assumed. If the +// resolver chain returns a key with its own declared algorithm, that takes +// precedence over the endorsement's hint (the resolved key is the source of +// truth about what the signer actually uses). +func VerifyEndorsement(ctx context.Context, endorsement Endorsement, resolvers []KeyResolver) (bool, error) { + if endorsement.Endorser == "" { + return false, errors.New("VerifyEndorsement: endorser is required") + } + if endorsement.Endorsement == "" { + return false, errors.New("VerifyEndorsement: endorsement (target content hash) is required") + } + if endorsement.Signature == "" { + return false, errors.New("VerifyEndorsement: signature is required") + } + if endorsement.Timestamp == "" { + return false, errors.New("VerifyEndorsement: timestamp is required") + } + key, err := ResolveKey(ctx, endorsement.Endorser, resolvers) + if err != nil { + return false, err + } + algorithm := key.Algorithm + if algorithm == "" { + algorithm = endorsement.Algorithm + } + if algorithm == "" { + algorithm = "ed25519" + } + message := endorsement.Endorsement + ":" + endorsement.Timestamp + return VerifySignature(message, endorsement.Signature, key.PublicKeyPEM, algorithm) +} diff --git a/go/extract.go b/go/extract.go new file mode 100644 index 0000000..bb08b6e --- /dev/null +++ b/go/extract.go @@ -0,0 +1,150 @@ +package canonicalize + +import ( + "regexp" + "sort" + "strconv" + "strings" +) + +// Elements whose text content is NEVER part of the 
// signed content. These are
// either metadata (meta, link, script, style) or the signed-section wrapper's
// own metadata (meta tags inside a signed-section carry claims, not content).
// They are stripped entirely (with their contents) before extracting text.
//
// Go's RE2 has no backreferences, so we compile one non-greedy regex per
// element name and apply them in sequence.
var excludedPairTagNames = []string{"script", "style", "meta", "link", "head", "noscript"}

// excludedPairREs holds one compiled pattern per entry of excludedPairTagNames.
// Each pattern spans from the element's opening tag through its matching
// closing tag, so a replacement removes the element together with its
// contents. The explicit closing-tag tail is required: without it the
// non-greedy `.*?` matches the empty string and only the opening tag is
// removed, leaving script/style bodies in the extracted text.
var excludedPairREs = func() []*regexp.Regexp {
	out := make([]*regexp.Regexp, 0, len(excludedPairTagNames))
	for _, name := range excludedPairTagNames {
		out = append(out, regexp.MustCompile(`(?is)<`+name+`\b[^>]*>.*?</`+name+`\s*>`))
	}
	return out
}()

// Self-closing and void elements (no text content) to strip.
var voidElementsRE = regexp.MustCompile(
	`(?i)<(meta|link|br|hr|img|input|source|track|wbr|area|base|col|embed|param)\b[^>]*/?>`,
)

// Block-level elements whose boundaries should become whitespace separators.
const blockElements = `address|article|aside|blockquote|canvas|dd|div|dl|dt|` +
	`fieldset|figcaption|figure|footer|form|h[1-6]|header|hr|li|main|nav|` +
	`noscript|ol|output|p|pre|section|table|tfoot|thead|tr|td|th|ul|video`

// blockOpenRE matches the opening tag of any block-level element.
var blockOpenRE = regexp.MustCompile(`(?i)<(` + blockElements + `)\b[^>]*>`)

// blockCloseRE matches the closing tag of any block-level element. It must
// never be compiled from an empty pattern: an empty regex matches at every
// position, which would insert a separator between every character.
var blockCloseRE = regexp.MustCompile(`(?i)</(` + blockElements + `)\s*>`)

// Any remaining HTML tag (inline elements stripped without adding whitespace).
var anyTagRE = regexp.MustCompile(`(?i)<\/?[a-z][a-z0-9-]*\b[^>]*>`)

// HTML named-entity table (common entities; numeric handled separately).
+var namedEntities = map[string]string{ + "&": "&", + "<": "<", + ">": ">", + """: "\"", + "'": "'", + " ": " ", + "–": "–", + "—": "—", + "‘": "‘", + "’": "’", + "“": "“", + "”": "”", + "…": "…", + "©": "©", + "®": "®", + "™": "™", +} + +var ( + namedEntityRE = regexp.MustCompile(`&[a-zA-Z]+;`) + decimalEntityRE = regexp.MustCompile(`&#(\d+);`) + hexEntityRE = regexp.MustCompile(`&#x([0-9a-fA-F]+);`) +) + +func decodeEntities(text string) string { + text = namedEntityRE.ReplaceAllStringFunc(text, func(match string) string { + key := strings.ToLower(match) + if v, ok := namedEntities[key]; ok { + return v + } + return match + }) + text = decimalEntityRE.ReplaceAllStringFunc(text, func(match string) string { + m := decimalEntityRE.FindStringSubmatch(match) + if len(m) < 2 { + return match + } + n, err := strconv.Atoi(m[1]) + if err != nil { + return match + } + return string(rune(n)) + }) + text = hexEntityRE.ReplaceAllStringFunc(text, func(match string) string { + m := hexEntityRE.FindStringSubmatch(match) + if len(m) < 2 { + return match + } + n, err := strconv.ParseInt(m[1], 16, 32) + if err != nil { + return match + } + return string(rune(n)) + }) + return text +} + +// ExtractCanonicalText extracts canonical text from an HTML fragment for +// signing or verification. Mirrors the JS extractCanonicalText() reference +// implementation: strips excluded elements, converts block boundaries to +// whitespace, strips remaining inline markup, decodes entities, and runs the +// full text normalization pipeline. The returned string is trimmed. +// +// Per HTMLTrust spec §2.1 this produces a text-only hash input: markup and +// attributes of the signed content itself are not covered by the hash. +func ExtractCanonicalText(html string, opts ...Options) (string, error) { + // Step 1: Strip excluded elements and their contents. 
+ text := html + for _, re := range excludedPairREs { + text = re.ReplaceAllString(text, " ") + } + text = voidElementsRE.ReplaceAllString(text, " ") + + // Step 2: Convert block boundaries to whitespace. + text = blockOpenRE.ReplaceAllString(text, " ") + text = blockCloseRE.ReplaceAllString(text, " ") + + // Step 3: Strip all remaining (inline) tags. + text = anyTagRE.ReplaceAllString(text, "") + + // Step 4: Decode HTML entities. + text = decodeEntities(text) + + // Step 5: Apply the full canonicalization pipeline. + return strings.TrimSpace(NormalizeText(text, opts...)), nil +} + +// CanonicalizeClaims serializes a claims map as a sorted list of "name=value" +// pairs joined by "\n". Both names and values are pushed through NormalizeText +// before serialization so the output is independent of trivial Unicode noise. +// Mirrors the JS canonicalizeClaims() reference implementation. +func CanonicalizeClaims(claims map[string]string) string { + type entry struct{ name, value string } + entries := make([]entry, 0, len(claims)) + for k, v := range claims { + entries = append(entries, entry{NormalizeText(k), NormalizeText(v)}) + } + sort.Slice(entries, func(i, j int) bool { + return entries[i].name < entries[j].name + }) + parts := make([]string, len(entries)) + for i, e := range entries { + parts[i] = e.name + "=" + e.value + } + return strings.Join(parts, "\n") +} diff --git a/go/resolver.go b/go/resolver.go new file mode 100644 index 0000000..caa2b2d --- /dev/null +++ b/go/resolver.go @@ -0,0 +1,232 @@ +package canonicalize + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" +) + +// ResolvedKey is the result of a successful keyid resolution. +type ResolvedKey struct { + PublicKeyPEM string + Algorithm string + Keyid string +} + +// KeyResolver resolves a keyid to a public key. A resolver that does not apply +// to a particular keyid (e.g. 
a DID resolver handed an https URL) MUST return +// (nil, nil) so the next resolver in the chain is tried. +type KeyResolver interface { + Resolve(ctx context.Context, keyid string) (*ResolvedKey, error) +} + +// ResolveKey walks the supplied resolver chain in order and returns the first +// non-nil ResolvedKey. If every resolver declines, an error is returned. +func ResolveKey(ctx context.Context, keyid string, resolvers []KeyResolver) (*ResolvedKey, error) { + if keyid == "" { + return nil, errors.New("ResolveKey: keyid is required") + } + for _, r := range resolvers { + key, err := r.Resolve(ctx, keyid) + if err != nil { + return nil, err + } + if key != nil { + return key, nil + } + } + return nil, fmt.Errorf("ResolveKey: no resolver matched keyid %q", keyid) +} + +func httpClient(c *http.Client) *http.Client { + if c != nil { + return c + } + return http.DefaultClient +} + +// ----- did:web ----- + +// DidWebResolver resolves did:web:[:...] keyids by fetching the +// DID document at https:///.well-known/did.json and returning the +// first verificationMethod entry that contains a publicKeyPem field. +type DidWebResolver struct { + HTTPClient *http.Client +} + +type didDocument struct { + VerificationMethod []verificationMethod `json:"verificationMethod"` +} + +type verificationMethod struct { + ID string `json:"id"` + Type string `json:"type"` + PublicKeyPem string `json:"publicKeyPem"` + Algorithm string `json:"algorithm"` +} + +// Resolve implements KeyResolver. +func (r DidWebResolver) Resolve(ctx context.Context, keyid string) (*ResolvedKey, error) { + if !strings.HasPrefix(keyid, "did:web:") { + return nil, nil + } + rest := strings.TrimPrefix(keyid, "did:web:") + // did:web allows ":" as path separators after the domain. 
+ parts := strings.Split(rest, ":") + domain := parts[0] + if domain == "" { + return nil, fmt.Errorf("DidWebResolver: empty domain in keyid %q", keyid) + } + url := "https://" + domain + "/.well-known/did.json" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + resp, err := httpClient(r.HTTPClient).Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("DidWebResolver: GET %s: status %d", url, resp.StatusCode) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + var doc didDocument + if err := json.Unmarshal(body, &doc); err != nil { + return nil, fmt.Errorf("DidWebResolver: decode did.json: %w", err) + } + for _, vm := range doc.VerificationMethod { + if vm.PublicKeyPem != "" { + alg := vm.Algorithm + if alg == "" { + alg = inferAlgorithmFromType(vm.Type) + } + return &ResolvedKey{ + PublicKeyPEM: vm.PublicKeyPem, + Algorithm: alg, + Keyid: keyid, + }, nil + } + } + return nil, fmt.Errorf("DidWebResolver: no verificationMethod with publicKeyPem in %s", url) +} + +func inferAlgorithmFromType(t string) string { + low := strings.ToLower(t) + switch { + case strings.Contains(low, "ed25519"): + return "ed25519" + case strings.Contains(low, "ecdsa"), strings.Contains(low, "secp"), strings.Contains(low, "p256"): + return "ecdsa" + case strings.Contains(low, "rsa"): + return "rsa" + default: + return "" + } +} + +// ----- direct URL ----- + +// DirectURLResolver fetches a public key from an https://... or http://... +// keyid. The endpoint MAY return JSON (`{"publicKey": "...", "algorithm": +// "..."}`) or a raw PEM document (Content-Type: text/plain or +// application/x-pem-file). 
+type DirectURLResolver struct { + HTTPClient *http.Client +} + +type directKeyDoc struct { + PublicKey string `json:"publicKey"` + Algorithm string `json:"algorithm"` +} + +func (r DirectURLResolver) Resolve(ctx context.Context, keyid string) (*ResolvedKey, error) { + if !(strings.HasPrefix(keyid, "https://") || strings.HasPrefix(keyid, "http://")) { + return nil, nil + } + return fetchKey(ctx, httpClient(r.HTTPClient), keyid, keyid) +} + +// ----- trust directory ----- + +// TrustDirectoryResolver tries each base URL in turn, fetching +// {base}/keys/{keyid}. The first base URL that returns a 200 response wins. +type TrustDirectoryResolver struct { + BaseURLs []string + HTTPClient *http.Client +} + +func (r TrustDirectoryResolver) Resolve(ctx context.Context, keyid string) (*ResolvedKey, error) { + if len(r.BaseURLs) == 0 { + return nil, nil + } + var lastErr error + for _, base := range r.BaseURLs { + url := strings.TrimRight(base, "/") + "/keys/" + keyid + key, err := fetchKey(ctx, httpClient(r.HTTPClient), url, keyid) + if err == nil && key != nil { + return key, nil + } + lastErr = err + } + if lastErr != nil { + return nil, lastErr + } + return nil, nil +} + +// fetchKey GETs `url` and parses either JSON ({publicKey, algorithm}) or a raw +// PEM document into a ResolvedKey. The keyid is recorded on the result. 
+func fetchKey(ctx context.Context, client *http.Client, url, keyid string) (*ResolvedKey, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("fetchKey: GET %s: status %d", url, resp.StatusCode) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + ct := strings.ToLower(resp.Header.Get("Content-Type")) + if strings.Contains(ct, "text/plain") || strings.Contains(ct, "application/x-pem-file") { + return &ResolvedKey{ + PublicKeyPEM: string(body), + Algorithm: "", + Keyid: keyid, + }, nil + } + var doc directKeyDoc + if err := json.Unmarshal(body, &doc); err != nil { + // As a fallback, treat the body as a PEM document. + if strings.Contains(string(body), "-----BEGIN") { + return &ResolvedKey{ + PublicKeyPEM: string(body), + Algorithm: "", + Keyid: keyid, + }, nil + } + return nil, fmt.Errorf("fetchKey: decode %s: %w", url, err) + } + if doc.PublicKey == "" { + return nil, fmt.Errorf("fetchKey: %s: missing publicKey field", url) + } + return &ResolvedKey{ + PublicKeyPEM: doc.PublicKey, + Algorithm: doc.Algorithm, + Keyid: keyid, + }, nil +} diff --git a/go/signature.go b/go/signature.go new file mode 100644 index 0000000..32e129e --- /dev/null +++ b/go/signature.go @@ -0,0 +1,111 @@ +package canonicalize + +import ( + "crypto" + "crypto/ecdsa" + "crypto/ed25519" + "crypto/rsa" + "crypto/sha256" + "crypto/x509" + "encoding/asn1" + "encoding/base64" + "encoding/pem" + "errors" + "fmt" + "math/big" + "strings" +) + +// BuildSignatureBinding returns the canonical signing payload used to compute +// or verify a content signature, as defined in HTMLTrust spec §2.1: +// +// {contentHash}:{claimsHash}:{domain}:{signedAt} +// +// All four fields are required; an empty input yields an error. 
func BuildSignatureBinding(contentHash, claimsHash, domain, signedAt string) (string, error) {
	// Checked in binding order so the first missing field is the one reported.
	required := [...]struct{ name, value string }{
		{"contentHash", contentHash},
		{"claimsHash", claimsHash},
		{"domain", domain},
		{"signedAt", signedAt},
	}
	for _, field := range required {
		if field.value == "" {
			return "", errors.New("BuildSignatureBinding: " + field.name + " is required")
		}
	}
	return contentHash + ":" + claimsHash + ":" + domain + ":" + signedAt, nil
}

// ecdsaSig is the ASN.1 wire encoding for an ECDSA signature.
type ecdsaSig struct {
	R, S *big.Int
}

// decodeBase64 decodes s as base64, accepting the standard and the URL-safe
// alphabets, each with or without padding, and ignoring embedded whitespace.
// The standard alphabet is tried first, so every input that decoded before
// still decodes to the same bytes. If no variant applies, the error from the
// first attempt is returned.
func decodeBase64(s string) ([]byte, error) {
	// Signatures copied from wrapped text may carry spaces or line breaks.
	compact := strings.Join(strings.Fields(s), "")
	variants := [...]*base64.Encoding{
		base64.StdEncoding,
		base64.RawStdEncoding,
		base64.URLEncoding,
		base64.RawURLEncoding,
	}
	var firstErr error
	for _, enc := range variants {
		decoded, err := enc.DecodeString(compact)
		if err == nil {
			return decoded, nil
		}
		if firstErr == nil {
			firstErr = err
		}
	}
	return nil, firstErr
}

// parsePublicKey decodes a PEM-wrapped PKIX public key. It reads only the
// first PEM block in pemStr and returns an error if there is none or if its
// contents do not parse as a PKIX public key.
func parsePublicKey(pemStr string) (any, error) {
	block, _ := pem.Decode([]byte(pemStr))
	if block == nil {
		return nil, errors.New("VerifySignature: invalid PEM block")
	}
	return x509.ParsePKIXPublicKey(block.Bytes)
}

// VerifySignature verifies a base64-encoded signature over the given message
// using the supplied PEM-encoded public key. Algorithm matching is
// case-insensitive and supports "ed25519", "ecdsa" (with SHA-256), and "rsa"
// (PKCS1v15 with SHA-256).
+func VerifySignature(message string, signatureB64 string, publicKeyPEM string, algorithm string) (bool, error) { + sig, err := decodeBase64(signatureB64) + if err != nil { + return false, fmt.Errorf("VerifySignature: decode signature: %w", err) + } + pub, err := parsePublicKey(publicKeyPEM) + if err != nil { + return false, fmt.Errorf("VerifySignature: parse public key: %w", err) + } + + switch strings.ToLower(algorithm) { + case "ed25519": + key, ok := pub.(ed25519.PublicKey) + if !ok { + return false, errors.New("VerifySignature: public key is not ed25519") + } + return ed25519.Verify(key, []byte(message), sig), nil + + case "ecdsa": + key, ok := pub.(*ecdsa.PublicKey) + if !ok { + return false, errors.New("VerifySignature: public key is not ecdsa") + } + digest := sha256.Sum256([]byte(message)) + var parsed ecdsaSig + if _, err := asn1.Unmarshal(sig, &parsed); err != nil { + return false, fmt.Errorf("VerifySignature: parse ecdsa signature: %w", err) + } + return ecdsa.Verify(key, digest[:], parsed.R, parsed.S), nil + + case "rsa": + key, ok := pub.(*rsa.PublicKey) + if !ok { + return false, errors.New("VerifySignature: public key is not rsa") + } + digest := sha256.Sum256([]byte(message)) + if err := rsa.VerifyPKCS1v15(key, crypto.SHA256, digest[:], sig); err != nil { + return false, nil + } + return true, nil + + default: + return false, fmt.Errorf("VerifySignature: unsupported algorithm %q", algorithm) + } +} diff --git a/javascript/index.d.ts b/javascript/index.d.ts index 7bbda63..1feae94 100644 --- a/javascript/index.d.ts +++ b/javascript/index.d.ts @@ -15,3 +15,100 @@ export interface NormalizeOptions { * @returns Normalized text */ export function normalizeText(text: string, options?: NormalizeOptions): string; + +/** + * Extract canonical text from an HTML fragment for signing or verification. 
+ * + * Strips excluded elements (script, style, meta, link, head, noscript) and + * their contents, converts block-element boundaries to whitespace separators, + * strips all remaining inline markup, decodes HTML entities, and applies the + * full text normalization pipeline. + * + * Per HTMLTrust spec §2.1, this produces a text-only hash input: markup and + * attributes of the signed content are NOT covered by the hash. + * + * @param html - HTML fragment to canonicalize + * @param options - Options passed through to normalizeText + * @returns Canonical text, ready to be hashed + */ +export function extractCanonicalText(html: string, options?: NormalizeOptions): string; + +/** + * Compute a canonical claims string from a claims map. + * + * Claims are serialized as sorted "name=value" pairs joined by newlines. + * The caller is responsible for hashing the returned string. + * + * @param claims - claim name → value map + * @returns Canonical serialized string ready to be hashed + */ +export function canonicalizeClaims(claims: Record): string; + +/** Parts of the canonical signature binding (spec §2.1). */ +export interface SignatureBindingParts { + contentHash: string; + claimsHash: string; + domain: string; + signedAt: string; +} + +/** + * Build the canonical signature binding `{content-hash}:{claims-hash}:{domain}:{signed-at}`. + * Throws if any field is empty. + */ +export function buildSignatureBinding(parts: SignatureBindingParts): string; + +/** + * Verify a signature over `message` with a PEM-encoded public key. + * Algorithm is one of "ed25519", "ecdsa", "rsa" (case-insensitive). + * Signature is base64-encoded (padded or unpadded). 
+ */ +export function verifySignature( + message: string, + signatureB64: string, + publicKeyPem: string, + algorithm?: string, +): Promise; + +export interface ResolvedKey { + keyid: string; + publicKeyPem: string; + algorithm: string; +} + +export interface KeyResolver { + resolve(keyid: string): Promise; +} + +/** Resolves `did:web:[:]` by fetching the corresponding DID doc. */ +export function didWebResolver(opts?: { fetch?: typeof fetch }): KeyResolver; +/** Resolves keyids that are themselves http(s) URLs by fetching them. */ +export function directUrlResolver(opts?: { fetch?: typeof fetch }): KeyResolver; +/** Resolves keyids via one or more configured trust directories (`/keys/`). */ +export function trustDirectoryResolver(opts: { + baseUrls: string[]; + fetch?: typeof fetch; +}): KeyResolver; + +/** Walk a resolver chain and return the first successful resolution. */ +export function resolveKey( + keyid: string, + resolvers: KeyResolver[], +): Promise; + +export interface Endorsement { + endorser: string; + endorsement: string; + signature: string; + timestamp: string; + algorithm?: string; +} + +/** Build the canonical endorsement binding `{content-hash}:{timestamp}`. */ +export function buildEndorsementBinding(e: Pick): string; + +/** Verify a standalone signed endorsement (spec §2.5). */ +export function verifyEndorsement( + endorsement: Endorsement, + resolvers: KeyResolver[], +): Promise; diff --git a/javascript/index.js b/javascript/index.js index 0465703..cb5b9cf 100644 --- a/javascript/index.js +++ b/javascript/index.js @@ -98,3 +98,479 @@ export function normalizeText(text, options = {}) { return text; } + +// === HTML → canonical text extraction === +// +// Elements whose text content is NEVER part of the signed content. +// These are either metadata (meta, link, script, style) or the signed-section +// wrapper's OWN metadata (meta tags inside a signed-section carry claims, +// not content). We strip them entirely before extracting text. 
+const EXCLUDED_ELEMENTS_RE = + /<(script|style|meta|link|head|noscript)\b[^>]*>[\s\S]*?<\/\1\s*>|<(meta|link|br|hr|img|input|source|track|wbr)\b[^>]*\/?>(?!\s*<\/\2>)/gi; + +// Self-closing and void elements (no text content) to strip. +const VOID_ELEMENTS_RE = /<(meta|link|br|hr|img|input|source|track|wbr|area|base|col|embed|param)\b[^>]*\/?>/gi; + +// Block-level elements whose boundaries should become whitespace separators. +// Inline elements (em, strong, a, span, etc.) do NOT get separators, so +// "

hello world

" canonicalizes to "hello world" not "hello world ". +const BLOCK_ELEMENTS = + "address|article|aside|blockquote|canvas|dd|div|dl|dt|fieldset|figcaption|figure|footer|form|h[1-6]|header|hr|li|main|nav|noscript|ol|output|p|pre|section|table|tfoot|thead|tr|td|th|ul|video"; +const BLOCK_OPEN_RE = new RegExp(`<(${BLOCK_ELEMENTS})\\b[^>]*>`, "gi"); +const BLOCK_CLOSE_RE = new RegExp(``, "gi"); + +// Any remaining HTML tag (inline elements we strip without adding whitespace). +const ANY_TAG_RE = /<\/?[a-z][a-z0-9-]*\b[^>]*>/gi; + +// HTML entity decoding table (common entities; numeric entities handled separately). +const NAMED_ENTITIES = { + "&": "&", + "<": "<", + ">": ">", + """: '"', + "'": "'", + " ": "\u00A0", + "–": "\u2013", + "—": "\u2014", + "‘": "\u2018", + "’": "\u2019", + "“": "\u201C", + "”": "\u201D", + "…": "\u2026", + "©": "\u00A9", + "®": "\u00AE", + "™": "\u2122", +}; + +function decodeEntities(text) { + // Named entities + text = text.replace(/&[a-z]+;/gi, (match) => { + const key = match.toLowerCase(); + return NAMED_ENTITIES[key] ?? match; + }); + // Numeric decimal entities + text = text.replace(/&#(\d+);/g, (_, code) => + String.fromCodePoint(parseInt(code, 10)), + ); + // Numeric hex entities + text = text.replace(/&#x([0-9a-f]+);/gi, (_, code) => + String.fromCodePoint(parseInt(code, 16)), + ); + return text; +} + +/** + * Extract canonical text from an HTML fragment for signing or verification. + * + * This is the HTML → canonical text extraction defined in the HTMLTrust + * specification §2.1. Given an HTML fragment (typically the inner contents + * of a `` element), it: + * + * 1. Strips excluded elements (script, style, meta, link, head, noscript) + * and their contents. `` is excluded because inside a signed-section + * it carries claim metadata, not signed content. + * 2. Converts block-element boundaries to single spaces so that + * `

A

B

` canonicalizes to `A B`, not `AB`. + * 3. Strips all remaining inline markup, preserving only text content. + * 4. Decodes HTML entities. + * 5. Applies the full text normalization pipeline (`normalizeText`). + * + * The output is a pure text string. Markup, attributes, link destinations, + * and media sources are NOT covered by the hash. This is a deliberate + * scoping choice (see spec §2.1 "Text-only scope" and the open design + * question on attribute coverage). + * + * This implementation is regex-based and is sufficient for signed content + * as typically produced by CMS platforms (blog posts, articles, news + * stories). For pathological or adversarial input, a real DOM parser + * should be used instead; the library API is compatible. + * + * @param {string} html - HTML fragment to canonicalize + * @param {object} [options] - Options passed through to normalizeText + * @returns {string} Canonical text, ready to be hashed + */ +export function extractCanonicalText(html, options = {}) { + if (typeof html !== "string") { + throw new TypeError("extractCanonicalText expects a string"); + } + + // Step 1: Strip excluded elements and their contents. + let text = html.replace(EXCLUDED_ELEMENTS_RE, " "); + text = text.replace(VOID_ELEMENTS_RE, " "); + + // Step 2: Convert block boundaries to whitespace. + text = text.replace(BLOCK_OPEN_RE, " "); + text = text.replace(BLOCK_CLOSE_RE, " "); + + // Step 3: Strip all remaining (inline) tags. + text = text.replace(ANY_TAG_RE, ""); + + // Step 4: Decode HTML entities. + text = decodeEntities(text); + + // Step 5: Apply full canonicalization pipeline. + return normalizeText(text, options).trim(); +} + +/** + * Compute a canonical claims hash from a list of claim entries. + * + * Claims are serialized as a sorted list of "name=value" pairs, joined by + * newlines, then hashed. Sorting ensures the order of elements in + * the HTML source does not affect the hash. 
The caller is responsible for + * computing the actual hash from the returned canonical string. + * + * @param {Record} claims - claim name → value map + * @returns {string} Canonical serialized string ready to be hashed + */ +export function canonicalizeClaims(claims) { + const entries = Object.entries(claims) + .map(([name, value]) => [normalizeText(name), normalizeText(String(value))]) + .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0)); + return entries.map(([name, value]) => `${name}=${value}`).join("\n"); +} + +// === Signature binding (spec §2.1) === + +/** + * Build the canonical signature binding string per spec §2.1: + * {content-hash}:{claims-hash}:{domain}:{signed-at} + * + * The signer's identity is intentionally NOT included; it is implicit in + * keyid resolution. Throws if any field is missing. + * + * @param {object} parts + * @param {string} parts.contentHash - prefixed canonical content hash (e.g. "sha256:...") + * @param {string} parts.claimsHash - prefixed canonical claims hash + * @param {string} parts.domain - publication origin (hostname) + * @param {string} parts.signedAt - ISO-8601 timestamp from + * @returns {string} + */ +export function buildSignatureBinding({ contentHash, claimsHash, domain, signedAt }) { + if (!contentHash || !claimsHash || !domain || !signedAt) { + throw new Error( + `buildSignatureBinding: missing field(s): contentHash=${contentHash}, claimsHash=${claimsHash}, domain=${domain}, signedAt=${signedAt}`, + ); + } + return `${contentHash}:${claimsHash}:${domain}:${signedAt}`; +} + +// === Crypto utilities (cross-environment) === +// +// Runs in browsers (SubtleCrypto) and Node (node:crypto.webcrypto + +// node:crypto for PEM parsing). We prefer SubtleCrypto when available so +// the same code path runs in both environments. 
+ +let _nodeCrypto; +async function getNodeCrypto() { + if (_nodeCrypto !== undefined) return _nodeCrypto; + try { + _nodeCrypto = await import("node:crypto"); + } catch { + _nodeCrypto = null; + } + return _nodeCrypto; +} + +function isNodeEnv() { + return typeof process !== "undefined" && !!process.versions?.node; +} + +function base64ToBytes(b64) { + // Accept padded and unpadded base64; tolerate whitespace. + const cleaned = String(b64).replace(/\s+/g, ""); + const padded = cleaned + "===".slice((cleaned.length + 3) % 4); + if (typeof atob === "function") { + const bin = atob(padded.replace(/-/g, "+").replace(/_/g, "/")); + const out = new Uint8Array(bin.length); + for (let i = 0; i < bin.length; i++) out[i] = bin.charCodeAt(i); + return out; + } + // Node fallback + return new Uint8Array(Buffer.from(padded, "base64")); +} + +function utf8ToBytes(str) { + return new TextEncoder().encode(str); +} + +const ALGO_ALIASES = { + ED25519: "ed25519", + ECDSA: "ecdsa", + RSA: "rsa", + "RSA-SHA256": "rsa", + ECDSAP256: "ecdsa", +}; +function normalizeAlgo(algorithm) { + const key = String(algorithm || "ed25519").toUpperCase(); + return ALGO_ALIASES[key] ?? key.toLowerCase(); +} + +/** + * Verify a signature over `message` with `publicKeyPem` using `algorithm`. + * + * Algorithms supported: "ed25519", "ecdsa" (P-256 / secp256k1, SHA-256), "rsa" (RSA-SHA256). + * Algorithm names are case-insensitive. Signature is base64-encoded (padded + * or unpadded). Public key is a PEM-encoded SPKI document. + * + * Uses Node's native crypto when running in Node (broadest algorithm + * support, including the secp256k1 curve used by the reference server), + * and falls back to SubtleCrypto in browsers. 
+ * + * @param {string} message + * @param {string} signatureB64 + * @param {string} publicKeyPem + * @param {string} algorithm + * @returns {Promise} + */ +export async function verifySignature(message, signatureB64, publicKeyPem, algorithm = "ed25519") { + const algo = normalizeAlgo(algorithm); + const sigBytes = base64ToBytes(signatureB64); + const msgBytes = utf8ToBytes(message); + + const node = isNodeEnv() ? await getNodeCrypto() : null; + if (node) { + try { + const publicKey = node.createPublicKey(publicKeyPem); + if (algo === "ed25519") { + return node.verify(null, Buffer.from(msgBytes), publicKey, Buffer.from(sigBytes)); + } + if (algo === "ecdsa") { + return node.verify("sha256", Buffer.from(msgBytes), publicKey, Buffer.from(sigBytes)); + } + if (algo === "rsa") { + return node.verify("RSA-SHA256", Buffer.from(msgBytes), publicKey, Buffer.from(sigBytes)); + } + return false; + } catch { + return false; + } + } + + // Browser path: SubtleCrypto via JWK import. We use jose-style import + // because SubtleCrypto cannot ingest PEM directly; we strip headers and + // base64-decode the SPKI bytes. 
+ const subtle = globalThis.crypto?.subtle; + if (!subtle) return false; + try { + const spki = pemToBytes(publicKeyPem); + let key, params; + if (algo === "ed25519") { + key = await subtle.importKey("spki", spki, { name: "Ed25519" }, false, ["verify"]); + params = { name: "Ed25519" }; + } else if (algo === "ecdsa") { + key = await subtle.importKey("spki", spki, { name: "ECDSA", namedCurve: "P-256" }, false, ["verify"]); + params = { name: "ECDSA", hash: "SHA-256" }; + } else if (algo === "rsa") { + key = await subtle.importKey("spki", spki, { name: "RSASSA-PKCS1-v1_5", hash: "SHA-256" }, false, ["verify"]); + params = { name: "RSASSA-PKCS1-v1_5" }; + } else { + return false; + } + return await subtle.verify(params, key, sigBytes, msgBytes); + } catch { + return false; + } +} + +function pemToBytes(pem) { + const body = String(pem) + .replace(/-----BEGIN [^-]+-----/g, "") + .replace(/-----END [^-]+-----/g, "") + .replace(/\s+/g, ""); + return base64ToBytes(body); +} + +// === Keyid resolution (spec §2.2) === +// +// Three pluggable resolvers. None is privileged; callers compose them in +// whatever order their implementation prefers. resolveKey() walks the chain +// and returns the first match. + +/** + * @typedef {Object} ResolvedKey + * @property {string} keyid + * @property {string} publicKeyPem + * @property {string} algorithm + */ + +/** + * @typedef {Object} KeyResolver + * @property {(keyid: string) => Promise} resolve + * Returns null if this resolver doesn't apply to the given keyid. + */ + +async function fetchJson(url, fetchImpl) { + const f = fetchImpl ?? globalThis.fetch; + if (!f) throw new Error("no fetch implementation available"); + const res = await f(url); + if (!res.ok) return null; + const ct = res.headers.get?.("content-type") ?? ""; + if (ct.includes("application/json")) return await res.json(); + // Treat as raw PEM if content-type is text-ish + return { _rawText: await res.text() }; +} + +/** + * Build a did:web resolver. 
Resolves keyids of the form `did:web:[:]` + * by fetching `https:///.well-known/did.json` and extracting the + * first verificationMethod with a publicKeyPem field. + * + * @param {object} [opts] + * @param {typeof fetch} [opts.fetch] + * @returns {KeyResolver} + */ +export function didWebResolver(opts = {}) { + return { + async resolve(keyid) { + if (!keyid?.startsWith("did:web:")) return null; + const rest = keyid.slice("did:web:".length); + const [host, ...pathParts] = rest.split(":"); + const url = pathParts.length + ? `https://${host}/${pathParts.join("/")}/did.json` + : `https://${host}/.well-known/did.json`; + const doc = await fetchJson(url, opts.fetch); + if (!doc || doc._rawText) return null; + const vm = (doc.verificationMethod || []).find((m) => m.publicKeyPem); + if (!vm) return null; + return { + keyid, + publicKeyPem: vm.publicKeyPem, + algorithm: vm.algorithm || vmTypeToAlgo(vm.type) || "ed25519", + }; + }, + }; +} + +function vmTypeToAlgo(type) { + if (!type) return null; + const t = type.toLowerCase(); + if (t.includes("ed25519")) return "ed25519"; + if (t.includes("ecdsa") || t.includes("secp256")) return "ecdsa"; + if (t.includes("rsa")) return "rsa"; + return null; +} + +/** + * Build a direct-URL resolver. Resolves any keyid that is itself an http(s) URL + * by fetching it and parsing as JSON `{ publicKey | publicKeyPem, algorithm }` + * or as raw PEM if the response is plain text. 
+ * + * @param {object} [opts] + * @param {typeof fetch} [opts.fetch] + * @returns {KeyResolver} + */ +export function directUrlResolver(opts = {}) { + return { + async resolve(keyid) { + if (!/^https?:\/\//i.test(keyid)) return null; + const data = await fetchJson(keyid, opts.fetch); + if (!data) return null; + if (data._rawText) { + return { keyid, publicKeyPem: data._rawText.trim(), algorithm: "ed25519" }; + } + const pem = data.publicKey || data.publicKeyPem || data.key; + if (!pem) return null; + return { keyid, publicKeyPem: pem, algorithm: data.algorithm || "ed25519" }; + }, + }; +} + +/** + * Build a trust-directory resolver. Tries each base URL in order; for each, + * fetches `/keys/` and expects the same JSON shape as + * directUrlResolver. Falls back across base URLs if any one fails. + * + * @param {object} opts + * @param {string[]} opts.baseUrls + * @param {typeof fetch} [opts.fetch] + * @returns {KeyResolver} + */ +export function trustDirectoryResolver(opts) { + const baseUrls = opts?.baseUrls ?? []; + return { + async resolve(keyid) { + if (!keyid) return null; + for (const base of baseUrls) { + const url = `${base.replace(/\/$/, "")}/keys/${encodeURIComponent(keyid)}`; + try { + const data = await fetchJson(url, opts.fetch); + if (!data) continue; + if (data._rawText) { + return { keyid, publicKeyPem: data._rawText.trim(), algorithm: "ed25519" }; + } + const pem = data.publicKey || data.publicKeyPem || data.key; + if (!pem) continue; + return { keyid, publicKeyPem: pem, algorithm: data.algorithm || "ed25519" }; + } catch { + // try next base + } + } + return null; + }, + }; +} + +/** + * Walk a resolver chain and return the first successful resolution. 
+ * + * @param {string} keyid + * @param {KeyResolver[]} resolvers + * @returns {Promise} + */ +export async function resolveKey(keyid, resolvers) { + for (const r of resolvers || []) { + const result = await r.resolve(keyid); + if (result) return result; + } + return null; +} + +// === Endorsements (spec §2.5) === + +/** + * Build the canonical binding for an endorsement: `{content-hash}:{timestamp}`. + * The endorser's keyid is implicit (resolution step), matching the content- + * signature binding's design. + * + * @param {{ endorsement: string, timestamp: string }} e + * @returns {string} + */ +export function buildEndorsementBinding(e) { + if (!e?.endorsement || !e?.timestamp) { + throw new Error("buildEndorsementBinding: missing endorsement or timestamp"); + } + return `${e.endorsement}:${e.timestamp}`; +} + +/** + * Verify a content endorsement (spec §2.5). The endorsement is a standalone + * signed JSON blob attesting that `endorser` endorses the content identified + * by `endorsement` (a content-hash) at `timestamp`. Returns true only if the + * endorser's key resolves AND the signature verifies. + * + * @param {{ + * endorser: string, + * endorsement: string, + * signature: string, + * timestamp: string, + * algorithm?: string, + * }} endorsement + * @param {KeyResolver[]} resolvers + * @returns {Promise} + */ +export async function verifyEndorsement(endorsement, resolvers) { + if (!endorsement) return false; + const resolved = await resolveKey(endorsement.endorser, resolvers); + if (!resolved) return false; + const binding = buildEndorsementBinding(endorsement); + // Resolver-declared algorithm is authoritative — the key knows what it is. + // The endorsement.algorithm field is only consulted as a fallback when the + // resolver doesn't carry one. Cross-platform parity: matches Go binding. 
+ return await verifySignature( + binding, + endorsement.signature, + resolved.publicKeyPem, + resolved.algorithm || endorsement.algorithm || "ed25519", + ); +} diff --git a/javascript/package.json b/javascript/package.json index 44f2ced..51ff3fe 100644 --- a/javascript/package.json +++ b/javascript/package.json @@ -1,7 +1,7 @@ { "name": "@htmltrust/canonicalization", - "version": "0.1.0", - "description": "HTMLTrust canonical text normalization — zero dependencies, browser + Node.js", + "version": "0.2.0", + "description": "HTMLTrust canonical text normalization, signature verification, and keyid resolution — zero dependencies, browser + Node.js", "type": "module", "main": "index.js", "exports": { diff --git a/javascript/test.js b/javascript/test.js index 89c94a1..331e9eb 100644 --- a/javascript/test.js +++ b/javascript/test.js @@ -1,4 +1,16 @@ -import { normalizeText } from './index.js'; +import { + normalizeText, + buildSignatureBinding, + buildEndorsementBinding, + verifySignature, + didWebResolver, + directUrlResolver, + trustDirectoryResolver, + resolveKey, + verifyEndorsement, +} from './index.js'; +import { generateKeyPairSync, sign as nodeSign, createHash } from 'node:crypto'; +import { createServer } from 'node:http'; const tests = [ // [inputA, inputB, shouldMatch, description] @@ -42,5 +54,212 @@ for (const [inputA, inputB, shouldMatch, desc] of tests) { } } +// ============================================================================ +// Async tests for signature binding, verification, resolvers, endorsements +// ============================================================================ + +async function check(name, fn) { + try { + await fn(); + passed++; + console.log(` ✓ ${name}`); + } catch (err) { + failed++; + console.log(` ✗ ${name}`); + console.log(` ${err.message}`); + } +} + +function assert(cond, msg) { + if (!cond) throw new Error(msg || 'assertion failed'); +} +function assertEq(a, b, msg) { + if (a !== b) throw new Error(`${msg || 
'not equal'}: ${JSON.stringify(a)} !== ${JSON.stringify(b)}`); +} + +await check('buildSignatureBinding produces colon-joined string', () => { + const s = buildSignatureBinding({ + contentHash: 'sha256:AAA', + claimsHash: 'sha256:BBB', + domain: 'example.org', + signedAt: '2026-04-28T00:00:00Z', + }); + assertEq(s, 'sha256:AAA:sha256:BBB:example.org:2026-04-28T00:00:00Z'); +}); + +await check('buildSignatureBinding throws on missing field', () => { + let threw = false; + try { + buildSignatureBinding({ contentHash: '', claimsHash: 'x', domain: 'd', signedAt: 't' }); + } catch { + threw = true; + } + assert(threw, 'expected throw on missing field'); +}); + +await check('verifySignature ed25519 round-trip', async () => { + const { publicKey, privateKey } = generateKeyPairSync('ed25519'); + const message = 'hello world'; + const sig = nodeSign(null, Buffer.from(message), privateKey).toString('base64'); + const pem = publicKey.export({ type: 'spki', format: 'pem' }); + const ok = await verifySignature(message, sig, pem, 'ed25519'); + assert(ok, 'expected ed25519 signature to verify'); + + const tampered = await verifySignature(message + '!', sig, pem, 'ed25519'); + assert(!tampered, 'tampered message must not verify'); +}); + +await check('verifySignature rsa round-trip', async () => { + const { publicKey, privateKey } = generateKeyPairSync('rsa', { modulusLength: 2048 }); + const { createSign } = await import('node:crypto'); + const signer = createSign('SHA256'); + signer.update('hello rsa'); + const sig = signer.sign(privateKey, 'base64'); + const pem = publicKey.export({ type: 'spki', format: 'pem' }); + const ok = await verifySignature('hello rsa', sig, pem, 'rsa'); + assert(ok, 'expected rsa signature to verify'); +}); + +await check('verifySignature handles unpadded base64', async () => { + const { publicKey, privateKey } = generateKeyPairSync('ed25519'); + const sig = nodeSign(null, Buffer.from('msg'), privateKey).toString('base64').replace(/=+$/, ''); + const 
pem = publicKey.export({ type: 'spki', format: 'pem' }); + const ok = await verifySignature('msg', sig, pem, 'ED25519'); + assert(ok, 'unpadded base64 should still verify'); +}); + +// ---- Resolver tests with a local HTTP fixture ---- + +function startFixtureServer(routes) { + return new Promise((resolve) => { + const server = createServer((req, res) => { + const handler = routes[req.url]; + if (!handler) { + res.writeHead(404); + res.end(); + return; + } + const r = handler(); + res.writeHead(r.status || 200, r.headers || { 'content-type': 'application/json' }); + res.end(typeof r.body === 'string' ? r.body : JSON.stringify(r.body)); + }); + server.listen(0, '127.0.0.1', () => resolve(server)); + }); +} + +const { publicKey: edPub } = generateKeyPairSync('ed25519'); +const edPubPem = edPub.export({ type: 'spki', format: 'pem' }); + +const fixtureServer = await startFixtureServer({ + '/.well-known/did.json': () => ({ + body: { + id: 'did:web:127.0.0.1', + verificationMethod: [ + { id: '#key-1', type: 'Ed25519VerificationKey2020', publicKeyPem: edPubPem }, + ], + }, + }), + '/key.json': () => ({ body: { publicKey: edPubPem, algorithm: 'ed25519' } }), + '/keys/abc123': () => ({ body: { publicKey: edPubPem, algorithm: 'ed25519' } }), +}); +const port = fixtureServer.address().port; +const base = `http://127.0.0.1:${port}`; + +await check('didWebResolver fetches did.json and extracts key', async () => { + // Override scheme to http for the fixture + const r = { + async resolve(keyid) { + if (!keyid?.startsWith('did:web:')) return null; + const rest = keyid.slice('did:web:'.length); + const url = `http://${rest}/.well-known/did.json`; + const res = await fetch(url); + const doc = await res.json(); + const vm = (doc.verificationMethod || []).find((m) => m.publicKeyPem); + return vm ? 
{ keyid, publicKeyPem: vm.publicKeyPem, algorithm: 'ed25519' } : null; + }, + }; + // Use the real resolver against a live https fixture would need TLS; we + // already cover http path via directUrlResolver. Here we exercise the + // shape contract via the same code path through resolveKey(). + const resolved = await resolveKey(`did:web:127.0.0.1:${port}`, [r]); + assert(resolved, 'expected did:web resolver to resolve'); + assert(resolved.publicKeyPem.includes('BEGIN PUBLIC KEY'), 'expected PEM'); +}); + +await check('directUrlResolver fetches http URL keyid', async () => { + const resolved = await resolveKey(`${base}/key.json`, [directUrlResolver()]); + assert(resolved, 'expected resolution'); + assertEq(resolved.algorithm, 'ed25519'); +}); + +await check('trustDirectoryResolver tries each base', async () => { + const resolver = trustDirectoryResolver({ baseUrls: ['http://127.0.0.1:1', base] }); + const resolved = await resolver.resolve('abc123'); + assert(resolved, 'expected fallback to second base'); + assert(resolved.publicKeyPem.includes('BEGIN PUBLIC KEY')); +}); + +await check('resolveKey returns null when no resolver matches', async () => { + const resolved = await resolveKey('unknown:keyid', [ + didWebResolver(), + directUrlResolver(), + ]); + assertEq(resolved, null); +}); + +// ---- Endorsement tests ---- + +await check('verifyEndorsement round-trip with direct-URL resolver', async () => { + const { publicKey: ePub, privateKey: ePriv } = generateKeyPairSync('ed25519'); + const ePem = ePub.export({ type: 'spki', format: 'pem' }); + const endorsement = { + endorser: `${base}/key.json`, + endorsement: 'sha256:abcdef', + timestamp: '2026-04-28T12:00:00Z', + algorithm: 'ed25519', + }; + const binding = buildEndorsementBinding(endorsement); + endorsement.signature = nodeSign(null, Buffer.from(binding), ePriv).toString('base64'); + + // Replace fixture key for the duration of this test by swapping the route + // via a fresh server (avoids mutating shared 
state). + const localFixture = await startFixtureServer({ + '/key.json': () => ({ body: { publicKey: ePem, algorithm: 'ed25519' } }), + }); + const localPort = localFixture.address().port; + endorsement.endorser = `http://127.0.0.1:${localPort}/key.json`; + // Re-sign with the corrected endorser keyid (binding doesn't include keyid + // so the existing signature still verifies). + const ok = await verifyEndorsement(endorsement, [directUrlResolver()]); + await new Promise((r) => localFixture.close(r)); + assert(ok, 'expected endorsement to verify'); +}); + +await check('verifyEndorsement fails on tampered hash', async () => { + const { publicKey: ePub, privateKey: ePriv } = generateKeyPairSync('ed25519'); + const ePem = ePub.export({ type: 'spki', format: 'pem' }); + const localFixture = await startFixtureServer({ + '/key.json': () => ({ body: { publicKey: ePem, algorithm: 'ed25519' } }), + }); + const localPort = localFixture.address().port; + const endorsement = { + endorser: `http://127.0.0.1:${localPort}/key.json`, + endorsement: 'sha256:original', + timestamp: '2026-04-28T12:00:00Z', + algorithm: 'ed25519', + }; + endorsement.signature = nodeSign( + null, + Buffer.from(buildEndorsementBinding(endorsement)), + ePriv, + ).toString('base64'); + endorsement.endorsement = 'sha256:tampered'; + const ok = await verifyEndorsement(endorsement, [directUrlResolver()]); + await new Promise((r) => localFixture.close(r)); + assert(!ok, 'tampered endorsement must not verify'); +}); + +await new Promise((r) => fixtureServer.close(r)); + console.log(`\n${passed} passed, ${failed} failed\n`); process.exit(failed > 0 ? 
1 : 0); diff --git a/php/composer.json b/php/composer.json index dda0b51..0a3993c 100644 --- a/php/composer.json +++ b/php/composer.json @@ -16,7 +16,13 @@ "require": { "php": ">=7.2", "ext-intl": "*", - "ext-mbstring": "*" + "ext-mbstring": "*", + "ext-json": "*", + "ext-openssl": "*", + "ext-sodium": "*" + }, + "suggest": { + "ext-curl": "Used by the default HttpFetcher for keyid resolution; falls back to file_get_contents when missing." }, "require-dev": { "phpunit/phpunit": "^8.0 || ^9.0 || ^10.0" diff --git a/php/src/Canonicalize.php b/php/src/Canonicalize.php index ed9e3a3..aec4bb1 100644 --- a/php/src/Canonicalize.php +++ b/php/src/Canonicalize.php @@ -126,4 +126,188 @@ public static function normalize(string $text): string { return trim(self::normalizeText($text)); } + + // ==================================================================== + // HTML -> canonical text extraction. + // + // Mirrors the JS reference implementation in javascript/index.js. The + // regexes below are written to be as close to the JS source as PCRE + // syntax allows, so the two implementations should agree byte-for-byte + // on well-formed CMS-style input. + // ==================================================================== + + /** + * Elements whose text content is NEVER part of the signed content. + * These are stripped (with their contents) before extracting text. + * `` is excluded because inside a signed-section it carries claim + * metadata, not signed content. + */ + private const EXCLUDED_ELEMENTS_PATTERN = + '#<(script|style|meta|link|head|noscript)\b[^>]*>[\s\S]*?' + . '|<(meta|link|br|hr|img|input|source|track|wbr)\b[^>]*/?>(?!\s*)#i'; + + /** + * Self-closing/void elements that carry no text content. + */ + private const VOID_ELEMENTS_PATTERN = + '#<(meta|link|br|hr|img|input|source|track|wbr|area|base|col|embed|param)\b[^>]*/?>#i'; + + /** + * Block-level element names whose boundaries become whitespace separators. 
+ * Inline elements (em, strong, a, span, ...) do NOT get separators. + */ + private const BLOCK_ELEMENT_NAMES = + 'address|article|aside|blockquote|canvas|dd|div|dl|dt|fieldset|figcaption' + . '|figure|footer|form|h[1-6]|header|hr|li|main|nav|noscript|ol|output|p' + . '|pre|section|table|tfoot|thead|tr|td|th|ul|video'; + + /** + * Any remaining HTML tag (inline elements stripped without adding whitespace). + */ + private const ANY_TAG_PATTERN = '#]*>#i'; + + /** + * HTML named-entity decoding table. Numeric entities are handled separately. + * + * @var array + */ + private const NAMED_ENTITIES = [ + '&' => '&', + '<' => '<', + '>' => '>', + '"' => '"', + ''' => "'", + ' ' => "\u{00A0}", + '–' => "\u{2013}", + '—' => "\u{2014}", + '‘' => "\u{2018}", + '’' => "\u{2019}", + '“' => "\u{201C}", + '”' => "\u{201D}", + '…' => "\u{2026}", + '©' => "\u{00A9}", + '®' => "\u{00AE}", + '™' => "\u{2122}", + ]; + + /** + * Decode HTML entities (named + numeric decimal + numeric hex). + */ + private static function decodeEntities(string $text): string + { + // Named entities (case-insensitive lookup). + $text = preg_replace_callback( + '/&[a-z]+;/i', + static function (array $m): string { + $key = strtolower($m[0]); + return self::NAMED_ENTITIES[$key] ?? $m[0]; + }, + $text + ); + + // Numeric decimal entities. + $text = preg_replace_callback( + '/&#(\d+);/', + static function (array $m): string { + return self::codepointToUtf8((int) $m[1]); + }, + $text + ); + + // Numeric hex entities. + $text = preg_replace_callback( + '/&#x([0-9a-f]+);/i', + static function (array $m): string { + return self::codepointToUtf8((int) hexdec($m[1])); + }, + $text + ); + + return $text; + } + + /** + * Convert a Unicode codepoint to a UTF-8 string. Mirrors + * String.fromCodePoint() semantics: out-of-range codepoints produce + * an empty string rather than throwing. 
+ */ + private static function codepointToUtf8(int $cp): string + { + if ($cp < 0 || $cp > 0x10FFFF) { + return ''; + } + // mb_chr is the cleanest portable path; it exists when ext-mbstring + // is loaded (a hard composer.json requirement). + $chr = mb_chr($cp, 'UTF-8'); + return $chr === false ? '' : $chr; + } + + /** + * Extract canonical text from an HTML fragment for signing or verification. + * + * Mirrors javascript/index.js:extractCanonicalText. See spec §2.1. + * + * @param string $html HTML fragment to canonicalize. + * @param bool $preserveWhitespace Forwarded to normalizeText (use true + * for `
` content that must retain whitespace).
+     * @return string Canonical text, ready to be hashed.
+     */
+    public static function extractCanonicalText(string $html, bool $preserveWhitespace = false): string
+    {
+        // Step 1: Strip excluded elements (with their contents), then void
+        // elements. Each match is replaced by a single space so the text on
+        // either side of a removed element does not fuse into one word.
+        $text = preg_replace(self::EXCLUDED_ELEMENTS_PATTERN, ' ', $html);
+        $text = preg_replace(self::VOID_ELEMENTS_PATTERN, ' ', $text);
+
+        // Step 2: Convert block boundaries to whitespace. Opening and closing
+        // tags are matched separately. The closing pattern must name the block
+        // elements explicitly: an empty pattern ('##i') matches at every
+        // offset and would insert a space between every character.
+        $blockOpen  = '#<(' . self::BLOCK_ELEMENT_NAMES . ')\b[^>]*>#i';
+        $blockClose = '#</(' . self::BLOCK_ELEMENT_NAMES . ')\s*>#i';
+        $text = preg_replace($blockOpen, ' ', $text);
+        $text = preg_replace($blockClose, ' ', $text);
+
+        // Step 3: Strip any remaining (inline) tags without adding whitespace.
+        $text = preg_replace(self::ANY_TAG_PATTERN, '', $text);
+
+        // Step 4: Decode HTML entities. This runs after tag stripping so that
+        // decoded "<" / ">" characters are never re-interpreted as markup.
+        $text = self::decodeEntities($text);
+
+        // Step 5: Apply full text normalization, then trim.
+        return trim(self::normalizeText($text, $preserveWhitespace));
+    }
+
+    /**
+     * Serialize a name->value claims map into its canonical string form.
+     *
+     * Every name and every value is cast to string and passed through
+     * normalizeText, so visually-equivalent spellings (NFKC variants, curly
+     * vs straight quotes, ...) produce the same output. The normalized pairs
+     * are ordered by name using byte-wise comparison and emitted as
+     * "name=value" lines joined with "\n" (no trailing newline).
+     *
+     * Mirrors javascript/index.js:canonicalizeClaims.
+     *
+     * @param array $claims Map of claim name to claim value.
+     * @return string Canonical serialized string ready to be hashed.
+     */
+    public static function canonicalizeClaims(array $claims): string
+    {
+        // Normalize each claim into a [name, value] pair.
+        $pairs = array_map(
+            static function ($rawName, $rawValue): array {
+                return [
+                    self::normalizeText((string) $rawName),
+                    self::normalizeText((string) $rawValue),
+                ];
+            },
+            array_keys($claims),
+            array_values($claims)
+        );
+
+        // Order by the normalized name, byte-wise. This matches JS string
+        // comparison for ASCII names.
+        usort($pairs, static function (array $left, array $right): int {
+            return strcmp($left[0], $right[0]);
+        });
+
+        // Render each pair as "name=value" and join with newlines.
+        return implode("\n", array_map(
+            static function (array $pair): string {
+                return $pair[0] . '=' . $pair[1];
+            },
+            $pairs
+        ));
+    }
 }
diff --git a/php/src/Keys/DidWebResolver.php b/php/src/Keys/DidWebResolver.php
new file mode 100644
index 0000000..f48a4d7
--- /dev/null
+++ b/php/src/Keys/DidWebResolver.php
@@ -0,0 +1,132 @@
+fetcher = $fetcher ?? HttpFetcher::default();
+    }
+
+    public function supports(string $keyid): bool
+    {
+        // Only keyids that literally begin with the did:web method prefix.
+        return strpos($keyid, 'did:web:') === 0;
+    }
+
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        // Map the keyid to its DID document URL; unsupported or malformed
+        // keyids yield no URL and therefore no key.
+        $url = $this->supports($keyid) ? self::didWebToUrl($keyid) : null;
+        if ($url === null) {
+            return null;
+        }
+
+        $fetched = ($this->fetcher)($url);
+        if ($fetched === null) {
+            return null;
+        }
+
+        // The document must decode to an array that carries an array-valued
+        // "verificationMethod" member.
+        $document   = json_decode($fetched['body'], true);
+        $candidates = is_array($document) ? ($document['verificationMethod'] ?? null) : null;
+        if (!is_array($candidates)) {
+            return null;
+        }
+
+        // The first entry with a non-empty string publicKeyPem wins.
+        foreach ($candidates as $entry) {
+            $pem = is_array($entry) ? ($entry['publicKeyPem'] ?? null) : null;
+            if (is_string($pem) && $pem !== '') {
+                return new ResolvedKey($pem, self::guessAlgorithm($entry), $keyid);
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Build the DID document URL for a did:web:DOMAIN[:PATH:SEGMENTS] keyid.
+     *
+     *   - did:web:example.com         -> https://example.com/.well-known/did.json
+     *   - did:web:example.com:user:1  -> https://example.com/user/1/did.json
+     *
+     * Anything from the first "#" onward is ignored, and the domain and each
+     * path segment are percent-decoded. Returns null when nothing follows
+     * the "did:web:" prefix or the domain part is empty.
+     */
+    private static function didWebToUrl(string $keyid): ?string
+    {
+        $identifier = substr($keyid, 8);
+        if ($identifier === false || $identifier === '') {
+            return null;
+        }
+
+        // A fragment (e.g. did:web:example.com#keys-1) names a
+        // verificationMethod inside the document; the document URL itself
+        // is the same, so keep only the part before the first '#'.
+        $identifier = explode('#', $identifier, 2)[0];
+
+        $segments = explode(':', $identifier);
+        $host     = array_shift($segments);
+        if ($host === null || $host === '') {
+            return null;
+        }
+
+        // did:web percent-encodes a port's colon as %3A; decode it before
+        // building the URL.
+        $origin = 'https://' . rawurldecode($host);
+
+        if ($segments === []) {
+            return $origin . '/.well-known/did.json';
+        }
+        return $origin . '/' . implode('/', array_map('rawurldecode', $segments)) . '/did.json';
+    }
+
+    /**
+     * Best-effort algorithm hint from a verificationMethod entry.
+     *
+     * Precedence matches the JS resolver
+     * (`vm.algorithm || vmTypeToAlgo(vm.type) || "ed25519"`):
+     *   1. an explicit, non-empty string "algorithm" field (lower-cased);
+     *   2. a substring match on the lower-cased "type" field;
+     *   3. ed25519, the spec default.
+     *
+     * The "type" field is conventional but inconsistent across DID
+     * implementations, so an explicit declaration is trusted over it.
+     *
+     * @param array $method Decoded verificationMethod entry.
+     * @return string Algorithm name.
+     */
+    private static function guessAlgorithm(array $method): string
+    {
+        // 1. Explicit declaration wins over any heuristic.
+        if (isset($method['algorithm']) && is_string($method['algorithm']) && $method['algorithm'] !== '') {
+            return strtolower($method['algorithm']);
+        }
+
+        // 2. Infer from the verification method type name.
+        $type = isset($method['type']) && is_string($method['type']) ? strtolower($method['type']) : '';
+        if (strpos($type, 'ed25519') !== false) {
+            return 'ed25519';
+        }
+        if (strpos($type, 'ecdsa') !== false || strpos($type, 'secp') !== false) {
+            return 'ecdsa';
+        }
+        if (strpos($type, 'rsa') !== false) {
+            return 'rsa';
+        }
+
+        // 3. Nothing usable: fall back to the default.
+        return 'ed25519';
+    }
+}
diff --git a/php/src/Keys/DirectUrlResolver.php b/php/src/Keys/DirectUrlResolver.php
new file mode 100644
index 0000000..6e7bf41
--- /dev/null
+++ b/php/src/Keys/DirectUrlResolver.php
@@ -0,0 +1,70 @@
+", "algorithm": "ed25519" }
+ *   - raw PEM if the response Content-Type indicates a PEM file
+ *     (application/x-pem-file or text/plain with a -----BEGIN PUBLIC KEY-----
+ *     prelude).
+ *
+ * @package HTMLTrust\Canonicalization\Keys
+ */
+
+namespace HTMLTrust\Canonicalization\Keys;
+
+final class DirectUrlResolver implements KeyResolver
+{
+    /** @var callable(string): ?array{body: string, contentType: string} */
+    private $fetcher;
+
+    /**
+     * @param callable|null $fetcher URL fetcher returning null on failure or
+     *                               ['body' => ..., 'contentType' => ...];
+     *                               defaults to HttpFetcher::default().
+     */
+    public function __construct(?callable $fetcher = null)
+    {
+        $this->fetcher = $fetcher ?? HttpFetcher::default();
+    }
+
+    /**
+     * True when the keyid is itself an http(s) URL. URI schemes are
+     * case-insensitive, and the JS resolver matches with /^https?:\/\//i,
+     * so "HTTPS://..." is accepted as well.
+     */
+    public function supports(string $keyid): bool
+    {
+        return strncasecmp($keyid, 'http://', 7) === 0
+            || strncasecmp($keyid, 'https://', 8) === 0;
+    }
+
+    /**
+     * Fetch the keyid URL and interpret the response either as raw PEM or
+     * as a JSON object carrying "publicKey" / "publicKeyPem" and an optional
+     * "algorithm". Returns null when the keyid is unsupported, the fetch
+     * fails, or no usable key is found.
+     */
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        if (!$this->supports($keyid)) {
+            return null;
+        }
+
+        $response = ($this->fetcher)($keyid);
+        if ($response === null) {
+            return null;
+        }
+
+        $body        = $response['body'];
+        $contentType = strtolower($response['contentType'] ?? '');
+
+        // Raw PEM path: either the Content-Type says so ("pem" also covers
+        // "x-pem-file"), or the body itself begins with a PEM header (some
+        // static-file hosts mislabel them).
+        $looksLikePem = strpos($contentType, 'pem') !== false
+            || strpos(ltrim($body), '-----BEGIN') === 0;
+
+        if ($looksLikePem) {
+            // Detection tolerates leading whitespace, so hand on a trimmed
+            // body; the JS resolver trims raw PEM text the same way. A raw
+            // PEM response carries no algorithm hint, hence the default.
+            return new ResolvedKey(trim($body), 'ed25519', $keyid);
+        }
+
+        // JSON path.
+        $decoded = json_decode($body, true);
+        if (!is_array($decoded)) {
+            return null;
+        }
+        $pem = $decoded['publicKey'] ?? $decoded['publicKeyPem'] ?? null;
+        if (!is_string($pem) || $pem === '') {
+            return null;
+        }
+        $algorithm = isset($decoded['algorithm']) && is_string($decoded['algorithm']) && $decoded['algorithm'] !== ''
+            ? strtolower($decoded['algorithm'])
+            : 'ed25519';
+
+        return new ResolvedKey($pem, $algorithm, $keyid);
+    }
+}
diff --git a/php/src/Keys/HttpFetcher.php b/php/src/Keys/HttpFetcher.php
new file mode 100644
index 0000000..95722d6
--- /dev/null
+++ b/php/src/Keys/HttpFetcher.php
@@ -0,0 +1,116 @@
+ string, 'contentType' => string]; }
+ *
+ * which makes them trivially mockable in tests. This class supplies the
+ * production callable, preferring curl when available and falling back to
+ * a stream-context file_get_contents.
+ *
+ * @package HTMLTrust\Canonicalization\Keys
+ */
+
+namespace HTMLTrust\Canonicalization\Keys;
+
+final class HttpFetcher
+{
+    /**
+     * Returns a callable suitable for injection into a KeyResolver:
+     *
+     *     $fetcher = HttpFetcher::default();
+     *     $resolver = new DidWebResolver($fetcher);
+     *
+     * The callable returns null on failure, or
+     *   ['body' => string, 'contentType' => string]
+     * on success.
+     *
+     * Accepts file:// URLs (useful for tests) by reading directly from disk.
+     * Other URLs go through curl when the extension is loaded, restricted to
+     * http/https for the initial request and every redirect hop; without
+     * curl a stream-context file_get_contents is used.
+     *
+     * @return callable(string): ?array{body: string, contentType: string}
+     */
+    public static function default(): callable
+    {
+        return static function (string $url): ?array {
+            // Local file:// path — useful for tests and dev fixtures.
+            if (strncmp($url, 'file://', 7) === 0) {
+                $path = substr($url, 7);
+                if (!is_readable($path)) {
+                    return null;
+                }
+                $body = @file_get_contents($path);
+                if ($body === false) {
+                    return null;
+                }
+                return ['body' => $body, 'contentType' => self::guessContentTypeFromPath($path)];
+            }
+
+            // Prefer curl when available — better timeout semantics and
+            // easier header inspection.
+            if (function_exists('curl_init')) {
+                $handle = curl_init();
+                if ($handle === false) {
+                    return null;
+                }
+                curl_setopt_array($handle, [
+                    CURLOPT_URL             => $url,
+                    CURLOPT_RETURNTRANSFER  => true,
+                    CURLOPT_FOLLOWLOCATION  => true,
+                    CURLOPT_MAXREDIRS       => 5,
+                    // Key URLs come from untrusted documents: never let the
+                    // request, or a redirect, leave http/https.
+                    CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
+                    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
+                    CURLOPT_CONNECTTIMEOUT  => 5,
+                    CURLOPT_TIMEOUT         => 10,
+                    CURLOPT_SSL_VERIFYPEER  => true,
+                    CURLOPT_SSL_VERIFYHOST  => 2,
+                    CURLOPT_HTTPHEADER      => ['Accept: application/json, application/did+json, application/x-pem-file, */*'],
+                ]);
+                $body = curl_exec($handle);
+                $code = (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
+                $type = (string) curl_getinfo($handle, CURLINFO_CONTENT_TYPE);
+                curl_close($handle);
+
+                // Only 2xx responses count as success.
+                if ($body === false || $code < 200 || $code >= 300) {
+                    return null;
+                }
+                return ['body' => (string) $body, 'contentType' => $type];
+            }
+
+            // file_get_contents fallback.
+            $context = stream_context_create([
+                'http' => [
+                    'timeout' => 10,
+                    'header'  => "Accept: application/json, application/did+json, application/x-pem-file, */*\r\n",
+                ],
+                'ssl' => [
+                    'verify_peer'      => true,
+                    'verify_peer_name' => true,
+                ],
+            ]);
+            $body = @file_get_contents($url, false, $context);
+            if ($body === false) {
+                return null;
+            }
+
+            $contentType = '';
+            // $http_response_header is populated by file_get_contents. When
+            // redirects were followed it holds the headers of every hop in
+            // order, so keep scanning: the last Content-Type seen belongs to
+            // the response whose body we actually return.
+            if (isset($http_response_header) && is_array($http_response_header)) {
+                foreach ($http_response_header as $h) {
+                    if (stripos($h, 'content-type:') === 0) {
+                        $contentType = trim(substr($h, strlen('content-type:')));
+                    }
+                }
+            }
+            return ['body' => $body, 'contentType' => $contentType];
+        };
+    }
+
+    /**
+     * Map a local file's extension to the Content-Type reported for file://
+     * fetches: "json" and "pem" are recognised, anything else yields ''.
+     */
+    private static function guessContentTypeFromPath(string $path): string
+    {
+        $ext = strtolower((string) pathinfo($path, PATHINFO_EXTENSION));
+        switch ($ext) {
+            case 'json': return 'application/json';
+            case 'pem':  return 'application/x-pem-file';
+            default:     return '';
+        }
+    }
+}
diff --git a/php/src/Keys/KeyResolution.php b/php/src/Keys/KeyResolution.php
new file mode 100644
index 0000000..496cad5
--- /dev/null
+++ b/php/src/Keys/KeyResolution.php
@@ -0,0 +1,38 @@
+ $resolvers
+     */
+    public static function resolveKey(string $keyid, array $resolvers): ?ResolvedKey
+    {
+        // An empty keyid can never resolve; the chain is not consulted.
+        if ($keyid !== '') {
+            foreach ($resolvers as $candidate) {
+                // Entries that are not KeyResolver instances, or that decline
+                // the keyid via supports(), are skipped without resolving.
+                $usable = $candidate instanceof KeyResolver && $candidate->supports($keyid);
+                $key    = $usable ? $candidate->resolve($keyid) : null;
+                if ($key !== null) {
+                    // First successful resolution wins.
+                    return $key;
+                }
+            }
+        }
+        return null;
+    }
+}
diff --git a/php/src/Keys/KeyResolver.php b/php/src/Keys/KeyResolver.php
new file mode 100644
index 0000000..5d03b95
--- /dev/null
+++ b/php/src/Keys/KeyResolver.php
@@ -0,0 +1,28 @@
+publicKeyPem = $publicKeyPem;
+        $this->algorithm    = $algorithm;
+        $this->keyid        = $keyid;
+    }
+}
diff --git a/php/src/Keys/TrustDirectoryResolver.php b/php/src/Keys/TrustDirectoryResolver.php
new file mode 100644
index 0000000..fa056a6
--- /dev/null
+++ b/php/src/Keys/TrustDirectoryResolver.php
@@ -0,0 +1,79 @@
+", "algorithm": "ed25519" }
+ *   (also accepts "publicKeyPem" as a synonym, matching DID conventions)
+ *
+ * @package HTMLTrust\Canonicalization\Keys
+ */
+
+namespace HTMLTrust\Canonicalization\Keys;
+
+final class TrustDirectoryResolver implements KeyResolver
+{
+    /** @var string[] Base URLs, in the order they are tried. */
+    private $baseUrls;
+
+    /** @var callable(string): ?array{body: string, contentType: string} */
+    private $fetcher;
+
+    /**
+     * @param array         $baseUrls Ordered list of trust-directory base
+     *                                URLs; non-string entries are dropped.
+     * @param callable|null $fetcher  URL fetcher; defaults to
+     *                                HttpFetcher::default().
+     */
+    public function __construct(array $baseUrls, ?callable $fetcher = null)
+    {
+        $this->baseUrls = [];
+        foreach ($baseUrls as $candidate) {
+            if (is_string($candidate)) {
+                $this->baseUrls[] = $candidate;
+            }
+        }
+        $this->fetcher = $fetcher ?? HttpFetcher::default();
+    }
+
+    /**
+     * A trust directory takes any non-empty keyid that is neither a DID
+     * ("did:" prefix) nor an http(s) URL, i.e. whatever the more specific
+     * resolvers in the chain would decline.
+     */
+    public function supports(string $keyid): bool
+    {
+        return $keyid !== ''
+            && strncmp($keyid, 'did:', 4) !== 0
+            && strncmp($keyid, 'http://', 7) !== 0
+            && strncmp($keyid, 'https://', 8) !== 0;
+    }
+
+    /**
+     * Ask each base URL in turn for "<base>/keys/<url-encoded keyid>" and
+     * return the first response that decodes to a JSON object with a
+     * non-empty "publicKey" (or "publicKeyPem"). A missing or empty
+     * "algorithm" defaults to ed25519.
+     */
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        if (!$this->supports($keyid)) {
+            return null;
+        }
+
+        $encodedId = rawurlencode($keyid);
+        foreach ($this->baseUrls as $baseUrl) {
+            $reply  = ($this->fetcher)(rtrim($baseUrl, '/') . '/keys/' . $encodedId);
+            $record = $reply === null ? null : json_decode($reply['body'], true);
+            if (!is_array($record)) {
+                // Fetch failed or the body was not a JSON object/array.
+                continue;
+            }
+
+            $pem = $record['publicKey'] ?? $record['publicKeyPem'] ?? null;
+            if (!is_string($pem) || $pem === '') {
+                continue;
+            }
+
+            $declared  = $record['algorithm'] ?? null;
+            $algorithm = (is_string($declared) && $declared !== '')
+                ? strtolower($declared)
+                : 'ed25519';
+
+            return new ResolvedKey($pem, $algorithm, $keyid);
+        }
+
+        return null;
+    }
+}
diff --git a/php/src/Signature.php b/php/src/Signature.php
new file mode 100644
index 0000000..b2ba300
--- /dev/null
+++ b/php/src/Signature.php
@@ -0,0 +1,308 @@
+ $endorsement
+     * @param array $resolvers
+     */
+    public static function verifyEndorsement(array $endorsement, array $resolvers): bool
+    {
+        // Every one of these fields must be present as a non-empty string;
+        // anything else is rejected before any key lookup happens.
+        foreach (['endorser', 'endorsement', 'signature', 'timestamp'] as $required) {
+            if (!isset($endorsement[$required]) || !is_string($endorsement[$required]) || $endorsement[$required] === '') {
+                return false;
+            }
+        }
+
+        $endorser  = $endorsement['endorser'];
+        $payload   = $endorsement['endorsement'];
+        $signature = $endorsement['signature'];
+        $timestamp = $endorsement['timestamp'];
+
+        $resolved = KeyResolution::resolveKey($endorser, $resolvers);
+        if ($resolved === null) {
+            return false;
+        }
+
+        // Prefer the algorithm declared in the endorsement: the wire format
+        // wins, mirroring the JS reference. Only when the endorsement omits
+        // it do we fall back to the resolved key's hint, and finally to
+        // ed25519 when the key carries no usable hint either.
+        $algorithm = $endorsement['algorithm'] ?? null;
+        if (!is_string($algorithm) || $algorithm === '') {
+            $algorithm = $resolved->algorithm ?? null;
+        }
+        if (!is_string($algorithm) || $algorithm === '') {
+            $algorithm = 'ed25519';
+        }
+
+        $message = self::buildEndorsementBinding($payload, $timestamp);
+
+        try {
+            return self::verifySignature($message, $signature, $resolved->publicKeyPem, $algorithm);
+        } catch (InvalidArgumentException $e) {
+            // An InvalidArgumentException from verifySignature is reported as
+            // a failed verification rather than propagated to the caller.
+            return false;
+        }
+    }
+
+    // ------------------------------------------------------------------
+    // Internal helpers
+    // ------------------------------------------------------------------
+
+    /**
+     * Base64-decode input whose trailing "=" padding may have been omitted.
+     *
+     * Surrounding whitespace is trimmed, missing padding is restored, and
+     * the result is decoded in strict mode.
+     *
+     * @return string|null Decoded bytes, or null for empty or malformed input.
+     */
+    private static function base64DecodeFlexible(string $input): ?string
+    {
+        $encoded = trim($input);
+        if ($encoded === '') {
+            return null;
+        }
+
+        switch (strlen($encoded) % 4) {
+            case 1:
+                // A length of 1 mod 4 is never valid base64.
+                return null;
+            case 2:
+                $encoded .= '==';
+                break;
+            case 3:
+                $encoded .= '=';
+                break;
+        }
+
+        $bytes = base64_decode($encoded, true);
+        if ($bytes === false) {
+            return null;
+        }
+
+        return $bytes;
+    }
+
+    /**
+     * Check a detached Ed25519 signature over $message.
+     *
+     * $publicKey may be a PEM SubjectPublicKeyInfo or an already-raw sodium
+     * public key; it is reduced to raw bytes by extractEd25519RawKey().
+     *
+     * @return bool True only when libsodium accepts the signature. A key or
+     *              signature of the wrong length, or any error thrown by
+     *              libsodium, yields false.
+     * @throws RuntimeException When libsodium is not available.
+     */
+    private static function verifyEd25519(string $message, string $signature, string $publicKey): bool
+    {
+        if (!function_exists('sodium_crypto_sign_verify_detached')) {
+            throw new RuntimeException('libsodium is required to verify ed25519 signatures');
+        }
+
+        $keyBytes = self::extractEd25519RawKey($publicKey);
+        $keyUsable = $keyBytes !== null && strlen($keyBytes) === SODIUM_CRYPTO_SIGN_PUBLICKEYBYTES;
+        if (!$keyUsable) {
+            return false;
+        }
+
+        $signatureUsable = strlen($signature) === SODIUM_CRYPTO_SIGN_BYTES;
+        if (!$signatureUsable) {
+            return false;
+        }
+
+        try {
+            $verified = sodium_crypto_sign_verify_detached($signature, $message, $keyBytes);
+        } catch (\Throwable $e) {
+            // sodium_* throws SodiumException on malformed inputs; anything
+            // thrown here counts as a failed verification.
+            return false;
+        }
+
+        return $verified;
+    }
+
+    /**
+     * Extract the raw Ed25519 public key from either:
+     *   - a PEM-encoded SubjectPublicKeyInfo (`-----BEGIN PUBLIC KEY-----` ...)
+     *   - a raw string of SODIUM_CRYPTO_SIGN_PUBLICKEYBYTES bytes (already raw)
+     *
+     * The DER inside an Ed25519 SPKI PEM is exactly 44 bytes: a fixed
+     * 12-byte SPKI header followed by the 32-byte raw key. The header is
+     * checked byte-for-byte so that a PEM holding some other key type (RSA,
+     * ECDSA, ...) is rejected instead of having its trailing bytes
+     * misread as an Ed25519 key.
+     *
+     * @param string $publicKey PEM text or raw key bytes.
+     * @return string|null The raw key, or null when the input is neither a
+     *                     raw key of the right length nor a well-formed
+     *                     Ed25519 SPKI PEM.
+     */
+    private static function extractEd25519RawKey(string $publicKey): ?string
+    {
+        $looksLikePem = strpos($publicKey, '-----BEGIN') !== false;
+
+        // Already raw?
+        if (!$looksLikePem) {
+            return strlen($publicKey) === SODIUM_CRYPTO_SIGN_PUBLICKEYBYTES ? $publicKey : null;
+        }
+
+        // PEM path: strip header/footer and whitespace, then base64-decode.
+        $body = preg_replace('/-----BEGIN [^-]+-----|-----END [^-]+-----|\s+/', '', $publicKey);
+        if ($body === null || $body === '') {
+            return null;
+        }
+        $der = base64_decode($body, true);
+        if ($der === false) {
+            return null;
+        }
+
+        // SPKI header for AlgorithmIdentifier { id-Ed25519 } followed by a
+        // BIT STRING: 30 2A 30 05 06 03 2B 65 70 03 21 00 <raw key>.
+        $spkiHeader = "\x30\x2a\x30\x05\x06\x03\x2b\x65\x70\x03\x21\x00";
+        $headerLen  = strlen($spkiHeader);
+
+        if (strlen($der) !== $headerLen + SODIUM_CRYPTO_SIGN_PUBLICKEYBYTES) {
+            return null;
+        }
+        if (substr($der, 0, $headerLen) !== $spkiHeader) {
+            return null;
+        }
+
+        return substr($der, $headerLen);
+    }
+
+    /**
+     * Check an ECDSA or RSA signature with OpenSSL, hashing with SHA-256.
+     *
+     * @return bool True only when openssl_verify() reports a match; an
+     *              unparsable public key yields false.
+     * @throws RuntimeException When ext-openssl is not available.
+     */
+    private static function verifyOpenssl(string $message, string $signature, string $publicKeyPem): bool
+    {
+        if (!function_exists('openssl_verify')) {
+            throw new RuntimeException('ext-openssl is required for ecdsa/rsa verification');
+        }
+
+        $parsedKey = openssl_pkey_get_public($publicKeyPem);
+        if ($parsedKey === false) {
+            return false;
+        }
+
+        $status  = openssl_verify($message, $signature, $parsedKey, OPENSSL_ALGO_SHA256);
+        $matched = ($status === 1);
+
+        // Before PHP 8.0 the key may be a resource that has to be freed by
+        // hand; from 8.0 on it is an OpenSSLAsymmetricKey object that is
+        // garbage-collected automatically.
+        if (PHP_VERSION_ID < 80000 && is_resource($parsedKey)) {
+            // @phpstan-ignore-next-line — only present on PHP < 8.0
+            openssl_free_key($parsedKey);
+        }
+
+        return $matched;
+    }
+
+    /**
+     * Wrap a raw 32-byte Ed25519 public key in a PEM SubjectPublicKeyInfo.
+     *
+     * Handy for tests and tooling that need to feed libsodium-generated keys
+     * into the PEM-based verification path.
+     *
+     * @param string $rawKey Exactly 32 bytes of raw public key.
+     * @return string PEM text, terminated by a newline.
+     * @throws InvalidArgumentException When $rawKey is not 32 bytes long.
+     */
+    public static function ed25519RawToPem(string $rawKey): string
+    {
+        if (strlen($rawKey) !== 32) {
+            throw new InvalidArgumentException('ed25519 raw public key must be 32 bytes');
+        }
+
+        // SPKI header for AlgorithmIdentifier { id-Ed25519 }, BIT STRING (32 bytes):
+        // 30 2A 30 05 06 03 2B 65 70 03 21 00 <32-byte key>
+        $spki = "\x30\x2a\x30\x05\x06\x03\x2b\x65\x70\x03\x21\x00" . $rawKey;
+
+        // Wrap the base64 text at 64 columns, one newline after every line.
+        $wrapped = implode("\n", str_split(base64_encode($spki), 64)) . "\n";
+
+        return "-----BEGIN PUBLIC KEY-----\n" . $wrapped . "-----END PUBLIC KEY-----\n";
+    }
+}
diff --git a/php/tests/CanonicalizeClaimsTest.php b/php/tests/CanonicalizeClaimsTest.php
new file mode 100644
index 0000000..512f50f
--- /dev/null
+++ b/php/tests/CanonicalizeClaimsTest.php
@@ -0,0 +1,45 @@
+ '1', 'a' => '2', 'm' => '3'];
+        $this->assertSame("a=2\nm=3\nz=1", Canonicalize::canonicalizeClaims($claims));
+    }
+
+    public function testNormalizesNamesAndValues(): void
+    {
+        // A value wrapped in curly double quotes must come out with straight
+        // quotes, so equivalent metadata serializes (and hashes) the same.
+        $curlyQuoted = "\u{201C}Hello\u{201D}";
+
+        $serialized = Canonicalize::canonicalizeClaims(['title' => $curlyQuoted]);
+
+        $this->assertSame('title="Hello"', $serialized);
+    }
+
+    public function testStringifiesNonStringValues(): void
+    {
+        // Non-string values are expected in their PHP string form:
+        // 42 becomes "42" and true becomes "1".
+        $serialized = Canonicalize::canonicalizeClaims(['count' => 42, 'flag' => true]);
+
+        $this->assertSame("count=42\nflag=1", $serialized);
+    }
+
+    public function testEmptyClaimsProducesEmptyString(): void
+    {
+        // No claims at all serialize to the empty string.
+        $serialized = Canonicalize::canonicalizeClaims([]);
+
+        $this->assertSame('', $serialized);
+    }
+
+    public function testStableUnderInputOrdering(): void
+    {
+        // The same three claims, supplied in two different key orders, must
+        // serialize to the same string.
+        $firstOrder  = ['b' => '1', 'a' => '2', 'c' => '3'];
+        $secondOrder = ['c' => '3', 'a' => '2', 'b' => '1'];
+
+        $this->assertSame(
+            Canonicalize::canonicalizeClaims($firstOrder),
+            Canonicalize::canonicalizeClaims($secondOrder)
+        );
+    }
+}
diff --git a/php/tests/EndorsementTest.php b/php/tests/EndorsementTest.php
new file mode 100644
index 0000000..c136cee
--- /dev/null
+++ b/php/tests/EndorsementTest.php
@@ -0,0 +1,144 @@
+skipIfNoSodium();
+        [$endorser, $pem, $secret] = $this->makeEndorser();
+
+        $endorsement = [
+            'endorser'    => $endorser,
+            'endorsement' => 'sha256:CONTENT',
+            'timestamp'   => '2025-05-01T00:00Z',
+            'algorithm'   => 'ed25519',
+        ];
+        $message = $endorsement['endorsement'] . ':' . $endorsement['timestamp'];
+        $endorsement['signature'] = base64_encode(sodium_crypto_sign_detached($message, $secret));
+
+        $resolver = new InMemoryResolver([$endorser => new ResolvedKey($pem, 'ed25519', $endorser)]);
+        $this->assertTrue(Signature::verifyEndorsement($endorsement, [$resolver]));
+    }
+
+    public function testVerifyEndorsementDefaultsToEd25519(): void
+    {
+        $this->skipIfNoSodium();
+        [$endorser, $pem, $secret] = $this->makeEndorser();
+
+        $payload   = 'sha256:CONTENT';
+        $timestamp = '2025-05-01T00:00Z';
+
+        // Sign "<endorsement>:<timestamp>" with the endorser's secret key.
+        $detached = sodium_crypto_sign_detached($payload . ':' . $timestamp, $secret);
+
+        // Deliberately no 'algorithm' key: verification must still succeed
+        // for an ed25519 key.
+        $endorsement = [
+            'endorser'    => $endorser,
+            'endorsement' => $payload,
+            'timestamp'   => $timestamp,
+            'signature'   => base64_encode($detached),
+        ];
+
+        $knownKeys = [$endorser => new ResolvedKey($pem, 'ed25519', $endorser)];
+        $resolver  = new InMemoryResolver($knownKeys);
+
+        $this->assertTrue(Signature::verifyEndorsement($endorsement, [$resolver]));
+    }
+
+    public function testVerifyEndorsementFailsForTamperedTimestamp(): void
+    {
+        $this->skipIfNoSodium();
+        [$endorser, $pem, $secret] = $this->makeEndorser();
+
+        // The signature covers the 2025-05-01 timestamp ...
+        $detached = sodium_crypto_sign_detached('sha256:CONTENT:2025-05-01T00:00Z', $secret);
+
+        // ... but the endorsement presented for verification claims
+        // 2025-05-02, so the signed binding no longer matches.
+        $endorsement = [
+            'endorser'    => $endorser,
+            'endorsement' => 'sha256:CONTENT',
+            'timestamp'   => '2025-05-02T00:00Z',
+            'signature'   => base64_encode($detached),
+        ];
+
+        $knownKeys = [$endorser => new ResolvedKey($pem, 'ed25519', $endorser)];
+        $resolver  = new InMemoryResolver($knownKeys);
+
+        $this->assertFalse(Signature::verifyEndorsement($endorsement, [$resolver]));
+    }
+
+    public function testVerifyEndorsementFailsForUnknownEndorser(): void
+    {
+        $this->skipIfNoSodium();
+        [$endorser, , $secret] = $this->makeEndorser();
+
+        // A correctly signed endorsement ...
+        $detached    = sodium_crypto_sign_detached('sha256:CONTENT:2025-05-01T00:00Z', $secret);
+        $endorsement = [
+            'endorser'    => $endorser,
+            'endorsement' => 'sha256:CONTENT',
+            'timestamp'   => '2025-05-01T00:00Z',
+            'signature'   => base64_encode($detached),
+        ];
+
+        // ... checked against a resolver that knows no keys at all, so the
+        // endorser cannot be resolved.
+        $emptyResolver = new InMemoryResolver([]);
+
+        $this->assertFalse(Signature::verifyEndorsement($endorsement, [$emptyResolver]));
+    }
+
+    public function testVerifyEndorsementFailsOnMissingFields(): void
+    {
+        // Neither 'signature' nor 'timestamp' is supplied.
+        $incomplete = [
+            'endorser'    => 'did:web:example.com',
+            'endorsement' => 'sha256:CONTENT',
+        ];
+
+        $verified = Signature::verifyEndorsement($incomplete, [new InMemoryResolver([])]);
+
+        $this->assertFalse($verified);
+    }
+
+    // ------------------------------------------------------------------
+
+    /**
+     * Mark the current test as skipped when the sodium signing functions
+     * are not available in this PHP build.
+     */
+    private function skipIfNoSodium(): void
+    {
+        if (function_exists('sodium_crypto_sign_keypair')) {
+            return;
+        }
+
+        $this->markTestSkipped('libsodium not available');
+    }
+
+    /**
+     * Generate a fresh Ed25519 keypair for a fixed test endorser.
+     *
+     * @return array{0: string, 1: string, 2: string} [endorser keyid, public PEM, secret raw]
+     */
+    private function makeEndorser(): array
+    {
+        $pair = sodium_crypto_sign_keypair();
+
+        $secretKey = sodium_crypto_sign_secretkey($pair);
+        $publicPem = Signature::ed25519RawToPem(sodium_crypto_sign_publickey($pair));
+
+        return ['did:web:endorser.example', $publicPem, $secretKey];
+    }
+}
+
+/**
+ * Test-only KeyResolver that answers lookups from a fixed
+ * keyid => ResolvedKey map handed to the constructor.
+ */
+class InMemoryResolver implements KeyResolver
+{
+    /** @var array<string, ResolvedKey> Lookup table indexed by key id. */
+    private $table;
+
+    /**
+     * @param array<string, ResolvedKey> $keys Keys this resolver should know about.
+     */
+    public function __construct(array $keys)
+    {
+        $this->table = $keys;
+    }
+
+    /**
+     * A key id is supported exactly when it is present in the table.
+     */
+    public function supports(string $keyid): bool
+    {
+        return isset($this->table[$keyid]);
+    }
+
+    /**
+     * Return the stored key for the id, or null when there is none.
+     */
+    public function resolve(string $keyid): ?ResolvedKey
+    {
+        if (!isset($this->table[$keyid])) {
+            return null;
+        }
+
+        return $this->table[$keyid];
+    }
+}
diff --git a/php/tests/ExtractCanonicalTextTest.php b/php/tests/ExtractCanonicalTextTest.php
new file mode 100644
index 0000000..838daf2
--- /dev/null
+++ b/php/tests/ExtractCanonicalTextTest.php
@@ -0,0 +1,86 @@
+ canonical text extraction.
+ */
+
+namespace HTMLTrust\Canonicalization\Tests;
+
+use PHPUnit\Framework\TestCase;
+use HTMLTrust\Canonicalization\Canonicalize;
+
+class ExtractCanonicalTextTest extends TestCase
+{
+    public function testStripsScriptStyleAndContents(): void
+    {
+        $html = '

Hello

World

'; + $this->assertSame('Hello World', Canonicalize::extractCanonicalText($html)); + } + + public function testStripsMetaInsideSignedSection(): void + { + // Inside a signed-section, carries claim metadata, not content. + $html = '

Body

'; + $this->assertSame('Body', Canonicalize::extractCanonicalText($html)); + } + + public function testBlockBoundariesBecomeSpaces(): void + { + $html = '

A

B

'; + $this->assertSame('A B', Canonicalize::extractCanonicalText($html)); + } + + public function testInlineTagsDoNotAddSpaces(): void + { + //

hello world

should canonicalize to "hello world" + // — no separator inside the inline boundary. + $html = '

hello world

'; + $this->assertSame('hello world', Canonicalize::extractCanonicalText($html)); + } + + public function testDecodesNamedEntities(): void + { + $html = '

AT&T — “hello”

'; + // mdash and curly quotes get normalized away by the text pipeline. + $this->assertSame('AT&T - "hello"', Canonicalize::extractCanonicalText($html)); + } + + public function testDecodesNumericEntities(): void + { + // A -> A, B -> B + $html = '

ABC

'; + $this->assertSame('ABC', Canonicalize::extractCanonicalText($html)); + } + + public function testNormalizationPipelineApplied(): void + { + // Curly quotes from HTML attribute-free content should be straightened. + $html = "

\u{201C}Hello\u{201D}

"; + $this->assertSame('"Hello"', Canonicalize::extractCanonicalText($html)); + } + + public function testHandlesNestedInlineMarkup(): void + { + $html = '

This is very important.

'; + $this->assertSame('This is very important.', Canonicalize::extractCanonicalText($html)); + } + + public function testStripsLinksButPreservesText(): void + { + $html = '

See our site now.

'; + $this->assertSame('See our site now.', Canonicalize::extractCanonicalText($html)); + } + + public function testStripsImagesEntirely(): void + { + $html = '

BeforexAfter

'; + // Void stripped (becomes a space). Then inline text concatenates, + // and whitespace collapses. + $this->assertSame('Before After', Canonicalize::extractCanonicalText($html)); + } + + public function testEmptyAndAllMarkup(): void + { + $this->assertSame('', Canonicalize::extractCanonicalText('')); + $this->assertSame('', Canonicalize::extractCanonicalText('
')); + } +} diff --git a/php/tests/Keys/DidWebResolverTest.php b/php/tests/Keys/DidWebResolverTest.php new file mode 100644 index 0000000..96ce3ff --- /dev/null +++ b/php/tests/Keys/DidWebResolverTest.php @@ -0,0 +1,188 @@ +assertTrue($resolver->supports('did:web:example.com')); + $this->assertFalse($resolver->supports('did:key:z123')); + $this->assertFalse($resolver->supports('https://example.com/key')); + } + + public function testResolvesBasicDomain(): void + { + $captured = ['url' => null]; + $fetcher = static function (string $url) use (&$captured): ?array { + $captured['url'] = $url; + $body = json_encode([ + 'id' => 'did:web:example.com', + 'verificationMethod' => [ + [ + 'id' => 'did:web:example.com#keys-1', + 'type' => 'Ed25519VerificationKey2020', + 'publicKeyPem' => "-----BEGIN PUBLIC KEY-----\nFAKE\n-----END PUBLIC KEY-----\n", + ], + ], + ]); + return ['body' => $body, 'contentType' => 'application/did+json']; + }; + + $resolver = new DidWebResolver($fetcher); + $resolved = $resolver->resolve('did:web:example.com'); + + $this->assertNotNull($resolved); + $this->assertSame('https://example.com/.well-known/did.json', $captured['url']); + $this->assertSame('ed25519', $resolved->algorithm); + $this->assertSame('did:web:example.com', $resolved->keyid); + $this->assertStringContainsString('FAKE', $resolved->publicKeyPem); + } + + public function testResolvesWithPathSegments(): void + { + $captured = ['url' => null]; + $fetcher = static function (string $url) use (&$captured): ?array { + $captured['url'] = $url; + return [ + 'body' => json_encode([ + 'verificationMethod' => [ + ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'PEM'], + ], + ]), + 'contentType' => 'application/json', + ]; + }; + + $resolver = new DidWebResolver($fetcher); + $resolved = $resolver->resolve('did:web:example.com:user:alice'); + + $this->assertNotNull($resolved); + $this->assertSame('https://example.com/user/alice/did.json', $captured['url']); + } + + public function 
testIgnoresFragment(): void + { + $captured = ['url' => null]; + $fetcher = static function (string $url) use (&$captured): ?array { + $captured['url'] = $url; + return [ + 'body' => json_encode([ + 'verificationMethod' => [ + ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'PEM'], + ], + ]), + 'contentType' => '', + ]; + }; + + $resolver = new DidWebResolver($fetcher); + $resolver->resolve('did:web:example.com#keys-1'); + $this->assertSame('https://example.com/.well-known/did.json', $captured['url']); + } + + public function testReturnsNullOnFetchFailure(): void + { + $resolver = new DidWebResolver(static function (string $url): ?array { + return null; + }); + $this->assertNull($resolver->resolve('did:web:example.com')); + } + + public function testReturnsNullOnInvalidJson(): void + { + $fetcher = static function (string $url): ?array { + return ['body' => 'not json', 'contentType' => 'application/json']; + }; + $resolver = new DidWebResolver($fetcher); + $this->assertNull($resolver->resolve('did:web:example.com')); + } + + public function testReturnsNullWhenNoVerificationMethodHasPem(): void + { + $fetcher = static function (string $url): ?array { + return [ + 'body' => json_encode(['verificationMethod' => [['type' => 'X']]]), + 'contentType' => 'application/json', + ]; + }; + $resolver = new DidWebResolver($fetcher); + $this->assertNull($resolver->resolve('did:web:example.com')); + } + + public function testPicksFirstVerificationMethodWithPem(): void + { + $fetcher = static function (string $url): ?array { + return [ + 'body' => json_encode([ + 'verificationMethod' => [ + ['type' => 'X'], // skipped: no pem + ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'A'], // chosen + ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'B'], + ], + ]), + 'contentType' => 'application/json', + ]; + }; + $resolver = new DidWebResolver($fetcher); + $resolved = $resolver->resolve('did:web:example.com'); + $this->assertNotNull($resolved); + 
$this->assertSame('A', $resolved->publicKeyPem); + } + + public function testInfersEcdsaFromMethodType(): void + { + $fetcher = static function (string $url): ?array { + return [ + 'body' => json_encode([ + 'verificationMethod' => [ + ['type' => 'EcdsaSecp256r1VerificationKey2019', 'publicKeyPem' => 'PEM'], + ], + ]), + 'contentType' => 'application/json', + ]; + }; + $resolver = new DidWebResolver($fetcher); + $resolved = $resolver->resolve('did:web:example.com'); + $this->assertNotNull($resolved); + $this->assertSame('ecdsa', $resolved->algorithm); + } + + public function testReadsFromFileFixture(): void + { + // Exercise a fetcher that delegates to a real on-disk fixture. + $fixtureDir = sys_get_temp_dir() . '/htmltrust-didweb-' . bin2hex(random_bytes(4)); + mkdir($fixtureDir . '/.well-known', 0700, true); + $fixturePath = $fixtureDir . '/.well-known/did.json'; + file_put_contents($fixturePath, json_encode([ + 'verificationMethod' => [ + ['type' => 'Ed25519VerificationKey2020', 'publicKeyPem' => 'FROM_FILE'], + ], + ])); + + $fetcher = static function (string $url) use ($fixturePath): ?array { + if ($url === 'https://example.com/.well-known/did.json') { + return ['body' => file_get_contents($fixturePath), 'contentType' => 'application/json']; + } + return null; + }; + $resolver = new DidWebResolver($fetcher); + $resolved = $resolver->resolve('did:web:example.com'); + $this->assertNotNull($resolved); + $this->assertSame('FROM_FILE', $resolved->publicKeyPem); + + unlink($fixturePath); + rmdir($fixtureDir . 
'/.well-known'); + rmdir($fixtureDir); + } +} diff --git a/php/tests/Keys/DirectUrlResolverTest.php b/php/tests/Keys/DirectUrlResolverTest.php new file mode 100644 index 0000000..5e9da46 --- /dev/null +++ b/php/tests/Keys/DirectUrlResolverTest.php @@ -0,0 +1,132 @@ +assertTrue($resolver->supports('https://example.com/key.json')); + $this->assertTrue($resolver->supports('http://example.com/key.json')); + $this->assertFalse($resolver->supports('did:web:example.com')); + $this->assertFalse($resolver->supports('opaque-id')); + } + + public function testResolvesJsonDocument(): void + { + $fetcher = static function (string $url): ?array { + return [ + 'body' => json_encode(['publicKey' => 'PEM-BODY', 'algorithm' => 'rsa']), + 'contentType' => 'application/json', + ]; + }; + $resolver = new DirectUrlResolver($fetcher); + $resolved = $resolver->resolve('https://example.com/key.json'); + + $this->assertNotNull($resolved); + $this->assertSame('PEM-BODY', $resolved->publicKeyPem); + $this->assertSame('rsa', $resolved->algorithm); + $this->assertSame('https://example.com/key.json', $resolved->keyid); + } + + public function testDefaultsAlgorithmToEd25519(): void + { + $fetcher = static function (string $url): ?array { + return [ + 'body' => json_encode(['publicKey' => 'PEM']), + 'contentType' => 'application/json', + ]; + }; + $resolver = new DirectUrlResolver($fetcher); + $resolved = $resolver->resolve('https://example.com/key.json'); + $this->assertNotNull($resolved); + $this->assertSame('ed25519', $resolved->algorithm); + } + + public function testAcceptsPublicKeyPemSynonym(): void + { + $fetcher = static function (string $url): ?array { + return [ + 'body' => json_encode(['publicKeyPem' => 'PEM-BODY']), + 'contentType' => 'application/json', + ]; + }; + $resolver = new DirectUrlResolver($fetcher); + $resolved = $resolver->resolve('https://example.com/key.json'); + $this->assertNotNull($resolved); + $this->assertSame('PEM-BODY', $resolved->publicKeyPem); + } + + public 
function testRecognizesRawPemByContentType(): void + { + $fetcher = static function (string $url): ?array { + return [ + 'body' => "-----BEGIN PUBLIC KEY-----\nABC\n-----END PUBLIC KEY-----\n", + 'contentType' => 'application/x-pem-file', + ]; + }; + $resolver = new DirectUrlResolver($fetcher); + $resolved = $resolver->resolve('https://example.com/key.pem'); + $this->assertNotNull($resolved); + $this->assertStringContainsString('BEGIN PUBLIC KEY', $resolved->publicKeyPem); + $this->assertSame('ed25519', $resolved->algorithm); + } + + public function testRecognizesRawPemByBodyPrelude(): void + { + $fetcher = static function (string $url): ?array { + return [ + 'body' => "-----BEGIN PUBLIC KEY-----\nABC\n-----END PUBLIC KEY-----\n", + 'contentType' => 'text/plain', // mislabelled + ]; + }; + $resolver = new DirectUrlResolver($fetcher); + $resolved = $resolver->resolve('https://example.com/key.pem'); + $this->assertNotNull($resolved); + } + + public function testReturnsNullOnFetchFailure(): void + { + $resolver = new DirectUrlResolver(static function (string $url): ?array { + return null; + }); + $this->assertNull($resolver->resolve('https://example.com/key.json')); + } + + public function testReturnsNullForUnsupportedScheme(): void + { + $fetcher = static function (string $url): ?array { + return ['body' => '{}', 'contentType' => 'application/json']; + }; + $resolver = new DirectUrlResolver($fetcher); + $this->assertNull($resolver->resolve('did:web:example.com')); + } + + public function testReturnsNullOnMalformedJson(): void + { + $fetcher = static function (string $url): ?array { + return ['body' => 'not json', 'contentType' => 'application/json']; + }; + $resolver = new DirectUrlResolver($fetcher); + $this->assertNull($resolver->resolve('https://example.com/key.json')); + } + + public function testReturnsNullWhenJsonHasNoKey(): void + { + $fetcher = static function (string $url): ?array { + return ['body' => '{"unrelated":1}', 'contentType' => 'application/json']; 
+ }; + $resolver = new DirectUrlResolver($fetcher); + $this->assertNull($resolver->resolve('https://example.com/key.json')); + } +} diff --git a/php/tests/Keys/KeyResolutionTest.php b/php/tests/Keys/KeyResolutionTest.php new file mode 100644 index 0000000..196ff90 --- /dev/null +++ b/php/tests/Keys/KeyResolutionTest.php @@ -0,0 +1,92 @@ +assertNull(KeyResolution::resolveKey('theirs', [$a, $b])); + $this->assertSame(0, $a->resolveCalls); + $this->assertSame(0, $b->resolveCalls); + } + + public function testSkipsToNextResolverWhenFirstFails(): void + { + $a = new RecordingResolver(['x'], null); // supports but resolve()=null + $b = new RecordingResolver(['x'], 'PEM-FROM-B'); // supports + succeeds + $resolved = KeyResolution::resolveKey('x', [$a, $b]); + $this->assertNotNull($resolved); + $this->assertSame('PEM-FROM-B', $resolved->publicKeyPem); + $this->assertSame(1, $a->resolveCalls); + $this->assertSame(1, $b->resolveCalls); + } + + public function testFirstSupportingAndResolvingResolverWins(): void + { + $a = new RecordingResolver(['x'], 'A'); + $b = new RecordingResolver(['x'], 'B'); + $resolved = KeyResolution::resolveKey('x', [$a, $b]); + $this->assertSame('A', $resolved->publicKeyPem); + $this->assertSame(0, $b->resolveCalls); // short-circuit + } + + public function testIgnoresNonResolvers(): void + { + $a = new RecordingResolver(['x'], 'A'); + // Non-KeyResolver entries are silently skipped. + $resolved = KeyResolution::resolveKey('x', ['nonsense', 42, $a]); + $this->assertNotNull($resolved); + $this->assertSame('A', $resolved->publicKeyPem); + } + + public function testEmptyKeyidReturnsNull(): void + { + $a = new RecordingResolver(['x'], 'A'); + $this->assertNull(KeyResolution::resolveKey('', [$a])); + } +} + +/** + * Tiny test-only resolver: supports a fixed list of keyids; if it supports + * the keyid, returns either the configured PEM or null. 
+ */ +class RecordingResolver implements KeyResolver +{ + /** @var array */ + private $supportedKeyids; + /** @var ?string */ + private $pemOrNull; + /** @var int */ + public $resolveCalls = 0; + + public function __construct(array $supportedKeyids, ?string $pemOrNull) + { + $this->supportedKeyids = $supportedKeyids; + $this->pemOrNull = $pemOrNull; + } + + public function supports(string $keyid): bool + { + return in_array($keyid, $this->supportedKeyids, true); + } + + public function resolve(string $keyid): ?ResolvedKey + { + $this->resolveCalls++; + if ($this->pemOrNull === null) { + return null; + } + return new ResolvedKey($this->pemOrNull, 'ed25519', $keyid); + } +} diff --git a/php/tests/Keys/TrustDirectoryResolverTest.php b/php/tests/Keys/TrustDirectoryResolverTest.php new file mode 100644 index 0000000..edd31fc --- /dev/null +++ b/php/tests/Keys/TrustDirectoryResolverTest.php @@ -0,0 +1,121 @@ +assertTrue($resolver->supports('abc123')); + $this->assertFalse($resolver->supports('did:web:example.com')); + $this->assertFalse($resolver->supports('https://example.com/key')); + $this->assertFalse($resolver->supports('')); + } + + public function testQueriesEachBaseInOrder(): void + { + $calls = []; + $fetcher = static function (string $url) use (&$calls): ?array { + $calls[] = $url; + // First base 404s, second succeeds. + if (strpos($url, 'second.example') !== false) { + return [ + 'body' => json_encode(['publicKey' => 'PEM', 'algorithm' => 'ed25519']), + 'contentType' => 'application/json', + ]; + } + return null; + }; + + $resolver = new TrustDirectoryResolver( + ['https://first.example/v1', 'https://second.example/v1/'], + $fetcher + ); + $resolved = $resolver->resolve('abc123'); + + $this->assertNotNull($resolved); + $this->assertSame('PEM', $resolved->publicKeyPem); + $this->assertSame(2, count($calls)); + $this->assertSame('https://first.example/v1/keys/abc123', $calls[0]); + // Trailing slash on the second base should be normalized. 
+ $this->assertSame('https://second.example/v1/keys/abc123', $calls[1]); + } + + public function testReturnsNullWhenAllBasesFail(): void + { + $resolver = new TrustDirectoryResolver( + ['https://a.example', 'https://b.example'], + static function (string $url): ?array { + return null; + } + ); + $this->assertNull($resolver->resolve('abc123')); + } + + public function testUrlEncodesKeyid(): void + { + $captured = ['url' => null]; + $fetcher = static function (string $url) use (&$captured): ?array { + $captured['url'] = $url; + return [ + 'body' => json_encode(['publicKey' => 'PEM']), + 'contentType' => 'application/json', + ]; + }; + $resolver = new TrustDirectoryResolver(['https://dir.example/v1'], $fetcher); + $resolver->resolve('id with spaces/and slashes'); + $this->assertSame( + 'https://dir.example/v1/keys/id%20with%20spaces%2Fand%20slashes', + $captured['url'] + ); + } + + public function testStopsAtFirstSuccess(): void + { + $calls = []; + $fetcher = static function (string $url) use (&$calls): ?array { + $calls[] = $url; + return [ + 'body' => json_encode(['publicKey' => 'PEM']), + 'contentType' => 'application/json', + ]; + }; + $resolver = new TrustDirectoryResolver( + ['https://first.example', 'https://second.example'], + $fetcher + ); + $resolved = $resolver->resolve('abc123'); + $this->assertNotNull($resolved); + $this->assertSame(1, count($calls)); // second base never queried + } + + public function testIgnoresInvalidJsonBaseAndContinues(): void + { + $fetcher = static function (string $url): ?array { + if (strpos($url, 'first') !== false) { + return ['body' => 'not json', 'contentType' => 'application/json']; + } + return [ + 'body' => json_encode(['publicKey' => 'PEM']), + 'contentType' => 'application/json', + ]; + }; + $resolver = new TrustDirectoryResolver( + ['https://first.example', 'https://second.example'], + $fetcher + ); + $resolved = $resolver->resolve('abc123'); + $this->assertNotNull($resolved); + $this->assertSame('PEM', 
$resolved->publicKeyPem); + } +} diff --git a/php/tests/SignatureTest.php b/php/tests/SignatureTest.php new file mode 100644 index 0000000..2c00e6a --- /dev/null +++ b/php/tests/SignatureTest.php @@ -0,0 +1,260 @@ +assertSame( + 'sha256:ABC:sha256:DEF:example.com:2025-05-01T00:00Z', + Signature::buildSignatureBinding('sha256:ABC', 'sha256:DEF', 'example.com', '2025-05-01T00:00Z') + ); + } + + /** + * @dataProvider emptyFieldProvider + */ + public function testBuildSignatureBindingRejectsEmptyFields(string $contentHash, string $claimsHash, string $domain, string $signedAt): void + { + $this->expectException(InvalidArgumentException::class); + Signature::buildSignatureBinding($contentHash, $claimsHash, $domain, $signedAt); + } + + public function emptyFieldProvider(): array + { + return [ + 'empty contentHash' => ['', 'b', 'c', 'd'], + 'empty claimsHash' => ['a', '', 'c', 'd'], + 'empty domain' => ['a', 'b', '', 'd'], + 'empty signedAt' => ['a', 'b', 'c', ''], + ]; + } + + // ------------------------------------------------------------------ + // buildEndorsementBinding + // ------------------------------------------------------------------ + + public function testBuildEndorsementBinding(): void + { + $this->assertSame( + 'sha256:XYZ:2025-05-01T00:00Z', + Signature::buildEndorsementBinding('sha256:XYZ', '2025-05-01T00:00Z') + ); + } + + public function testBuildEndorsementBindingRejectsEmpty(): void + { + $this->expectException(InvalidArgumentException::class); + Signature::buildEndorsementBinding('', '2025-05-01'); + } + + // ------------------------------------------------------------------ + // verifySignature: ed25519 round trip via libsodium + // ------------------------------------------------------------------ + + public function testVerifyEd25519RoundTripPaddedSignature(): void + { + $this->skipIfNoSodium(); + + [$pem, $secret] = $this->makeEd25519KeypairPem(); + $message = 'sha256:ABC:sha256:DEF:example.com:2025-05-01T00:00Z'; + $signature = 
sodium_crypto_sign_detached($message, $secret); + $b64 = base64_encode($signature); // padded + + $this->assertTrue(Signature::verifySignature($message, $b64, $pem, 'ed25519')); + } + + public function testVerifyEd25519RoundTripUnpaddedSignature(): void + { + $this->skipIfNoSodium(); + + [$pem, $secret] = $this->makeEd25519KeypairPem(); + $message = 'hello'; + $signature = sodium_crypto_sign_detached($message, $secret); + $unpadded = rtrim(base64_encode($signature), '='); + + $this->assertTrue(Signature::verifySignature($message, $unpadded, $pem, 'ed25519')); + } + + public function testVerifyEd25519IsCaseInsensitive(): void + { + $this->skipIfNoSodium(); + + [$pem, $secret] = $this->makeEd25519KeypairPem(); + $message = 'hello'; + $signature = base64_encode(sodium_crypto_sign_detached($message, $secret)); + + $this->assertTrue(Signature::verifySignature($message, $signature, $pem, 'ED25519')); + $this->assertTrue(Signature::verifySignature($message, $signature, $pem, 'Ed25519')); + } + + public function testVerifyEd25519RejectsTamperedMessage(): void + { + $this->skipIfNoSodium(); + + [$pem, $secret] = $this->makeEd25519KeypairPem(); + $signature = base64_encode(sodium_crypto_sign_detached('original', $secret)); + + $this->assertFalse(Signature::verifySignature('tampered', $signature, $pem, 'ed25519')); + } + + public function testVerifyEd25519RejectsBadKey(): void + { + $this->skipIfNoSodium(); + + [$pemA, $secretA] = $this->makeEd25519KeypairPem(); + [$pemB,] = $this->makeEd25519KeypairPem(); + + $signature = base64_encode(sodium_crypto_sign_detached('hello', $secretA)); + + $this->assertFalse(Signature::verifySignature('hello', $signature, $pemB, 'ed25519')); + } + + public function testVerifyEd25519AcceptsRawKeyBytes(): void + { + $this->skipIfNoSodium(); + + $keypair = sodium_crypto_sign_keypair(); + $secret = sodium_crypto_sign_secretkey($keypair); + $public = sodium_crypto_sign_publickey($keypair); + + $message = 'raw-key-test'; + $signature = 
base64_encode(sodium_crypto_sign_detached($message, $secret)); + + // Pass the raw 32-byte key directly (no PEM wrapping). + $this->assertTrue(Signature::verifySignature($message, $signature, $public, 'ed25519')); + } + + public function testVerifyRejectsMalformedBase64(): void + { + $this->skipIfNoSodium(); + [$pem,] = $this->makeEd25519KeypairPem(); + // 1 mod 4 is never valid base64; flexible decoder rejects. + $this->assertFalse(Signature::verifySignature('msg', 'A', $pem, 'ed25519')); + } + + public function testVerifyUnknownAlgorithmThrows(): void + { + $this->expectException(InvalidArgumentException::class); + Signature::verifySignature('msg', base64_encode('xx'), 'irrelevant', 'frobnicate'); + } + + // ------------------------------------------------------------------ + // verifySignature: ECDSA round trip via openssl + // ------------------------------------------------------------------ + + public function testVerifyEcdsaRoundTrip(): void + { + if (!function_exists('openssl_pkey_new')) { + $this->markTestSkipped('openssl extension not available'); + } + $key = openssl_pkey_new([ + 'private_key_type' => OPENSSL_KEYTYPE_EC, + 'curve_name' => 'prime256v1', + ]); + if ($key === false) { + $this->markTestSkipped('this OpenSSL build cannot generate prime256v1 keypairs'); + } + $details = openssl_pkey_get_details($key); + $pem = $details['key']; + + $message = 'ecdsa-test'; + $sig = ''; + $this->assertTrue(openssl_sign($message, $sig, $key, OPENSSL_ALGO_SHA256)); + $b64 = base64_encode($sig); + + $this->assertTrue(Signature::verifySignature($message, $b64, $pem, 'ecdsa')); + $this->assertFalse(Signature::verifySignature('tampered', $b64, $pem, 'ecdsa')); + } + + // ------------------------------------------------------------------ + // verifySignature: RSA round trip via openssl + // ------------------------------------------------------------------ + + public function testVerifyRsaRoundTrip(): void + { + if (!function_exists('openssl_pkey_new')) { + 
$this->markTestSkipped('openssl extension not available'); + } + $key = openssl_pkey_new([ + 'private_key_type' => OPENSSL_KEYTYPE_RSA, + 'private_key_bits' => 2048, + ]); + if ($key === false) { + $this->markTestSkipped('OpenSSL keypair generation unavailable'); + } + $details = openssl_pkey_get_details($key); + $pem = $details['key']; + + $message = 'rsa-test'; + $sig = ''; + $this->assertTrue(openssl_sign($message, $sig, $key, OPENSSL_ALGO_SHA256)); + $b64 = base64_encode($sig); + + $this->assertTrue(Signature::verifySignature($message, $b64, $pem, 'rsa')); + $this->assertFalse(Signature::verifySignature($message . 'x', $b64, $pem, 'rsa')); + } + + // ------------------------------------------------------------------ + // ed25519RawToPem helper + // ------------------------------------------------------------------ + + public function testEd25519RawToPemStructure(): void + { + $this->skipIfNoSodium(); + $keypair = sodium_crypto_sign_keypair(); + $public = sodium_crypto_sign_publickey($keypair); + $pem = Signature::ed25519RawToPem($public); + + $this->assertStringContainsString('-----BEGIN PUBLIC KEY-----', $pem); + $this->assertStringContainsString('-----END PUBLIC KEY-----', $pem); + + // Round-trips via the verify path: signing with the secret and + // verifying via the PEM should succeed. 
+ $secret = sodium_crypto_sign_secretkey($keypair); + $signature = base64_encode(sodium_crypto_sign_detached('roundtrip', $secret)); + $this->assertTrue(Signature::verifySignature('roundtrip', $signature, $pem, 'ed25519')); + } + + public function testEd25519RawToPemRejectsWrongLength(): void + { + $this->expectException(InvalidArgumentException::class); + Signature::ed25519RawToPem('short'); + } + + // ------------------------------------------------------------------ + // helpers + // ------------------------------------------------------------------ + + private function skipIfNoSodium(): void + { + if (!function_exists('sodium_crypto_sign_keypair')) { + $this->markTestSkipped('libsodium not available'); + } + } + + /** + * Generate a fresh Ed25519 keypair and wrap the public key in a PEM SPKI. + * + * @return array{0: string, 1: string} [PEM publicKey, raw secretKey] + */ + private function makeEd25519KeypairPem(): array + { + $keypair = sodium_crypto_sign_keypair(); + $secret = sodium_crypto_sign_secretkey($keypair); + $public = sodium_crypto_sign_publickey($keypair); + return [Signature::ed25519RawToPem($public), $secret]; + } +} diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..4e8acfd --- /dev/null +++ b/python/README.md @@ -0,0 +1,46 @@ +# HTMLTrust Canonicalization -- Python + +Python binding for the HTMLTrust canonical text normalization library. Must produce byte-identical output to the JavaScript, Go, PHP, and Rust implementations for every test vector in the shared conformance suite. + +## Status + +Scaffolded -- implementation pending. + +## Scope + +This package provides two functions: + +1. **`normalize_text(text: str) -> str`** -- applies the 8-phase canonicalization defined in [`../spec.md`](../spec.md) to a UTF-8 string. Mirrors the existing JavaScript/Go/PHP signatures. +2. 
**`extract_canonical_text(html: str) -> str`** -- parses an HTML fragment, walks the DOM, emits text nodes in document order with single-space separators between block elements, and applies `normalize_text` to the result. This is the HTML -> canonical text extraction defined in the paper's §2.1. + +Both are pure functions: no network, no file I/O, deterministic output for the same input. + +## Planned dependencies + +- `unicodedata` (stdlib) for NFKC normalization +- `beautifulsoup4` or `lxml` for HTML parsing in `extract_canonical_text` +- No other runtime dependencies + +## Conformance + +The package MUST pass every vector in `../conformance/vectors.json` (to be defined). A test runner at `tests/test_conformance.py` should load the shared vectors and assert byte-identical output. + +## Installation (planned) + +```bash +pip install htmltrust-canonicalization +# or for development: +cd python && pip install -e . +``` + +## Usage (planned) + +```python +from htmltrust_canonicalization import normalize_text, extract_canonical_text + +canonical = normalize_text('He said, "Hello\u2026"') +# -> 'He said, "Hello..."' + +from_html = extract_canonical_text('

<p>Hello world!</p>

') +# -> 'Hello world!' +``` diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 0000000..075de31 --- /dev/null +++ b/rust/README.md @@ -0,0 +1,46 @@ +# HTMLTrust Canonicalization -- Rust + +Rust crate for the HTMLTrust canonical text normalization library. Must produce byte-identical output to the JavaScript, Go, PHP, and Python implementations for every test vector in the shared conformance suite. + +## Status + +Scaffolded -- implementation pending. + +## Scope + +This crate provides two functions: + +1. **`normalize_text(text: &str) -> String`** -- applies the 8-phase canonicalization defined in [`../spec.md`](../spec.md) to a UTF-8 string. +2. **`extract_canonical_text(html: &str) -> String`** -- parses an HTML fragment, walks the DOM, emits text nodes in document order with single-space separators between block elements, and applies `normalize_text` to the result. + +Both are pure functions: no network, no file I/O, deterministic output for the same input. + +## Planned dependencies + +- `unicode-normalization` for NFKC +- `scraper` or `html5ever` for HTML parsing in `extract_canonical_text` +- Minimal `regex` for the whitespace and punctuation phases +- No other runtime dependencies + +## Conformance + +The crate MUST pass every vector in `../conformance/vectors.json`. A test at `tests/conformance.rs` should load the shared vectors and assert byte-identical output. + +## Installation (planned) + +```toml +[dependencies] +htmltrust-canonicalization = "0.1" +``` + +## Usage (planned) + +```rust +use htmltrust_canonicalization::{normalize_text, extract_canonical_text}; + +let canonical = normalize_text("He said, \"Hello\u{2026}\""); +// -> "He said, \"Hello...\"" + +let from_html = extract_canonical_text("

<p>Hello world!</p>

"); +// -> "Hello world!" +```