From 0cf17cdcefcf01cb59b1a95a562e158de5ea2429 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Thu, 26 Mar 2026 17:31:24 +0100 Subject: [PATCH 1/3] CODEC-335: Add `DigestUtils.gitBlob` and `DigestUtils.gitTree` methods This change adds two methods to `DigestUtils` that compute generalized Git object identifiers using an arbitrary `MessageDigest`, rather than being restricted to SHA-1: - `gitBlob(digest, input)`: computes a generalized [Git blob object identifier](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) for a given file or byte content. - `gitTree(digest, file)`: computes a generalized [Git tree object identifier](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) for a given directory. ### Motivation The standard Git object identifiers use SHA-1, which is [in the process of being replaced by SHA-256](https://git-scm.com/docs/hash-function-transition) in Git itself. These methods generalize the identifier computation to support any `MessageDigest`, enabling both forward compatibility and use with external standards. In particular, the `swh:1:cnt:` (content) and `swh:1:dir:` (directory) identifier types defined by [SWHID (ISO/IEC 18670)](https://www.swhid.org/specification/v1.2/5.Core_identifiers/) are currently compatible with Git blob and tree identifiers respectively (using SHA-1), and can be used to generate canonical, persistent identifiers for unpacked source and binary distributions. 
--- src/changes/changes.xml | 1 + .../commons/codec/digest/DigestUtils.java | 132 +++++++++++++++ .../codec/digest/GitDirectoryEntry.java | 158 ++++++++++++++++++ .../commons/codec/digest/DigestUtilsTest.java | 73 ++++++++ .../codec/digest/GitDirectoryEntryTest.java | 95 +++++++++++ .../resources/DigestUtilsTest/greetings.txt | 2 + src/test/resources/DigestUtilsTest/hello.txt | 2 + .../DigestUtilsTest/subdir/nested.txt | 2 + 8 files changed, 465 insertions(+) create mode 100644 src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java create mode 100644 src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java create mode 100644 src/test/resources/DigestUtilsTest/greetings.txt create mode 100644 src/test/resources/DigestUtilsTest/hello.txt create mode 100644 src/test/resources/DigestUtilsTest/subdir/nested.txt diff --git a/src/changes/changes.xml b/src/changes/changes.xml index ce6e8d66cd..24a8e29d1f 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -51,6 +51,7 @@ The type attribute can be add,update,fix,remove. Add Base58 support. Add BaseNCodecInputStream.AbstracBuilder.setByteArray(byte[]). + Add DigestUtils.gitBlob() and DigestUtils.gitTree() to compute Git blob and tree object identifiers. Bump org.apache.commons:commons-parent from 96 to 97. 
diff --git a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java index 786cc4e5fa..19b407112d 100644 --- a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java +++ b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java @@ -18,17 +18,24 @@ package org.apache.commons.codec.digest; import java.io.BufferedInputStream; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.OpenOption; import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.TreeSet; import org.apache.commons.codec.binary.Hex; import org.apache.commons.codec.binary.StringUtils; @@ -139,6 +146,131 @@ public static byte[] digest(final MessageDigest messageDigest, final RandomAcces return updateDigest(messageDigest, data).digest(); } + /** + * Reads through a byte array and return a generalized Git blob identifier + * + *

The identifier is computed in the way described by the + * SWHID contents identifier, but it can use any hash + * algorithm.

+ * + *

When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.

+ * + * @param messageDigest The MessageDigest to use (for example SHA-1). + * @param data Data to digest. + * @return A generalized Git blob identifier. + * @since 1.22.0 + */ + public static byte[] gitBlob(final MessageDigest messageDigest, final byte[] data) { + updateDigest(messageDigest, gitBlobPrefix(data.length)); + return digest(messageDigest, data); + } + + /** + * Reads through a byte array and return a generalized Git blob identifier + * + *

The identifier is computed in the way described by the + * SWHID contents identifier, but it can use any hash + * algorithm.

+ * + *

When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.

+ * + * @param messageDigest The MessageDigest to use (for example SHA-1). + * @param data Data to digest. + * @param options Options how to open the file + * @return A generalized Git blob identifier. + * @throws IOException On error accessing the file + * @since 1.22.0 + */ + public static byte[] gitBlob(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException { + updateDigest(messageDigest, gitBlobPrefix(Files.size(data))); + return updateDigest(messageDigest, data, options).digest(); + } + + private static byte[] gitBlobPrefix(final long dataSize) { + return ("blob " + dataSize + "\0").getBytes(StandardCharsets.UTF_8); + } + + /** + * Returns a generalized Git tree identifier + * + *

The identifier is computed in the way described by the + * SWHID directory identifier, but it can use any hash + * algorithm.

+ * + *

When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.

+ * + * @param messageDigest The MessageDigest to use (for example SHA-1) + * @param entries The directory entries + * @return A generalized Git tree identifier. + */ + static byte[] gitTree(final MessageDigest messageDigest, final Collection entries) { + final TreeSet treeSet = new TreeSet<>(entries); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + for (final GitDirectoryEntry entry : treeSet) { + final byte[] treeEntryBytes = entry.toTreeEntryBytes(); + baos.write(treeEntryBytes, 0, treeEntryBytes.length); + } + updateDigest(messageDigest, gitTreePrefix(baos.size())); + return updateDigest(messageDigest, baos.toByteArray()).digest(); + } + + /** + * Reads through a byte array and return a generalized Git tree identifier + * + *

The identifier is computed in the way described by the + * SWHID directory identifier, but it can use any hash + * algorithm.

+ * + *

When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.

+ * + * @param messageDigest The MessageDigest to use (for example SHA-1). + * @param data Data to digest. + * @param options Options how to open the file + * @return A generalized Git tree identifier. + * @throws IOException On error accessing the file + * @since 1.22.0 + */ + public static byte[] gitTree(final MessageDigest messageDigest, final Path data, final OpenOption...options) throws IOException { + final List entries = new ArrayList<>(); + try (DirectoryStream files = Files.newDirectoryStream(data)) { + for (final Path path : files) { + final GitDirectoryEntry.Type type = getGitDirectoryEntryType(path); + final byte[] rawObjectId; + if (type == GitDirectoryEntry.Type.DIRECTORY) { + rawObjectId = gitTree(messageDigest, path, options); + } else { + rawObjectId = gitBlob(messageDigest, path, options); + } + entries.add(new GitDirectoryEntry(path, type, rawObjectId)); + } + } + return gitTree(messageDigest, entries); + } + + /** + * Returns the {@link GitDirectoryEntry.Type} of a file. + * + * @param path The file to check. + * @return A {@link GitDirectoryEntry.Type} + */ + private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path path) { + // Symbolic links first + if (Files.isSymbolicLink(path)) { + return GitDirectoryEntry.Type.SYMBOLIC_LINK; + } + if (Files.isDirectory(path)) { + return GitDirectoryEntry.Type.DIRECTORY; + } + if (Files.isExecutable(path)) { + return GitDirectoryEntry.Type.EXECUTABLE; + } + return GitDirectoryEntry.Type.REGULAR; + } + + private static byte[] gitTreePrefix(final long dataSize) { + return ("tree " + dataSize + "\0").getBytes(StandardCharsets.UTF_8); + } + /** * Gets a {@code MessageDigest} for the given {@code algorithm}. 
* diff --git a/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java b/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java new file mode 100644 index 0000000000..9810e7f42e --- /dev/null +++ b/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.digest; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; + +/** + * Represents a single entry in a Git tree object. + * + *

A Git tree object encodes a directory snapshot. Each entry holds:

+ *
    + *
  • a {@link Type} that determines the Unix file mode (e.g. {@code 100644} for a regular file),
  • + *
  • the entry name (file or directory name, without a path separator),
  • + *
  • the raw object id of the referenced blob or sub-tree.
  • + *
+ * + *

Entries are ordered by {@link #compareTo} using Git's tree-sort rule: directory names are compared as if they ended with {@code '/'}, so that {@code foo/} + * sorts after {@code foo.txt} but before {@code foobar}.

+ * + *

Call {@link #toTreeEntryBytes()} to obtain the binary encoding that Git feeds to its hash function when computing the tree object identifier.

+ * + * @see Git Internals – Git Objects + * @see SWHID Directory Identifier + */ +class GitDirectoryEntry implements Comparable { + + /** + * The entry name (file or directory name, no path separator). + */ + private final String name; + + /** + * The key used for ordering entries within a tree object. + * + *

Git appends {@code '/'} to directory names before comparing.

+ */ + private final String sortKey; + + /** + * The Git object type, which determines the Unix file-mode prefix. + */ + private final Type type; + + /** + * The raw object id of the referenced blob or sub-tree. + */ + private final byte[] rawObjectId; + + private GitDirectoryEntry(final String name, final Type type, final byte[] rawObjectId) { + this.name = name; + this.type = type; + this.sortKey = type == Type.DIRECTORY ? name + "/" : name; + this.rawObjectId = rawObjectId; + } + + GitDirectoryEntry(final Path path, final Type type, final byte[] rawObjectId) { + this(path.getFileName().toString(), type, rawObjectId); + } + + /** + * Returns the binary encoding of this entry as it appears inside a Git tree object. + * + *

The format follows the Git tree entry layout:

+ *
+     *   <mode> SP <name> NUL <20-byte-object-id>
+     * 
+ * + * @return the binary tree-entry encoding; never {@code null} + */ + byte[] toTreeEntryBytes() { + final byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8); + final byte[] result = new byte[type.mode.length + nameBytes.length + rawObjectId.length + 2]; + System.arraycopy(type.mode, 0, result, 0, type.mode.length); + result[type.mode.length] = ' '; + System.arraycopy(nameBytes, 0, result, type.mode.length + 1, nameBytes.length); + result[type.mode.length + nameBytes.length + 1] = '\0'; + System.arraycopy(rawObjectId, 0, result, type.mode.length + nameBytes.length + 2, rawObjectId.length); + return result; + } + + @Override + public int compareTo(GitDirectoryEntry o) { + return sortKey.compareTo(o.sortKey); + } + + @Override + public int hashCode() { + return name.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof GitDirectoryEntry)) { + return false; + } + final GitDirectoryEntry other = (GitDirectoryEntry) obj; + return name.equals(other.name); + } + + /** + * The type of a Git tree entry, which maps to a Unix file-mode string. + * + *

Git encodes the file type and permission bits as an ASCII octal string that precedes the entry name in the binary tree format. The values defined here + * cover the four entry types that Git itself produces.

+ * + *

This enum is package-private. If it were made public, {@link #mode} would need to be wrapped in an immutable copy to prevent external mutation.

+ */ + enum Type { + + /** + * A sub-directory (Git sub-tree) + */ + DIRECTORY("40000"), + + /** + * An executable file + */ + EXECUTABLE("100755"), + + /** + * A regular (non-executable) file + */ + REGULAR("100644"), + + /** + * A symbolic link + */ + SYMBOLIC_LINK("120000"); + + /** + * The ASCII-encoded octal mode string as it appears in the binary tree entry. + */ + private final byte[] mode; + + Type(final String mode) { + this.mode = mode.getBytes(StandardCharsets.US_ASCII); + } + } +} diff --git a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java index b27705b5d8..225596c620 100644 --- a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java +++ b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java @@ -32,11 +32,14 @@ import java.io.OutputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.security.MessageDigest; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Locale; import java.util.Random; import java.util.stream.Stream; @@ -51,7 +54,9 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; /** * Tests {@link DigestUtils}. 
@@ -238,6 +243,16 @@ class DigestUtilsTest { "CA 92 BF 0B E5 61 5E 96 95 9D 76 71 97 A0 BE EB"; // @formatter:on + static Stream gitBlobProvider() { + return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", "5f4a83288e67f1be2d6fcdad84165a86c6a970d7"), + Arguments.of("DigestUtilsTest/greetings.txt", "6cf4f797455661e61d1ee6913fc29344f5897243"), + Arguments.of("DigestUtilsTest/subdir/nested.txt", "07a392ddb4dbff06a373a7617939f30b2dcfe719")); + } + + private static Path resourcePath(final String resourceName) throws Exception { + return Paths.get(DigestUtilsTest.class.getClassLoader().getResource(resourceName).toURI()); + } + static Stream testShake128_256() { // @formatter:off return Stream.of( @@ -475,6 +490,64 @@ void testGetMessageDigest() { assertEquals(MessageDigestAlgorithms.MD5, digestUtils.getMessageDigest().getAlgorithm()); } + @ParameterizedTest + @MethodSource("gitBlobProvider") + void testGitBlobByteArray(final String resourceName, final String expectedSha1Hex) throws Exception { + final byte[] data = Files.readAllBytes(resourcePath(resourceName)); + assertArrayEquals(Hex.decodeHex(expectedSha1Hex), DigestUtils.gitBlob(DigestUtils.getSha1Digest(), data)); + } + + @ParameterizedTest + @MethodSource("gitBlobProvider") + void testGitBlobPath(final String resourceName, final String expectedSha1Hex) throws Exception { + assertArrayEquals(Hex.decodeHex(expectedSha1Hex), DigestUtils.gitBlob(DigestUtils.getSha1Digest(), resourcePath(resourceName))); + } + + /** + * Binary body of the test tree object used in {@link #testGitTreeCollection}. + * + *

Each entry has the format {@code <mode> SP <name> NUL <20-byte-object-id>}.

+ */ + private static final String TREE_BODY_HEX = + // 100644 hello.txt\0 + objectId + "3130303634342068656c6c6f2e74787400" + "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" + + // 120000 link.txt\0 + objectId + "313230303030206c696e6b2e74787400" + "1234567890abcdef1234567890abcdef12345678" + + // 100755 run.sh\0 + objectId + "3130303735352072756e2e736800" + "f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9" + + // 40000 src\0 + objectId + "34303030302073726300" + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"; + + @ParameterizedTest + @ValueSource(strings = {MessageDigestAlgorithms.SHA_1, MessageDigestAlgorithms.SHA_256}) + void testGitTreeCollection(final String algorithm) throws Exception { + final byte[] helloId = Hex.decodeHex("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"); + final byte[] runId = Hex.decodeHex("f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9"); + final byte[] linkId = Hex.decodeHex("1234567890abcdef1234567890abcdef12345678"); + final byte[] srcId = Hex.decodeHex("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"); + + // Entries are supplied out of order to verify that the method sorts them correctly. 
+ final List entries = new ArrayList<>(); + entries.add(new GitDirectoryEntry(Paths.get("src"), GitDirectoryEntry.Type.DIRECTORY, srcId)); + entries.add(new GitDirectoryEntry(Paths.get("run.sh"), GitDirectoryEntry.Type.EXECUTABLE, runId)); + entries.add(new GitDirectoryEntry(Paths.get("hello.txt"), GitDirectoryEntry.Type.REGULAR, helloId)); + entries.add(new GitDirectoryEntry(Paths.get("link.txt"), GitDirectoryEntry.Type.SYMBOLIC_LINK, linkId)); + + // Compute expected value + final byte[] treeBody = Hex.decodeHex(TREE_BODY_HEX); + final MessageDigest md = DigestUtils.getDigest(algorithm); + DigestUtils.updateDigest(md, ("tree " + treeBody.length + "\0").getBytes(StandardCharsets.UTF_8)); + final byte[] expected = DigestUtils.updateDigest(md, treeBody).digest(); + + assertArrayEquals(expected, DigestUtils.gitTree(md, entries)); + } + + @Test + void testGitTreePath() throws Exception { + assertArrayEquals(Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a"), + DigestUtils.gitTree(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest"))); + } + @Test void testInternalNoSuchAlgorithmException() { assertThrows(IllegalArgumentException.class, () -> DigestUtils.getDigest("Bogus Bogus")); diff --git a/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java b/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java new file mode 100644 index 0000000000..3f13c57fd3 --- /dev/null +++ b/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.digest; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.TreeSet; + +import org.junit.jupiter.api.Test; + +class GitDirectoryEntryTest { + + private static final byte[] ZERO_ID = new byte[20]; + + /** + * The Path constructor must extract the filename component. + */ + @Test + void testPathConstructorUsesFilename() { + final GitDirectoryEntry fromLabel = new GitDirectoryEntry(Paths.get("hello.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID); + final GitDirectoryEntry fromRelative = new GitDirectoryEntry(Paths.get("subdir/hello.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID); + final GitDirectoryEntry fromAbsolute = new GitDirectoryEntry(Paths.get("hello.txt").toAbsolutePath(), GitDirectoryEntry.Type.REGULAR, ZERO_ID); + + assertEquals(fromLabel, fromRelative); + assertEquals(fromLabel, fromAbsolute); + assertArrayEquals(fromLabel.toTreeEntryBytes(), fromRelative.toTreeEntryBytes()); + assertArrayEquals(fromLabel.toTreeEntryBytes(), fromAbsolute.toTreeEntryBytes()); + } + + /** + * Equality and hash code are based solely on the entry name. 
+ */ + @Test + void testEqualityBasedOnNameOnly() { + final byte[] otherId = new byte[20]; + Arrays.fill(otherId, (byte) 0xff); + + final GitDirectoryEntry regular = new GitDirectoryEntry(Paths.get("foo"), GitDirectoryEntry.Type.REGULAR, ZERO_ID); + final GitDirectoryEntry executable = new GitDirectoryEntry(Paths.get("foo"), GitDirectoryEntry.Type.EXECUTABLE, otherId); + + // Same name, different type and object id -> equal + assertEquals(regular, executable); + assertEquals(regular.hashCode(), executable.hashCode()); + + // Different name -> not equal + assertNotEquals(regular, new GitDirectoryEntry(Paths.get("bar"), GitDirectoryEntry.Type.REGULAR, ZERO_ID)); + + // Same reference -> equal + assertEquals(regular, regular); + + // Not equal to null or unrelated type + assertNotEquals(regular, null); + assertNotEquals(regular, "foo"); + } + + /** + * Entries should be sorted by Git sort rule. + * + *

Git compares the names of the entries, but adds a {@code /} at the end of directory entries.

+ */ + @Test + void testSortOrder() { + final GitDirectoryEntry alpha = new GitDirectoryEntry(Paths.get("alpha.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID); + final GitDirectoryEntry fooTxt = new GitDirectoryEntry(Paths.get("foo.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID); + final GitDirectoryEntry fooDir = new GitDirectoryEntry(Paths.get("foo"), GitDirectoryEntry.Type.DIRECTORY, ZERO_ID); + final GitDirectoryEntry foobar = new GitDirectoryEntry(Paths.get("foobar"), GitDirectoryEntry.Type.REGULAR, ZERO_ID); + final GitDirectoryEntry zeta = new GitDirectoryEntry(Paths.get("zeta.txt"), GitDirectoryEntry.Type.REGULAR, ZERO_ID); + + final List entries = new ArrayList<>(Arrays.asList(zeta, foobar, fooDir, alpha, fooTxt)); + entries.sort(GitDirectoryEntry::compareTo); + + assertEquals(Arrays.asList(alpha, fooTxt, fooDir, foobar, zeta), entries); + } +} diff --git a/src/test/resources/DigestUtilsTest/greetings.txt b/src/test/resources/DigestUtilsTest/greetings.txt new file mode 100644 index 0000000000..6cf4f79745 --- /dev/null +++ b/src/test/resources/DigestUtilsTest/greetings.txt @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: Apache-2.0 +Greetings! diff --git a/src/test/resources/DigestUtilsTest/hello.txt b/src/test/resources/DigestUtilsTest/hello.txt new file mode 100644 index 0000000000..5f4a83288e --- /dev/null +++ b/src/test/resources/DigestUtilsTest/hello.txt @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: Apache-2.0 +Hello, World! diff --git a/src/test/resources/DigestUtilsTest/subdir/nested.txt b/src/test/resources/DigestUtilsTest/subdir/nested.txt new file mode 100644 index 0000000000..07a392ddb4 --- /dev/null +++ b/src/test/resources/DigestUtilsTest/subdir/nested.txt @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: Apache-2.0 +Nested file. From b6579ff001474f4edefe841a91ef26c4e77bdd2f Mon Sep 17 00:00:00 2001 From: "Piotr P. 
Karwasz" Date: Thu, 26 Mar 2026 20:05:06 +0100 Subject: [PATCH 2/3] fix: build errors --- .../commons/codec/digest/DigestUtils.java | 2 +- .../codec/digest/GitDirectoryEntry.java | 27 ++++++++++++++++++- .../commons/codec/digest/DigestUtilsTest.java | 1 - .../codec/digest/GitDirectoryEntryTest.java | 1 - 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java index 19b407112d..7e0f809de9 100644 --- a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java +++ b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java @@ -230,7 +230,7 @@ static byte[] gitTree(final MessageDigest messageDigest, final Collection entries = new ArrayList<>(); try (DirectoryStream files = Files.newDirectoryStream(data)) { for (final Path path : files) { diff --git a/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java b/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java index 9810e7f42e..e1073611fc 100644 --- a/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java +++ b/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java @@ -19,6 +19,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Path; +import java.util.Objects; /** * Represents a single entry in a Git tree object. 
@@ -62,6 +63,21 @@ class GitDirectoryEntry implements Comparable { */ private final byte[] rawObjectId; + private static String getFileName(final Path path) { + final Path fileName = path.getFileName(); + if (fileName == null) { + throw new IllegalArgumentException(path.toString()); + } + return fileName.toString(); + } + + /** + * Creates an entry + * + * @param name The name of the entry + * @param type The type of the entry + * @param rawObjectId The id of the entry + */ private GitDirectoryEntry(final String name, final Type type, final byte[] rawObjectId) { this.name = name; this.type = type; @@ -69,8 +85,17 @@ private GitDirectoryEntry(final String name, final Type type, final byte[] rawOb this.rawObjectId = rawObjectId; } + /** + * Creates an entry + * + * @param path The path of the entry; must not be an empty path + * @param type The type of the entry + * @param rawObjectId The id of the entry + * @throws IllegalArgumentException If the path is empty + * @throws NullPointerException If any argument is {@code null} + */ GitDirectoryEntry(final Path path, final Type type, final byte[] rawObjectId) { - this(path.getFileName().toString(), type, rawObjectId); + this(getFileName(path), Objects.requireNonNull(type), Objects.requireNonNull(rawObjectId)); } /** diff --git a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java index 225596c620..01fcce06a8 100644 --- a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java +++ b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java @@ -54,7 +54,6 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; diff --git 
a/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java b/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java index 3f13c57fd3..ce37c0e1ef 100644 --- a/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java +++ b/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java @@ -25,7 +25,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.TreeSet; import org.junit.jupiter.api.Test; From e67feda804ee4eae43143d9b70901cbaf16e454d Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Fri, 27 Mar 2026 12:13:03 +0100 Subject: [PATCH 3/3] fix: reset digest before computing `gitBlob/gitTree` --- src/main/java/org/apache/commons/codec/digest/DigestUtils.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java index 7e0f809de9..2b5f7cdbd1 100644 --- a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java +++ b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java @@ -161,6 +161,7 @@ public static byte[] digest(final MessageDigest messageDigest, final RandomAcces * @since 1.22.0 */ public static byte[] gitBlob(final MessageDigest messageDigest, final byte[] data) { + messageDigest.reset(); updateDigest(messageDigest, gitBlobPrefix(data.length)); return digest(messageDigest, data); } @@ -182,6 +183,7 @@ public static byte[] gitBlob(final MessageDigest messageDigest, final byte[] dat * @since 1.22.0 */ public static byte[] gitBlob(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException { + messageDigest.reset(); updateDigest(messageDigest, gitBlobPrefix(Files.size(data))); return updateDigest(messageDigest, data, options).digest(); } @@ -210,6 +212,7 @@ static byte[] gitTree(final MessageDigest messageDigest, final Collection