diff --git a/evcache-core/src/main/java/com/netflix/evcache/EVCacheImpl.java b/evcache-core/src/main/java/com/netflix/evcache/EVCacheImpl.java index 67f7a1fd..ad3fbf64 100644 --- a/evcache-core/src/main/java/com/netflix/evcache/EVCacheImpl.java +++ b/evcache-core/src/main/java/com/netflix/evcache/EVCacheImpl.java @@ -77,6 +77,12 @@ public class EVCacheImpl implements EVCache, EVCacheImplMBean { private static final Logger log = LoggerFactory.getLogger(EVCacheImpl.class); + // The envelope transcoder used for hashed-key EVCacheValue wrapping must NOT compress its output: + // reads detect the format by the leading byte (0xAC = legacy Java, 0x0C = compact binary), which + // gzip would mask (compressed payloads start with 0x1F 0x8B). Disable by setting the threshold + // higher than any plausible value size. + private static final int ENVELOPE_COMPRESSION_DISABLED = Integer.MAX_VALUE; + private final Clock clock; private final String _appName; private final String _cacheName; @@ -164,8 +170,12 @@ public class EVCacheImpl implements EVCache, EVCacheImplMBean { this.maxHashLength = propertyRepository.get(appName + ".max.hash.length", Integer.class).orElse(-1); this.encoderBase = propertyRepository.get(appName + ".hash.encoder", String.class).orElse("base64"); this.autoHashKeys = propertyRepository.get(_appName + ".auto.hash.keys", Boolean.class).orElseGet("evcache.auto.hash.keys").orElse(false); - this.evcacheValueTranscoder = new EVCacheTranscoder(); - evcacheValueTranscoder.setCompressionThreshold(Integer.MAX_VALUE); + // Whether the EVCacheValue envelope (hashed keys) is written using the compact binary format + // instead of native Java serialization. 
+ final boolean useBinarySerialization = propertyRepository.get(_appName + ".envelope.binary.serialization.enabled", Boolean.class) + .orElseGet("evcache.envelope.binary.serialization.enabled").orElse(false).get(); + final int maxValueSize = propertyRepository.get("default.evcache.max.data.size", Integer.class).orElse(20 * 1024 * 1024).get(); + this.evcacheValueTranscoder = new EVCacheTranscoder(maxValueSize, ENVELOPE_COMPRESSION_DISABLED, useBinarySerialization); // default max key length is 200, instead of using what is defined in MemcachedClientIF.MAX_KEY_LENGTH (250). This is to accommodate // auto key prepend with appname for duet feature. diff --git a/evcache-core/src/main/java/com/netflix/evcache/EVCacheTranscoder.java b/evcache-core/src/main/java/com/netflix/evcache/EVCacheTranscoder.java index 97be808b..789dd100 100644 --- a/evcache-core/src/main/java/com/netflix/evcache/EVCacheTranscoder.java +++ b/evcache-core/src/main/java/com/netflix/evcache/EVCacheTranscoder.java @@ -1,11 +1,15 @@ package com.netflix.evcache; +import com.netflix.evcache.pool.EVCacheValue; +import com.netflix.evcache.pool.EVCacheValueSerde; import com.netflix.evcache.util.EVCacheConfig; import net.spy.memcached.CachedData; public class EVCacheTranscoder extends EVCacheSerializingTranscoder { + private final boolean useBinarySerialization; + public EVCacheTranscoder() { this(EVCacheConfig.getInstance().getPropertyRepository().get("default.evcache.max.data.size", Integer.class).orElse(20 * 1024 * 1024).get()); } @@ -15,8 +19,13 @@ public EVCacheTranscoder(int max) { } public EVCacheTranscoder(int max, int compressionThreshold) { + this(max, compressionThreshold, false); + } + + public EVCacheTranscoder(int max, int compressionThreshold, boolean useBinarySerialization) { super(max); setCompressionThreshold(compressionThreshold); + this.useBinarySerialization = useBinarySerialization; } @Override @@ -35,4 +44,20 @@ public CachedData encode(Object o) { return super.encode(o); } + @Override 
+ protected byte[] serialize(Object o) { + if (useBinarySerialization && o instanceof EVCacheValue) { + return EVCacheValueSerde.serialize((EVCacheValue) o); + } + return super.serialize(o); + } + + @Override + protected Object deserialize(byte[] in) { + if (EVCacheValueSerde.isBinaryFormat(in)) { + return EVCacheValueSerde.deserialize(in); + } + return super.deserialize(in); + } + } diff --git a/evcache-core/src/main/java/com/netflix/evcache/pool/EVCacheValueSerde.java b/evcache-core/src/main/java/com/netflix/evcache/pool/EVCacheValueSerde.java new file mode 100644 index 00000000..c9a845bf --- /dev/null +++ b/evcache-core/src/main/java/com/netflix/evcache/pool/EVCacheValueSerde.java @@ -0,0 +1,182 @@ +package com.netflix.evcache.pool; + +import java.nio.BufferUnderflowException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +import org.apache.commons.codec.binary.Hex; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Length-prefixed binary wire format for the {@link EVCacheValue} envelope. EVCache wraps the + * user's value in an {@link EVCacheValue} when the cache key needs to be hashed — typically + * because the canonical key would exceed memcached's key-length limit (auto-hashing path) or + * because the app opted into hashing explicitly (see {@code EVCacheImpl.getEVCacheKey}). The + * envelope carries the original (pre-hash) key so it can be recovered from the value to detect + * hash collisions on read. The on-the-wire layout is: + *
+ * <pre>
+ * [byte 0: magic 0x0C][byte 1: reserved/version 0x00]
+ * [int keyLen][key UTF-8 bytes]
+ * [int valLen][value bytes]
+ * [int flags][long ttl][long createTime]
+ * </pre>
+ *
+ * <p>Byte 0 (magic {@code 0x0C}) discriminates this format from the legacy Java
+ * {@code ObjectOutputStream} stream header ({@code 0xAC 0xED}), so both formats can coexist
+ * under the same {@code SERIALIZED} CachedData flag. Callers use {@link #isBinaryFormat(byte[])}
+ * to decide between this codec and the legacy Java path.
+ *
+ * <p>
Byte 1 is RESERVED for future breaking changes to the wire format. It is currently always {@code 0x00}. + * Versioning is intentionally NOT implemented yet, and the reader read-and-ignores this byte + * (it does NOT validate that it equals {@code 0x00}). + */ +public final class EVCacheValueSerde { + + private static final Logger log = LoggerFactory.getLogger(EVCacheValueSerde.class); + + static final byte BINARY_SERDE_MAGIC_CONSTANT_BYTE = 0x0C; // 12 + private static final byte RESERVED_VERSION_BYTE = 0x00; + + private static final int CORRUPT_PAYLOAD_LOG_LIMIT = 1024; + + private EVCacheValueSerde() { + // Utility class; not instantiable. + } + + /** True iff {@code bytes} starts with the binary envelope magic byte. */ + public static boolean isBinaryFormat(byte[] bytes) { + return bytes != null && bytes.length > 0 && bytes[0] == BINARY_SERDE_MAGIC_CONSTANT_BYTE; + } + + /** + * Encode an {@link EVCacheValue} into its compact binary envelope. See class Javadoc for layout. + * + *
The {@code EVCacheValue}'s key and value must be non-null. Production writes can only
+     * reach this method via {@link com.netflix.evcache.EVCacheTranscoder}/{@code CachedData},
+     * which both reject null payloads upstream — so this method does not defensively check.
+     */
+    public static byte[] serialize(EVCacheValue v) {
+        final byte[] keyUtf8 = v.getKey().getBytes(StandardCharsets.UTF_8);
+        final byte[] payload = v.getValue();
+
+        // Fixed-width part of the envelope: magic + reserved + keyLen + valLen + flags + ttl + createTime.
+        final int fixedWidth = 2 * Byte.BYTES + 3 * Integer.BYTES + 2 * Long.BYTES;
+        final ByteBuffer out = ByteBuffer.allocate(fixedWidth + keyUtf8.length + payload.length);
+
+        // Fields are written in wire order; each variable-length field follows its int length prefix.
+        out.put(BINARY_SERDE_MAGIC_CONSTANT_BYTE).put(RESERVED_VERSION_BYTE);
+        out.putInt(keyUtf8.length).put(keyUtf8);
+        out.putInt(payload.length).put(payload);
+        out.putInt(v.getFlags()).putLong(v.getTTL()).putLong(v.getCreateTimeUTC());
+
+        // The buffer was sized exactly, so its backing array is the complete envelope.
+        return out.array();
+    }
+
+    /**
+     * Deserializes bytes into {@link EVCacheValue} from custom wire format.
+     *
+     *
Error behavior: on any corrupt or truncated payload (failed bounds check, buffer
+     * underflow, or any other unexpected exception) this method logs a warning and returns
+     * {@code null}. Corruption the decoder detects itself is logged with the failing field or
+     * length and a hex dump of the source bytes capped at 1024 bytes; an unexpected exception is
+     * logged with the field being read and its stack trace. The caller sees a cache miss rather
+     * than a thrown exception, matching {@code BaseSerializingTranscoder}'s resilience contract.
+     *
+     * <p>Length prefixes are bounds-checked against the remaining buffer before allocating, so a
+     * malformed length prefix is rejected before any huge allocation or
+     * {@link NegativeArraySizeException}.
+     */
+    public static EVCacheValue deserialize(byte[] bytes) {
+        // Reject null up front: it cannot be wrapped, and the handlers below read bytes.length.
+        if (bytes == null) {
+            log.warn("Failed to deserialize EVCacheValue binary envelope, error=null payload");
+            return null;
+        }
+        String field = "magic"; // name of the field being decoded, reported on failure
+        try {
+            final ByteBuffer buffer = ByteBuffer.wrap(bytes);
+            final byte magic = buffer.get();
+            if (BINARY_SERDE_MAGIC_CONSTANT_BYTE != magic) {
+                logCorruption(bytes, "Invalid magic constant: " + magic);
+                return null;
+            }
+            field = "reserved";
+            buffer.get(); // reserved/version byte: consumed, deliberately not validated
+
+            field = "keyLength";
+            final int keyLength = buffer.getInt();
+            // Bounds-check before allocating: a bogus length must not cause a huge or negative-size array.
+            if (keyLength < 0 || keyLength > buffer.remaining()) {
+                logCorruption(bytes, "Invalid keyLength: " + keyLength + ", remaining=" + buffer.remaining());
+                return null;
+            }
+            field = "key";
+            final byte[] keyBytes = new byte[keyLength];
+            buffer.get(keyBytes);
+            final String key = new String(keyBytes, StandardCharsets.UTF_8);
+
+            field = "valueLength";
+            final int valueLength = buffer.getInt();
+            if (valueLength < 0 || valueLength > buffer.remaining()) {
+                logCorruption(bytes, "Invalid valueLength: " + valueLength + ", remaining=" + buffer.remaining());
+                return null;
+            }
+            field = "value";
+            final byte[] valueBytes = new byte[valueLength];
+            buffer.get(valueBytes);
+
+            field = "flags";
+            final int flags = buffer.getInt();
+            field = "ttl";
+            final long ttl = buffer.getLong();
+            field = "createTime";
+            final long createTime = buffer.getLong();
+            return new EVCacheValue(key, valueBytes, flags, ttl, createTime);
+        } catch (BufferUnderflowException e) {
+            logCorruption(bytes, "BufferUnderflow at field '" + field + "'");
+            return null;
+        } catch (Exception e) { // defensive catch-all; bytes is non-null here, so bytes.length is safe
+            log.warn("Uncaught exception decoding {} bytes of EVCacheValue binary envelope at field '{}'",
+                    bytes.length, field, e);
+            return null;
+        }
+    }
+
+    /**
+     * Emits one WARN line describing a corrupt payload: its total length, the supplied reason,
+     * and a hex dump limited to the first {@value #CORRUPT_PAYLOAD_LOG_LIMIT} bytes (longer
+     * payloads get a truncation marker). No Throwable is handed to SLF4J on purpose: corruption
+     * is an expected, recoverable condition, and a stack trace would only add noise.
+     */
+    private static void logCorruption(byte[] bytes, String error) {
+        final String payloadHex = toHex(bytes, CORRUPT_PAYLOAD_LOG_LIMIT);
+        final int totalBytes = bytes.length;
+        log.warn("Failed to deserialize {} bytes of EVCacheValue binary envelope, error={}, payload hex: {}",
+                totalBytes, error, payloadHex);
+    }
+
+    /** Hex-encodes {@code bytes}, keeping at most {@code maxBytes} of them; a null array renders as "null". */
+    private static String toHex(byte[] bytes, int maxBytes) {
+        if (bytes == null) {
+            return "null";
+        }
+        final boolean fits = bytes.length <= maxBytes;
+        final String hex = Hex.encodeHexString(fits ? bytes : Arrays.copyOf(bytes, maxBytes));
+        // Oversized payloads are cut to maxBytes and tagged with their real size.
+        return fits ? hex : hex + "...(truncated, total=" + bytes.length + " bytes)";
+    }
+}
diff --git a/evcache-core/src/test/java/com/netflix/evcache/pool/EVCacheValueSerdeTest.java b/evcache-core/src/test/java/com/netflix/evcache/pool/EVCacheValueSerdeTest.java
new file mode 100644
index 00000000..b8c751e8
--- /dev/null
+++ b/evcache-core/src/test/java/com/netflix/evcache/pool/EVCacheValueSerdeTest.java
@@ -0,0 +1,257 @@
+package com.netflix.evcache.pool;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.ObjectOutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.testng.annotations.Test;
+
+import com.netflix.evcache.EVCacheTranscoder;
+
+import net.spy.memcached.CachedData;
+
+/**
+ * Pure unit tests for the compact binary serialization of {@link EVCacheValue} (the
+ * envelope wire format implemented by {@link EVCacheValueSerde}), its routing through
+ * {@link EVCacheTranscoder}, and backwards-compatibility with the legacy Java-serialized
+ * format. All tests go through the public {@link EVCacheTranscoder#encode(Object)} /
+ * {@link EVCacheTranscoder#decode(CachedData)} API, so the codec is exercised through the
+ * transcoder rather than called directly. No memcached, no DI.
+ */
+public class EVCacheValueSerdeTest {
+
+ private static final int SERIALIZED = 1; // EVCacheSerializingTranscoder.SERIALIZED
+ private static final byte JAVA_STREAM_MAGIC_FIRST = (byte) 0xAC;
+ private static final byte JAVA_STREAM_MAGIC_SECOND = (byte) 0xED;
+
+ // ---- helpers ----
+
+    /** Builds a transcoder that writes the binary envelope; compression is off so byte 0 stays the magic byte. */
+    private static EVCacheTranscoder binaryTranscoder() {
+        return new EVCacheTranscoder(20 << 20, Integer.MAX_VALUE, true); // 20 MiB max size, binary writes on
+    }
+
+    /** Builds a transcoder via the two-argument constructor, which leaves binary writes off (Java serialization). */
+    private static EVCacheTranscoder defaultTranscoder() {
+        return new EVCacheTranscoder(20 << 20, Integer.MAX_VALUE); // 20 MiB max size, compression off
+    }
+
+ private EVCacheValue value(String key, byte[] val, int flags, long ttl, long createTime) {
+ return new EVCacheValue(key, val, flags, ttl, createTime); // positional shorthand for the EVCacheValue constructor
+ }
+
+    private EVCacheValue typical() { // representative envelope shared by several tests
+        return new EVCacheValue("myKey", "hello world".getBytes(StandardCharsets.UTF_8), 0, 3600L, 1_700_000_000_000L);
+    }
+
+    /** Produces the bytes {@link ObjectOutputStream} writes for {@code o}, i.e. the legacy Java-serialized form. */
+    private byte[] javaSerialize(Object o) throws Exception {
+        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
+        try (ObjectOutputStream writer = new ObjectOutputStream(sink)) {
+            writer.writeObject(o);
+        } // the stream is closed here, before the sink is read
+        return sink.toByteArray();
+    }
+
+ private int javaSerializedLength(EVCacheValue v) throws Exception {
+ return javaSerialize(v).length; // byte count of the legacy Java-serialized form of v
+ }
+
+    /** Encodes {@code v} with the binary transcoder, checks the magic byte, then decodes and expects an equal value. */
+    private void assertBinaryRoundTrip(EVCacheValue v) {
+        final EVCacheTranscoder transcoder = binaryTranscoder();
+        final CachedData encoded = transcoder.encode(v);
+        final byte leadingByte = encoded.getData()[0];
+        // Confirm the binary envelope was actually written before trusting the round trip.
+        assertThat(leadingByte).isEqualTo(EVCacheValueSerde.BINARY_SERDE_MAGIC_CONSTANT_BYTE);
+        assertThat((EVCacheValue) transcoder.decode(encoded)).isEqualTo(v);
+    }
+
+ // ---- 1. Binary round-trip across cases (via transcoder) ----
+
+    @Test
+    public void testBinaryRoundTripEmptyValue() {
+        assertBinaryRoundTrip(new EVCacheValue("k", new byte[0], 0, 100L, 1L)); // zero-length payload
+    }
+
+ @Test
+ public void testBinaryRoundTripTypicalValue() {
+ assertBinaryRoundTrip(typical()); // round-trips the shared fixture built by typical()
+ }
+
+    @Test
+    public void testBinaryRoundTripLargeValue() {
+        final byte[] ramp = new byte[2 << 20]; // 2 MiB payload
+        for (int pos = ramp.length - 1; pos >= 0; pos--) {
+            ramp[pos] = (byte) pos; // low 8 bits of the index, giving a repeating 0x00..0xFF pattern
+        }
+        assertBinaryRoundTrip(new EVCacheValue("largeKey", ramp, 2, 86400L, 1_700_000_000_000L));
+    }
+
+    @Test
+    public void testBinaryRoundTripUnicodeKey() {
+        final byte[] payload = "payload".getBytes(StandardCharsets.UTF_8);
+        assertBinaryRoundTrip(new EVCacheValue("键🔑-é- key", payload, 7, 60L, 42L)); // multi-byte UTF-8 key
+    }
+
+    @Test
+    public void testBinaryRoundTripNegativeFlags() {
+        assertBinaryRoundTrip(new EVCacheValue("nf", "v".getBytes(StandardCharsets.UTF_8), -123, 60L, 42L)); // flags < 0
+    }
+
+    @Test
+    public void testBinaryRoundTripZeroTtl() {
+        assertBinaryRoundTrip(new EVCacheValue("zt", "v".getBytes(StandardCharsets.UTF_8), 1, 0L, 42L)); // ttl == 0
+    }
+
+    @Test
+    public void testBinaryRoundTripNegativeCreateTime() {
+        assertBinaryRoundTrip(new EVCacheValue("nct", "v".getBytes(StandardCharsets.UTF_8), 1, 60L, -987654321L)); // createTime < 0
+    }
+
+    @Test
+    public void testBinaryRoundTripMaxCreateTime() {
+        assertBinaryRoundTrip(new EVCacheValue("mct", "v".getBytes(StandardCharsets.UTF_8), 1, 60L, Long.MAX_VALUE)); // upper long bound
+    }
+
+    @Test
+    public void testBinaryRoundTripMinFlags() {
+        assertBinaryRoundTrip(new EVCacheValue("minf", "v".getBytes(StandardCharsets.UTF_8), Integer.MIN_VALUE, 60L, 42L)); // lower int bound
+    }
+
+ // ---- 2. Transcoder produces expected wire shape (binary mode) ----
+
+    @Test
+    public void testTranscoderBinaryWireShape() {
+        final EVCacheValue original = typical();
+        final EVCacheTranscoder transcoder = binaryTranscoder();
+        final CachedData encoded = transcoder.encode(original);
+        final byte[] wire = encoded.getData();
+
+        // Without the SERIALIZED flag, decode would not hand the bytes to deserialize().
+        assertThat(encoded.getFlags() & SERIALIZED).isNotZero();
+        // Header is two bytes: the envelope magic, then the reserved/version byte (0x00 today).
+        assertThat(wire[0]).isEqualTo(EVCacheValueSerde.BINARY_SERDE_MAGIC_CONSTANT_BYTE);
+        assertThat(wire[1]).isEqualTo((byte) 0x00);
+
+        // The decoded object must be an EVCacheValue equal to what was encoded.
+        final Object decoded = transcoder.decode(encoded);
+        assertThat(decoded).isInstanceOf(EVCacheValue.class);
+        assertThat(decoded).isEqualTo(original);
+    }
+
+ // ---- 3. Default transcoder writes Java, but decode reads both formats ----
+
+    @Test
+    public void testTranscoderDefaultProducesJavaAndReadsBoth() {
+        final EVCacheTranscoder transcoder = defaultTranscoder();
+        final EVCacheValue original = typical();
+        final CachedData javaEncoded = transcoder.encode(original);
+
+        // Write side: Java stream header (0xAC 0xED) under the SERIALIZED flag.
+        final byte[] wire = javaEncoded.getData();
+        assertThat(wire[0]).isEqualTo(JAVA_STREAM_MAGIC_FIRST);
+        assertThat(wire[1]).isEqualTo(JAVA_STREAM_MAGIC_SECOND);
+        assertThat(javaEncoded.getFlags() & SERIALIZED).isNotZero();
+
+        // Read side, format 1: the transcoder's own Java-serialized output.
+        assertThat(transcoder.decode(javaEncoded)).isInstanceOf(EVCacheValue.class).isEqualTo(original);
+        // Read side, format 2: a binary envelope written by a binary-enabled transcoder. Reads are
+        // routed by the leading byte, not by the write-side switch, so this must decode as well.
+        final CachedData binaryEncoded = binaryTranscoder().encode(original);
+        assertThat(transcoder.decode(binaryEncoded)).isInstanceOf(EVCacheValue.class).isEqualTo(original);
+    }
+
+ // ---- 4. Backwards-compat: new client reads legacy Java-serialized bytes ----
+
+    @Test
+    public void testBackwardsCompatLegacyJavaSerialized() throws Exception {
+        final EVCacheValue original = typical();
+        final byte[] legacyBytes = javaSerialize(original);
+        final byte firstByte = legacyBytes[0];
+
+        // Precondition: the fixture carries the Java stream header, not the binary envelope magic.
+        assertThat(firstByte).isEqualTo(JAVA_STREAM_MAGIC_FIRST);
+        assertThat(firstByte).isNotEqualTo(EVCacheValueSerde.BINARY_SERDE_MAGIC_CONSTANT_BYTE);
+
+        // Wrap the raw legacy bytes under the SERIALIZED flag and decode them with the default transcoder.
+        final CachedData stored = new CachedData(SERIALIZED, legacyBytes, CachedData.MAX_SIZE);
+        final Object decoded = defaultTranscoder().decode(stored);
+        assertThat(decoded).isInstanceOf(EVCacheValue.class).isEqualTo(original);
+    }
+
+ // ---- 5. Non-EVCacheValue passthrough (arbitrary Java objects still use Java serde) ----
+
+ @Test
+ public void testNonEVCacheValuePassthrough() {
+ EVCacheTranscoder t = binaryTranscoder(); // even with binary on, non-EVCacheValue stays Java
+ ArrayList