diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/DefaultValueTransform.java b/paimon-common/src/main/java/org/apache/paimon/predicate/DefaultValueTransform.java new file mode 100644 index 000000000000..9ae8a3f3cc59 --- /dev/null +++ b/paimon-common/src/main/java/org/apache/paimon/predicate/DefaultValueTransform.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.predicate; + +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.types.DataType; +import org.apache.paimon.utils.DefaultValueUtils; + +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +import static org.apache.paimon.utils.Preconditions.checkArgument; + +/** + * A {@link Transform} which always returns the default value of the input field's {@link DataType}. 
+ */ +public class DefaultValueTransform implements Transform { + + private static final long serialVersionUID = 1L; + + /** Field whose declared type selects the value returned by this transform. */ + private final FieldRef fieldRef; + + /** Creates a transform for {@code fieldRef}; a null reference is rejected. */ + public DefaultValueTransform(FieldRef fieldRef) { + this.fieldRef = Objects.requireNonNull(fieldRef, "fieldRef must not be null"); + } + + /** Returns the field reference this transform was created with. */ + public FieldRef fieldRef() { + return fieldRef; + } + + @Override + public List inputs() { + return Collections.singletonList(fieldRef); + } + + @Override + public DataType outputType() { + return fieldRef.type(); + } + + @Override + public Object transform(InternalRow row) { + /* The row is never read: the result depends only on the declared type of the field. */ + return DefaultValueUtils.defaultValue(fieldRef.type()); + } + + @Override + public Transform copyWithNewInputs(List inputs) { + /* Exactly one input is accepted, and it must be a FieldRef. */ + List nonNullInputs = + Objects.requireNonNull(inputs, "DefaultValueTransform expects non-null inputs"); + checkArgument(nonNullInputs.size() == 1, "DefaultValueTransform expects 1 input"); + checkArgument( + nonNullInputs.get(0) instanceof FieldRef, + "DefaultValueTransform input must be FieldRef"); + return new DefaultValueTransform((FieldRef) nonNullInputs.get(0)); + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + DefaultValueTransform that = (DefaultValueTransform) o; + return Objects.equals(fieldRef, that.fieldRef); + } + + @Override + public int hashCode() { + return Objects.hashCode(fieldRef); + } + + @Override + public String toString() { + return "DefaultValueTransform{" + "fieldRef=" + fieldRef + '}'; + } +} diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/HashTransform.java b/paimon-common/src/main/java/org/apache/paimon/predicate/HashTransform.java new file mode 100644 index 000000000000..01c6183dc167 --- /dev/null +++ b/paimon-common/src/main/java/org/apache/paimon/predicate/HashTransform.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.predicate; + +import org.apache.paimon.data.BinaryString; + +import org.apache.paimon.shade.guava30.com.google.common.hash.HashCode; + +import javax.annotation.Nullable; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Objects; + +import static org.apache.paimon.utils.Preconditions.checkArgument; + +/** + * A {@link Transform} which masks a string column by hashing it. + * + *

<p>Output is a hex string. Default algorithm is {@code SHA-256}. + */ +public class HashTransform extends StringTransform { + + private static final long serialVersionUID = 1L; + + /** Digest name as produced by {@link #resolveAlgorithm(String)}. */ + private final String algorithm; + + /** Optional salt fed to the digest before the value; {@code null} means unsalted. */ + @Nullable private final BinaryString salt; + + /** + * Digest reused across calls; transient, and created on demand by {@link #ensureDigest()}. + * NOTE(review): {@link MessageDigest} is stateful, so one transform instance should + * presumably not be shared between threads - confirm with callers. + */ + private transient MessageDigest digest; + + /** Creates an unsalted transform using the default algorithm. */ + public HashTransform(FieldRef fieldRef) { + this(fieldRef, null, null); + } + + /** + * Creates a hashing transform. + * + * @param fieldRef the string field to hash, must not be null + * @param algorithm digest name or alias; null or blank selects {@code SHA-256} + * @param salt optional salt hashed in front of the value + * @throws IllegalArgumentException if no {@link MessageDigest} exists for the algorithm + */ + public HashTransform( + FieldRef fieldRef, @Nullable String algorithm, @Nullable BinaryString salt) { + super(Arrays.asList(Objects.requireNonNull(fieldRef, "fieldRef must not be null"))); + this.algorithm = resolveAlgorithm(algorithm); + this.salt = salt; + /* Instantiate eagerly so that an unsupported algorithm fails at construction time. */ + ensureDigest(); + } + + public FieldRef fieldRef() { + return (FieldRef) inputs().get(0); + } + + public String algorithm() { + return algorithm; + } + + @Nullable + public BinaryString salt() { + return salt; + } + + /** + * Hashes the salt (if any) followed by the value, both as UTF-8 bytes, and returns the digest + * as a hex string. A null value yields null. + */ + @Nullable + @Override + protected BinaryString transform(List inputs) { + BinaryString s = inputs.get(0); + if (s == null) { + return null; + } + + MessageDigest md = ensureDigest(); + md.reset(); + + if (salt != null) { + md.update(salt.toString().getBytes(StandardCharsets.UTF_8)); + } + md.update(s.toString().getBytes(StandardCharsets.UTF_8)); + + return BinaryString.fromString(HashCode.fromBytes(md.digest()).toString()); + } + + @Override + public Transform copyWithNewInputs(List inputs) { + List nonNullInputs = + Objects.requireNonNull(inputs, "HashTransform expects non-null inputs"); + checkArgument(nonNullInputs.size() == 1, "HashTransform expects 1 input"); + checkArgument( + nonNullInputs.get(0) instanceof FieldRef, "HashTransform input must be FieldRef"); + return new HashTransform((FieldRef) nonNullInputs.get(0), algorithm, salt); + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + HashTransform that = (HashTransform) o; + return Objects.equals(fieldRef(), that.fieldRef()) + && Objects.equals(algorithm, that.algorithm) + && 
Objects.equals(salt, that.salt); + } + + @Override + public int hashCode() { + return Objects.hash(fieldRef(), algorithm, salt); + } + + @Override + public String toString() { + return "HashTransform{" + + "fieldRef=" + + fieldRef() + + ", algorithm='" + + algorithm + + '\'' + + ", salt=" + + salt + + '}'; + } + + /** + * Returns the cached digest, creating it when it is absent. + * + * @throws IllegalArgumentException if the algorithm is not available + */ + private MessageDigest ensureDigest() { + if (digest == null) { + try { + digest = MessageDigest.getInstance(algorithm); + } catch (NoSuchAlgorithmException e) { + throw new IllegalArgumentException("Unsupported hash algorithm: " + algorithm, e); + } + } + return digest; + } + + /** + * Maps a user-supplied name to a digest name. Null or blank selects {@code SHA-256}; MD5, + * SHA-1 and the SHA-2 names are matched ignoring case and hyphens so that equivalent + * spellings produce equal transforms; any other name is passed through trimmed. + */ + private static String resolveAlgorithm(@Nullable String algorithm) { + if (algorithm == null || algorithm.trim().isEmpty()) { + return "SHA-256"; + } + String a = algorithm.trim(); + String normalized = a.replace("-", "").toLowerCase(Locale.ROOT); + switch (normalized) { + case "md5": + return "MD5"; + case "sha1": + return "SHA-1"; + case "sha224": + return "SHA-224"; + case "sha256": + return "SHA-256"; + case "sha384": + return "SHA-384"; + case "sha512": + return "SHA-512"; + default: + return a; + } + } +} diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/NullTransform.java b/paimon-common/src/main/java/org/apache/paimon/predicate/NullTransform.java new file mode 100644 index 000000000000..0ef989f4c63b --- /dev/null +++ b/paimon-common/src/main/java/org/apache/paimon/predicate/NullTransform.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.predicate; + +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.types.DataType; + +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +import static org.apache.paimon.utils.Preconditions.checkArgument; + +/** A {@link Transform} which always returns {@code null}. */ +public class NullTransform implements Transform { + + private static final long serialVersionUID = 1L; + + /** Field this transform stands in for; it supplies the reported output type. */ + private final FieldRef fieldRef; + + /** Creates a transform for {@code fieldRef}; a null reference is rejected. */ + public NullTransform(FieldRef fieldRef) { + this.fieldRef = Objects.requireNonNull(fieldRef, "fieldRef must not be null"); + } + + /** Returns the field reference this transform was created with. */ + public FieldRef fieldRef() { + return fieldRef; + } + + @Override + public List inputs() { + return Collections.singletonList(fieldRef); + } + + @Override + public DataType outputType() { + return fieldRef.type(); + } + + @Override + public Object transform(InternalRow row) { + /* The row is never read; the result is null for every input. */ + return null; + } + + @Override + public Transform copyWithNewInputs(List inputs) { + /* Exactly one input is accepted, and it must be a FieldRef. */ + List nonNullInputs = + Objects.requireNonNull(inputs, "NullTransform expects non-null inputs"); + checkArgument(nonNullInputs.size() == 1, "NullTransform expects 1 input"); + checkArgument( + nonNullInputs.get(0) instanceof FieldRef, "NullTransform input must be FieldRef"); + return new NullTransform((FieldRef) nonNullInputs.get(0)); + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + NullTransform that = (NullTransform) o; + return Objects.equals(fieldRef, that.fieldRef); + } + + @Override + public int 
hashCode() { + return Objects.hashCode(fieldRef); + } + + @Override + public String toString() { + return "NullTransform{" + "fieldRef=" + fieldRef + '}'; + } +} diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/PartialMaskTransform.java b/paimon-common/src/main/java/org/apache/paimon/predicate/PartialMaskTransform.java new file mode 100644 index 000000000000..eae540e715db --- /dev/null +++ b/paimon-common/src/main/java/org/apache/paimon/predicate/PartialMaskTransform.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.predicate; + +import org.apache.paimon.data.BinaryString; + +import javax.annotation.Nullable; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import static org.apache.paimon.utils.Preconditions.checkArgument; + +/** + * A {@link Transform} which masks the middle part of a string by keeping the specified number of + * prefix and suffix characters. + * + *

<p>If the string length is less than or equal to {@code prefixLen + suffixLen}, every + * character is masked: the output is the mask token repeated once per input character. + */ +public class PartialMaskTransform extends StringTransform { + + private static final long serialVersionUID = 1L; + + private final int prefixLen; + private final int suffixLen; + + /** + * Creates a partial-mask transform. + * + * @param fieldRef the string field to mask, must not be null + * @param prefixLen number of leading characters kept, must not be negative + * @param suffixLen number of trailing characters kept, must not be negative + * @param mask token emitted once per masked character, must not be null or empty + * @throws IllegalArgumentException if a length is negative or the mask is empty + */ + public PartialMaskTransform( + FieldRef fieldRef, int prefixLen, int suffixLen, BinaryString mask) { + super( + Arrays.asList( + Objects.requireNonNull(fieldRef, "fieldRef must not be null"), + Objects.requireNonNull(mask, "mask must not be null"))); + checkArgument(prefixLen >= 0, "prefixLen must be >= 0"); + checkArgument(suffixLen >= 0, "suffixLen must be >= 0"); + // "mask" is treated as a token repeated by character count. Empty mask would be invalid. + checkArgument(mask.numChars() > 0, "mask must not be empty"); + + this.prefixLen = prefixLen; + this.suffixLen = suffixLen; + } + + public FieldRef fieldRef() { + return (FieldRef) inputs().get(0); + } + + public int prefixLen() { + return prefixLen; + } + + public int suffixLen() { + return suffixLen; + } + + public BinaryString mask() { + return (BinaryString) inputs().get(1); + } + + /** + * Masks one value: {@code inputs.get(0)} is the value and {@code inputs.get(1)} the mask + * token. A null value stays null and an empty value is returned unchanged. + */ + @Nullable + @Override + protected BinaryString transform(List inputs) { + BinaryString s = inputs.get(0); + if (s == null) { + return null; + } + BinaryString mask = inputs.get(1); + checkArgument(mask != null, "mask must not be null"); + + int len = s.numChars(); + if (len <= 0) { + return s; + } + + /* + * Sum in long: with two large lengths an int sum wraps negative, this full-mask branch + * would be skipped and the substring calls below would receive out-of-range bounds. + */ + if ((long) prefixLen + suffixLen >= len) { + return repeat(mask, len); + } + + BinaryString prefix = prefixLen == 0 ? BinaryString.EMPTY_UTF8 : s.substring(0, prefixLen); + BinaryString suffix = + suffixLen == 0 ? BinaryString.EMPTY_UTF8 : s.substring(len - suffixLen, len); + int middleLen = len - prefixLen - suffixLen; + BinaryString middle = middleLen == 0 ? 
BinaryString.EMPTY_UTF8 : repeat(mask, middleLen); + return BinaryString.concat(prefix, middle, suffix); + } + + /** Returns {@code token} concatenated {@code times} times; empty when times is not positive. */ + private static BinaryString repeat(BinaryString token, int times) { + if (times <= 0) { + return BinaryString.EMPTY_UTF8; + } + String t = token.toString(); + StringBuilder sb = new StringBuilder(t.length() * times); + for (int i = 0; i < times; i++) { + sb.append(t); + } + return BinaryString.fromString(sb.toString()); + } + + @Override + public Transform copyWithNewInputs(List inputs) { + List nonNullInputs = + Objects.requireNonNull(inputs, "PartialMaskTransform expects non-null inputs"); + checkArgument(nonNullInputs.size() == 2, "PartialMaskTransform expects 2 inputs"); + checkArgument( + nonNullInputs.get(0) instanceof FieldRef, + "PartialMaskTransform input must be FieldRef"); + checkArgument( + nonNullInputs.get(1) instanceof BinaryString, + "PartialMaskTransform mask input must be BinaryString"); + return new PartialMaskTransform( + (FieldRef) nonNullInputs.get(0), + prefixLen, + suffixLen, + (BinaryString) nonNullInputs.get(1)); + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + PartialMaskTransform that = (PartialMaskTransform) o; + return prefixLen == that.prefixLen + && suffixLen == that.suffixLen + && Objects.equals(fieldRef(), that.fieldRef()) + && Objects.equals(mask(), that.mask()); + } + + @Override + public int hashCode() { + return Objects.hash(fieldRef(), prefixLen, suffixLen, mask()); + } + + @Override + public String toString() { + return "PartialMaskTransform{" + + "fieldRef=" + + fieldRef() + + ", prefixLen=" + + prefixLen + + ", suffixLen=" + + suffixLen + + ", mask=" + + mask() + + '}'; + } +} diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/DefaultValueUtils.java b/paimon-common/src/main/java/org/apache/paimon/utils/DefaultValueUtils.java index b82f5f77e700..0f8521a3b608 100644 --- 
a/paimon-common/src/main/java/org/apache/paimon/utils/DefaultValueUtils.java +++ b/paimon-common/src/main/java/org/apache/paimon/utils/DefaultValueUtils.java @@ -21,14 +21,30 @@ import org.apache.paimon.casting.CastExecutor; import org.apache.paimon.casting.CastExecutors; import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.Blob; +import org.apache.paimon.data.Decimal; +import org.apache.paimon.data.GenericArray; +import org.apache.paimon.data.GenericMap; +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.data.variant.GenericVariant; +import org.apache.paimon.data.variant.Variant; +import org.apache.paimon.types.BinaryType; import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DecimalType; +import org.apache.paimon.types.RowType; import org.apache.paimon.types.VarCharType; import javax.annotation.Nullable; +import java.math.BigDecimal; +import java.util.Collections; + /** Utils for default value. */ public class DefaultValueUtils { + private static final Variant NULL_VARIANT = GenericVariant.fromJson("null"); + public static Object convertDefaultValue(DataType dataType, String defaultValueStr) { @SuppressWarnings("unchecked") CastExecutor resolve = @@ -58,4 +74,58 @@ public static void validateDefaultValue(DataType dataType, @Nullable String defa "Unsupported default value `" + defaultValueStr + "` for type " + dataType, e); } } + + /** Creates a default value object for the given {@link DataType}. 
*/ + public static Object defaultValue(DataType dataType) { + switch (dataType.getTypeRoot()) { + case BOOLEAN: + return false; + case TINYINT: + return (byte) 0; + case SMALLINT: + return (short) 0; + /* DATE and TIME_WITHOUT_TIME_ZONE share the int zero used for INTEGER. */ + case INTEGER: + case DATE: + case TIME_WITHOUT_TIME_ZONE: + return 0; + case BIGINT: + return 0L; + case FLOAT: + return 0.0f; + case DOUBLE: + return 0.0d; + case DECIMAL: + /* Zero carrying the precision and scale declared on the type. */ + DecimalType decimalType = (DecimalType) dataType; + return Decimal.fromBigDecimal( + BigDecimal.ZERO, decimalType.getPrecision(), decimalType.getScale()); + case CHAR: + case VARCHAR: + return BinaryString.fromString(""); + /* BINARY is zero-filled to the declared length; VARBINARY is empty. */ + case BINARY: + return new byte[((BinaryType) dataType).getLength()]; + case VARBINARY: + return new byte[0]; + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return Timestamp.fromEpochMillis(0); + case ARRAY: + return new GenericArray(new Object[0]); + /* MAP and MULTISET both yield an empty GenericMap. */ + case MAP: + case MULTISET: + return new GenericMap(Collections.emptyMap()); + case ROW: + /* Every field is filled recursively with the default of its own type. */ + RowType rowType = (RowType) dataType; + GenericRow row = new GenericRow(rowType.getFieldCount()); + for (int i = 0; i < rowType.getFieldCount(); i++) { + row.setField(i, defaultValue(rowType.getTypeAt(i))); + } + return row; + case VARIANT: + /* Shared constant built from the JSON text "null". */ + return NULL_VARIANT; + case BLOB: + return Blob.fromData(new byte[0]); + default: + throw new UnsupportedOperationException("Unsupported type: " + dataType); + } + } } diff --git a/paimon-common/src/test/java/org/apache/paimon/predicate/DefaultValueTransformTest.java b/paimon-common/src/test/java/org/apache/paimon/predicate/DefaultValueTransformTest.java new file mode 100644 index 000000000000..4f2c3ff83fbc --- /dev/null +++ b/paimon-common/src/test/java/org/apache/paimon/predicate/DefaultValueTransformTest.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.predicate; + +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.data.InternalMap; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DataTypeRoot; +import org.apache.paimon.types.DataTypes; +import org.apache.paimon.utils.DefaultValueUtils; +import org.apache.paimon.utils.InternalRowUtils; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.Collections; +import java.util.stream.Stream; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link DefaultValueTransform}. */ +class DefaultValueTransformTest { + + @ParameterizedTest + @MethodSource("allTypes") + void testReturnDefaultValueForAllTypes(DataType type) { + DefaultValueTransform transform = new DefaultValueTransform(new FieldRef(0, "f0", type)); + assertThat(transform.outputType()).isEqualTo(type); + + Object expected = DefaultValueUtils.defaultValue(type); + Object actual = transform.transform(GenericRow.of(123)); + /* + * MULTISET is only checked for being an empty InternalMap. NOTE(review): presumably + * InternalRowUtils.equals cannot compare MULTISET values - confirm. + */ + if (type.getTypeRoot() == DataTypeRoot.MULTISET) { + assertThat(actual).isInstanceOf(InternalMap.class); + assertThat(((InternalMap) actual).size()).isEqualTo(0); + } else { + assertThat(InternalRowUtils.equals(actual, expected, type)).isTrue(); + } + } + + @Test + void testCopyWithNewInputs() { + FieldRef ref0 = new FieldRef(0, "f0", DataTypes.INT()); + 
FieldRef ref3 = new FieldRef(3, "f0", DataTypes.INT()); + + DefaultValueTransform transform = new DefaultValueTransform(ref0); + Transform copied = transform.copyWithNewInputs(Collections.singletonList(ref3)); + + assertThat(copied).isEqualTo(new DefaultValueTransform(ref3)); + assertThat(copied.outputType()).isEqualTo(DataTypes.INT()); + assertThat(copied.transform(GenericRow.of((Object) null))).isEqualTo(0); + } + + /** Supplies the data types fed to the parameterized test above. */ + private static Stream allTypes() { + return Stream.of( + // numeric + DataTypes.TINYINT(), + DataTypes.SMALLINT(), + DataTypes.INT(), + DataTypes.BIGINT(), + DataTypes.FLOAT(), + DataTypes.DOUBLE(), + DataTypes.DECIMAL(10, 2), + + // boolean + DataTypes.BOOLEAN(), + + // string + DataTypes.STRING(), + DataTypes.CHAR(3), + DataTypes.VARCHAR(20), + + // binary + DataTypes.BYTES(), + DataTypes.BINARY(8), + DataTypes.VARBINARY(12), + + // datetime + DataTypes.DATE(), + DataTypes.TIME(), + DataTypes.TIME(9), + DataTypes.TIMESTAMP(), + DataTypes.TIMESTAMP_MILLIS(), + DataTypes.TIMESTAMP(9), + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(), + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(9), + DataTypes.TIMESTAMP_LTZ_MILLIS(), + + // complex + DataTypes.ARRAY(DataTypes.INT()), + DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.TIMESTAMP())), + DataTypes.MAP(DataTypes.VARCHAR(10), DataTypes.TIMESTAMP()), + DataTypes.MULTISET(DataTypes.STRING()), + DataTypes.ROW( + DataTypes.FIELD(0, "a", DataTypes.INT()), + DataTypes.FIELD(1, "b", DataTypes.STRING())), + + // special + DataTypes.VARIANT(), + DataTypes.BLOB(), + + // not-null variants (exercise nullability flag on type) + DataTypes.INT().copy(false), + DataTypes.STRING().copy(false), + DataTypes.ARRAY(DataTypes.INT()).copy(false)); + } +} diff --git a/paimon-common/src/test/java/org/apache/paimon/predicate/HashTransformTest.java b/paimon-common/src/test/java/org/apache/paimon/predicate/HashTransformTest.java new file mode 100644 index 000000000000..9154cd0f81ab --- /dev/null +++ 
b/paimon-common/src/test/java/org/apache/paimon/predicate/HashTransformTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.predicate; + +import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.types.DataTypes; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Tests for {@link HashTransform}. */ +class HashTransformTest { + + @Test + void testNull() { + HashTransform transform = new HashTransform(new FieldRef(0, "f0", DataTypes.STRING())); + assertThat(transform.transform(GenericRow.of((Object) null))).isNull(); + } + + @Test + void testSha256HexLower() { + /* The alias spelling is passed on purpose; the expected digest is lower-case hex. */ + HashTransform transform = + new HashTransform(new FieldRef(0, "f0", DataTypes.STRING()), "sha256", null); + Object out = transform.transform(GenericRow.of(BinaryString.fromString("abcdef"))); + assertThat(out) + .isEqualTo( + BinaryString.fromString( + "bef57ec7f53a6d40beb640a780a639c83bc29ac8a9816f1fc6c5c6dcd93c4721")); + } + + @Test + void testSha256WithSalt() { + HashTransform transform = + new HashTransform( + new FieldRef(0, "f0", DataTypes.STRING()), + "SHA-256", + 
BinaryString.fromString("SALT_")); + Object out = transform.transform(GenericRow.of(BinaryString.fromString("abcdef"))); + // sha256("SALT_" + "abcdef") + assertThat(out) + .isEqualTo( + BinaryString.fromString( + "1aff6a6e4dc5a1bcf81b101216ae7cb85b32ec8c56e926be3e6d3ae211caf522")); + } + + @Test + void testIllegalAlgorithm() { + assertThatThrownBy( + () -> + new HashTransform( + new FieldRef(0, "f0", DataTypes.STRING()), + "NO_SUCH_ALGO", + null)) + .isInstanceOf(IllegalArgumentException.class); + } +} diff --git a/paimon-common/src/test/java/org/apache/paimon/predicate/NullTransformTest.java b/paimon-common/src/test/java/org/apache/paimon/predicate/NullTransformTest.java new file mode 100644 index 000000000000..9f0767896f29 --- /dev/null +++ b/paimon-common/src/test/java/org/apache/paimon/predicate/NullTransformTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.predicate; + +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DataTypes; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.Collections; +import java.util.stream.Stream; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link NullTransform}. */ +class NullTransformTest { + + @ParameterizedTest + @MethodSource("allTypes") + void testAlwaysReturnNullForAllTypes(DataType type) { + NullTransform transform = new NullTransform(new FieldRef(0, "f0", type)); + assertThat(transform.outputType()).isEqualTo(type); + /* Both a null and a non-null field value must come back as null. */ + assertThat(transform.transform(GenericRow.of((Object) null))).isNull(); + assertThat(transform.transform(GenericRow.of(123))).isNull(); + } + + @Test + void testCopyWithNewInputs() { + FieldRef ref0 = new FieldRef(0, "f0", DataTypes.INT()); + FieldRef ref3 = new FieldRef(3, "f0", DataTypes.INT()); + + NullTransform transform = new NullTransform(ref0); + Transform copied = transform.copyWithNewInputs(Collections.singletonList(ref3)); + + assertThat(copied).isEqualTo(new NullTransform(ref3)); + assertThat(copied.outputType()).isEqualTo(DataTypes.INT()); + assertThat(copied.transform(GenericRow.of((Object) null))).isNull(); + } + + /** Supplies the data types fed to the parameterized test above. */ + private static Stream allTypes() { + return Stream.of( + // numeric + DataTypes.TINYINT(), + DataTypes.SMALLINT(), + DataTypes.INT(), + DataTypes.BIGINT(), + DataTypes.FLOAT(), + DataTypes.DOUBLE(), + DataTypes.DECIMAL(10, 2), + + // boolean + DataTypes.BOOLEAN(), + + // string + DataTypes.STRING(), + DataTypes.CHAR(3), + DataTypes.VARCHAR(20), + + // binary + DataTypes.BYTES(), + DataTypes.BINARY(8), + DataTypes.VARBINARY(12), + + // datetime + DataTypes.DATE(), + DataTypes.TIME(), + DataTypes.TIME(9), + DataTypes.TIMESTAMP(), + DataTypes.TIMESTAMP_MILLIS(), + DataTypes.TIMESTAMP(9), + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(), + 
DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(9), + DataTypes.TIMESTAMP_LTZ_MILLIS(), + + // complex + DataTypes.ARRAY(DataTypes.INT()), + DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.TIMESTAMP())), + DataTypes.MAP(DataTypes.VARCHAR(10), DataTypes.TIMESTAMP()), + DataTypes.MULTISET(DataTypes.STRING()), + DataTypes.ROW( + DataTypes.FIELD(0, "a", DataTypes.INT()), + DataTypes.FIELD(1, "b", DataTypes.STRING())), + + // special + DataTypes.VARIANT(), + DataTypes.BLOB(), + + // not-null variants (exercise nullability flag on type) + DataTypes.INT().copy(false), + DataTypes.STRING().copy(false), + DataTypes.ARRAY(DataTypes.INT()).copy(false)); + } +} diff --git a/paimon-common/src/test/java/org/apache/paimon/predicate/PartialMaskTransformTest.java b/paimon-common/src/test/java/org/apache/paimon/predicate/PartialMaskTransformTest.java new file mode 100644 index 000000000000..4c0fd5bc7a5e --- /dev/null +++ b/paimon-common/src/test/java/org/apache/paimon/predicate/PartialMaskTransformTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.predicate; + +import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.types.DataTypes; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Tests for {@link PartialMaskTransform}. */ +class PartialMaskTransformTest { + + @Test + void testNull() { + PartialMaskTransform transform = + new PartialMaskTransform( + new FieldRef(0, "f0", DataTypes.STRING()), + 2, + 2, + BinaryString.fromString("*")); + assertThat(transform.transform(GenericRow.of((Object) null))).isNull(); + } + + @Test + void testNormal() { + PartialMaskTransform transform = + new PartialMaskTransform( + new FieldRef(0, "f0", DataTypes.STRING()), + 2, + 2, + BinaryString.fromString("*")); + Object out = transform.transform(GenericRow.of(BinaryString.fromString("abcdef"))); + assertThat(out).isEqualTo(BinaryString.fromString("ab**ef")); + } + + @Test + void testShorterThanPrefixPlusSuffix() { + /* Three characters against 3 + 3 kept characters: the whole value is masked. */ + PartialMaskTransform transform = + new PartialMaskTransform( + new FieldRef(0, "f0", DataTypes.STRING()), + 3, + 3, + BinaryString.fromString("*")); + Object out = transform.transform(GenericRow.of(BinaryString.fromString("abc"))); + assertThat(out).isEqualTo(BinaryString.fromString("***")); + } + + @Test + void testMultiCharMaskToken() { + /* The token is emitted once per masked character, so two characters become "xxxx". */ + PartialMaskTransform transform = + new PartialMaskTransform( + new FieldRef(0, "f0", DataTypes.STRING()), + 1, + 1, + BinaryString.fromString("xx")); + Object out = transform.transform(GenericRow.of(BinaryString.fromString("abcd"))); + assertThat(out).isEqualTo(BinaryString.fromString("axxxxd")); + } + + @Test + void testIllegalArgs() { + /* A negative prefix length and an empty mask token are both rejected. */ + assertThatThrownBy( + () -> + new PartialMaskTransform( + new FieldRef(0, "f0", DataTypes.STRING()), + -1, + 0, + BinaryString.fromString("*"))) + .isInstanceOf(IllegalArgumentException.class); + assertThatThrownBy( + () -> + new PartialMaskTransform( + new FieldRef(0, "f0", 
DataTypes.STRING()), + 0, + 0, + BinaryString.fromString(""))) + .isInstanceOf(IllegalArgumentException.class); + } +}