Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ public List<DataType> visit(ArrayType arrayType) {
return Collections.singletonList(arrayType.getElementType());
}

@Override
public List<DataType> visit(VecType vecType) {
    // The element type is the only nested type reported for a vector.
    final DataType element = vecType.getElementType();
    return Collections.singletonList(element);
}

@Override
public List<DataType> visit(MultisetType multisetType) {
return Collections.singletonList(multisetType.getElementType());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ public R visit(ArrayType arrayType) {
return defaultMethod(arrayType);
}

@Override
public R visit(VecType vecType) {
    // No vector-specific handling: hand the type to the shared default method.
    final R result = defaultMethod(vecType);
    return result;
}

@Override
public R visit(MultisetType multisetType) {
return defaultMethod(multisetType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ public static DataType parseDataType(JsonNode json, AtomicInteger fieldId) {
if (typeString.startsWith("ARRAY")) {
DataType element = parseDataType(json.get("element"), fieldId);
return new ArrayType(!typeString.contains("NOT NULL"), element);
} else if (typeString.startsWith("VECTOR")) {
DataType element = parseDataType(json.get("element"), fieldId);
int length = json.get("length").asInt();
return new VecType(!typeString.contains("NOT NULL"), length, element);
} else if (typeString.startsWith("MULTISET")) {
DataType element = parseDataType(json.get("element"), fieldId);
return new MultisetType(!typeString.contains("NOT NULL"), element);
Expand Down Expand Up @@ -318,6 +322,7 @@ private enum Keyword {
SECOND,
TO,
ARRAY,
VECTOR,
MULTISET,
MAP,
ROW,
Expand Down Expand Up @@ -544,6 +549,8 @@ private DataType parseTypeByKeyword() {
return new VariantType();
case BLOB:
return new BlobType();
case VECTOR:
return parseVecType();
default:
throw parsingError("Unsupported type: " + token().value);
}
Expand Down Expand Up @@ -665,5 +672,16 @@ private int parseOptionalPrecision(int defaultPrecision) {
}
return precision;
}

/**
 * Parses the subtype part of a vector declaration written as {@code VECTOR<elementType,
 * length>} and builds the corresponding type through {@link DataTypes#VECTOR}.
 *
 * <p>Tokens are consumed strictly in order: {@code BEGIN_SUBTYPE}, the element type,
 * {@code LIST_SEPARATOR}, an integer literal for the length, and {@code END_SUBTYPE}.
 */
private DataType parseVecType() {
    // Start of the subtype list, followed by the element type.
    nextToken(TokenType.BEGIN_SUBTYPE);
    final DataType element = parseTypeWithNullability();

    // Separator, then the integer literal holding the vector length.
    nextToken(TokenType.LIST_SEPARATOR);
    nextToken(TokenType.LITERAL_INT);
    final int vectorLength = tokenAsInt();

    // End of the subtype list.
    nextToken(TokenType.END_SUBTYPE);

    return DataTypes.VECTOR(vectorLength, element);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ public enum DataTypeRoot {

ARRAY(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

VECTOR(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

MULTISET(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

MAP(DataTypeFamily.CONSTRUCTED, DataTypeFamily.EXTENSION),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ public interface DataTypeVisitor<R> {

R visit(ArrayType arrayType);

R visit(VecType vecType);

R visit(MultisetType multisetType);

R visit(MapType mapType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ public static ArrayType ARRAY(DataType element) {
return new ArrayType(element);
}

/**
 * Creates a {@link VecType} with the given length and element type, using the two-argument
 * {@code VecType} constructor.
 */
public static VecType VECTOR(int length, DataType element) {
    final VecType vector = new VecType(length, element);
    return vector;
}

/** Creates a {@link CharType} with the given length. */
public static CharType CHAR(int length) {
    final CharType charType = new CharType(length);
    return charType;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ public DataType visit(ArrayType arrayType) {
return new ArrayType(arrayType.isNullable(), arrayType.getElementType().accept(this));
}

@Override
public DataType visit(VecType vecType) {
    // Rebuild the vector with the same nullability and length, letting this visitor
    // produce the element type.
    final boolean nullable = vecType.isNullable();
    final int length = vecType.getLength();
    final DataType element = vecType.getElementType().accept(this);
    return new VecType(nullable, length, element);
}

@Override
public DataType visit(MultisetType multisetType) {
return new MultisetType(
Expand Down
177 changes: 177 additions & 0 deletions paimon-api/src/main/java/org/apache/paimon/types/VecType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.types;

import org.apache.paimon.annotation.Public;
import org.apache.paimon.utils.Preconditions;

import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;

import java.io.IOException;
import java.util.Objects;
import java.util.Set;

/**
 * Data type of fixed-size vector type. The elements are densely stored.
 *
 * <p>A vector is described by its element type and a fixed {@code length}. Only element types
 * accepted by {@link #isValidElementType(DataType)} are allowed, and the length must be at least
 * {@link #MIN_LENGTH}.
 *
 * @since 2.0.0
 */
@Public
public class VecType extends DataType {

    private static final long serialVersionUID = 1L;

    /** Smallest permitted vector length. */
    public static final int MIN_LENGTH = 1;

    /** Largest permitted vector length. */
    public static final int MAX_LENGTH = Integer.MAX_VALUE;

    /** SQL string template: element type first, then length. */
    public static final String FORMAT = "VECTOR<%s, %d>";

    private final DataType elementType;

    private final int length;

    /**
     * Creates a vector type.
     *
     * @param isNullable whether the vector type itself is nullable
     * @param length number of elements, at least {@link #MIN_LENGTH}
     * @param elementType type of each element; must be non-null and accepted by {@link
     *     #isValidElementType(DataType)}
     * @throws IllegalArgumentException if the element type is not supported or the length is
     *     below {@link #MIN_LENGTH}
     */
    public VecType(boolean isNullable, int length, DataType elementType) {
        super(isNullable, DataTypeRoot.VECTOR);
        this.elementType =
                Preconditions.checkNotNull(elementType, "Element type must not be null.");
        Preconditions.checkArgument(
                isValidElementType(elementType),
                "Invalid element type for vector: " + elementType);
        // Only the lower bound needs a check: MAX_LENGTH equals Integer.MAX_VALUE.
        if (length < MIN_LENGTH) {
            throw new IllegalArgumentException(
                    String.format(
                            "Vector length must be between %d and %d (both inclusive).",
                            MIN_LENGTH, MAX_LENGTH));
        }
        this.length = length;
    }

    /**
     * Creates a nullable vector type.
     *
     * @param length number of elements, at least {@link #MIN_LENGTH}
     * @param elementType type of each element
     */
    public VecType(int length, DataType elementType) {
        this(true, length, elementType);
    }

    /** Returns the fixed number of elements of this vector type. */
    public int getLength() {
        return length;
    }

    /** Returns the type of the vector elements. */
    public DataType getElementType() {
        return elementType;
    }

    /**
     * Tells whether the given type may be used as a vector element type.
     *
     * @param elementType candidate element type, must not be null
     * @return {@code true} for BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT and DOUBLE
     *     type roots, {@code false} otherwise
     */
    public static boolean isValidElementType(DataType elementType) {
        switch (elementType.getTypeRoot()) {
            case BOOLEAN:
            case TINYINT:
            case SMALLINT:
            case INTEGER:
            case BIGINT:
            case FLOAT:
            case DOUBLE:
                return true;
            default:
                return false;
        }
    }

    /**
     * Returns the element default size multiplied by the vector length, capped at {@link
     * Integer#MAX_VALUE}.
     */
    @Override
    public int defaultSize() {
        // The length may be as large as Integer.MAX_VALUE, so an int multiplication could wrap
        // around silently. Multiply as long and saturate instead.
        long size = (long) elementType.defaultSize() * length;
        return (int) Math.min(size, Integer.MAX_VALUE);
    }

    @Override
    public DataType copy(boolean isNullable) {
        return new VecType(isNullable, length, elementType.copy());
    }

    @Override
    public String asSQLString() {
        return withNullability(FORMAT, elementType.asSQLString(), length);
    }

    /**
     * Writes this type as a JSON object with the fields {@code type} ({@code "VECTOR"} or {@code
     * "VECTOR NOT NULL"}), {@code element} and {@code length}.
     */
    @Override
    public void serializeJson(JsonGenerator generator) throws IOException {
        generator.writeStartObject();
        generator.writeStringField("type", isNullable() ? "VECTOR" : "VECTOR NOT NULL");
        generator.writeFieldName("element");
        elementType.serializeJson(generator);
        generator.writeFieldName("length");
        generator.writeNumber(length);
        generator.writeEndObject();
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        VecType vecType = (VecType) o;
        return elementType.equals(vecType.elementType) && length == vecType.length;
    }

    @Override
    public boolean equalsIgnoreFieldId(DataType o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        VecType vecType = (VecType) o;
        return elementType.equalsIgnoreFieldId(vecType.elementType) && length == vecType.length;
    }

    @Override
    public boolean isPrunedFrom(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        VecType vecType = (VecType) o;
        // The length is part of the vector's identity, exactly as in equals(): vectors of
        // different lengths are never pruned versions of one another.
        return length == vecType.length && elementType.isPrunedFrom(vecType.elementType);
    }

    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), elementType, length);
    }

    @Override
    public <R> R accept(DataTypeVisitor<R> visitor) {
        return visitor.visit(this);
    }

    @Override
    public void collectFieldIds(Set<Integer> fieldIds) {
        elementType.collectFieldIds(fieldIds);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.types.VariantType;
import org.apache.paimon.types.VecType;

import org.apache.arrow.vector.types.TimeUnit;
import org.apache.arrow.vector.types.Types;
Expand Down Expand Up @@ -179,6 +180,12 @@ public FieldType visit(ArrayType arrayType) {
return new FieldType(arrayType.isNullable(), Types.MinorType.LIST.getType(), null);
}

@Override
public FieldType visit(VecType vecType) {
    // A vector becomes an Arrow fixed-size list whose list size is the vector length.
    return new FieldType(
            vecType.isNullable(), new ArrowType.FixedSizeList(vecType.getLength()), null);
}

@Override
public FieldType visit(MultisetType multisetType) {
throw new UnsupportedOperationException("Doesn't support MultisetType.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.types.VecType;

import org.apache.arrow.c.ArrowArray;
import org.apache.arrow.c.ArrowSchema;
Expand Down Expand Up @@ -135,14 +136,16 @@ public static Field toArrowField(
fieldType.getDictionary(),
Collections.singletonMap(PARQUET_FIELD_ID, String.valueOf(fieldId)));
List<Field> children = null;
if (dataType instanceof ArrayType) {
if (dataType instanceof ArrayType || dataType instanceof VecType) {
final DataType elementType;
if (dataType instanceof VecType) {
elementType = ((VecType) dataType).getElementType();
} else {
elementType = ((ArrayType) dataType).getElementType();
}
Field field =
toArrowField(
ListVector.DATA_VECTOR_NAME,
fieldId,
((ArrayType) dataType).getElementType(),
depth + 1,
visitor);
ListVector.DATA_VECTOR_NAME, fieldId, elementType, depth + 1, visitor);
FieldType typeInner = field.getFieldType();
field =
new Field(
Expand Down
Loading