Use of org.apache.arrow.vector.types.pojo.FieldType in the Apache Flink project:
class ArrowUtils, method toArrowField.
/**
 * Converts a Flink {@code LogicalType} into an Arrow {@link Field}.
 *
 * <p>Nullability is taken from the logical type, and the Arrow type is produced
 * by visiting the logical type with {@code LogicalTypeToArrowTypeConverter}.
 * Array types contribute a single child named {@code "element"}; row types
 * contribute one child per row field, converted recursively. All other types
 * yield a field with no children ({@code null} child list).
 */
private static Field toArrowField(String fieldName, LogicalType logicalType) {
FieldType fieldType = new FieldType(logicalType.isNullable(), logicalType.accept(LogicalTypeToArrowTypeConverter.INSTANCE), null);
List<Field> children = null;
if (logicalType instanceof ArrayType) {
LogicalType elementType = ((ArrayType) logicalType).getElementType();
children = Collections.singletonList(toArrowField("element", elementType));
} else if (logicalType instanceof RowType) {
RowType rowType = (RowType) logicalType;
List<Field> rowChildren = new ArrayList<>(rowType.getFieldCount());
for (RowType.RowField rowField : rowType.getFields()) {
rowChildren.add(toArrowField(rowField.getName(), rowField.getType()));
}
children = rowChildren;
}
return new Field(fieldName, fieldType, children);
}
Use of org.apache.arrow.vector.types.pojo.FieldType in the Apache Hive project:
class ArrowColumnarBatchSerDe, method toField.
/**
 * Maps a Hive {@link TypeInfo} to the corresponding Arrow {@link Field}.
 *
 * <p>Primitive categories map to nullable fields of the matching Arrow minor
 * type (STRING/VARCHAR/CHAR all collapse to VARCHAR; TIMESTAMPLOCALTZ keeps
 * its time zone; DECIMAL keeps precision and scale). Complex categories
 * recurse: LIST wraps its element, STRUCT wraps each member field, UNION
 * wraps each alternative, and MAP is encoded as a nullable LIST of
 * non-nullable key/value structs.
 *
 * @param name field name to assign to the resulting Arrow field
 * @param typeInfo Hive type to convert
 * @return the equivalent Arrow field
 * @throws IllegalArgumentException if the category or primitive category is unsupported
 */
private static Field toField(String name, TypeInfo typeInfo) {
switch(typeInfo.getCategory()) {
case PRIMITIVE:
final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
switch(primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
return Field.nullable(name, MinorType.BIT.getType());
case BYTE:
return Field.nullable(name, MinorType.TINYINT.getType());
case SHORT:
return Field.nullable(name, MinorType.SMALLINT.getType());
case INT:
return Field.nullable(name, MinorType.INT.getType());
case LONG:
return Field.nullable(name, MinorType.BIGINT.getType());
case FLOAT:
return Field.nullable(name, MinorType.FLOAT4.getType());
case DOUBLE:
return Field.nullable(name, MinorType.FLOAT8.getType());
case STRING:
case VARCHAR:
case CHAR:
// All character types share one Arrow representation; length limits are not carried over.
return Field.nullable(name, MinorType.VARCHAR.getType());
case DATE:
return Field.nullable(name, MinorType.DATEDAY.getType());
case TIMESTAMP:
return Field.nullable(name, MinorType.TIMESTAMPMILLI.getType());
case TIMESTAMPLOCALTZ:
// Preserve the Hive-declared time zone on the Arrow timestamp type.
final TimestampLocalTZTypeInfo timestampLocalTZTypeInfo = (TimestampLocalTZTypeInfo) typeInfo;
final String timeZone = timestampLocalTZTypeInfo.getTimeZone().toString();
return Field.nullable(name, new ArrowType.Timestamp(TimeUnit.MILLISECOND, timeZone));
case BINARY:
return Field.nullable(name, MinorType.VARBINARY.getType());
case DECIMAL:
final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
final int precision = decimalTypeInfo.precision();
final int scale = decimalTypeInfo.scale();
return Field.nullable(name, new ArrowType.Decimal(precision, scale));
case INTERVAL_YEAR_MONTH:
return Field.nullable(name, MinorType.INTERVALYEAR.getType());
case INTERVAL_DAY_TIME:
return Field.nullable(name, MinorType.INTERVALDAY.getType());
default:
throw new IllegalArgumentException();
}
case LIST:
final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
return new Field(name, FieldType.nullable(MinorType.LIST.getType()), Lists.newArrayList(toField(DEFAULT_ARROW_FIELD_NAME, elementTypeInfo)));
case STRUCT:
final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
final List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
final List<Field> structFields = Lists.newArrayList();
final int structSize = fieldNames.size();
for (int i = 0; i < structSize; i++) {
structFields.add(toField(fieldNames.get(i), fieldTypeInfos.get(i)));
}
return new Field(name, FieldType.nullable(MinorType.STRUCT.getType()), structFields);
case UNION:
final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
final List<Field> unionFields = Lists.newArrayList();
// BUG FIX: size the loop by the union's member-type list, not by the
// just-created (and therefore empty) result list. The original
// `unionFields.size()` was always 0, so the loop never ran and the UNION
// field was emitted with no children.
final int unionSize = objectTypeInfos.size();
for (int i = 0; i < unionSize; i++) {
unionFields.add(toField(DEFAULT_ARROW_FIELD_NAME, objectTypeInfos.get(i)));
}
return new Field(name, FieldType.nullable(MinorType.UNION.getType()), unionFields);
case MAP:
// A map is modeled as list<struct<keys, values>>; the inner struct is
// non-nullable because every list entry must carry a key/value pair.
final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
final TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
final TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
final List<Field> mapFields = Lists.newArrayList();
mapFields.add(toField(name + "_keys", keyTypeInfo));
mapFields.add(toField(name + "_values", valueTypeInfo));
FieldType struct = new FieldType(false, new ArrowType.Struct(), null);
List<Field> childrenOfList = Lists.newArrayList(new Field(name, struct, mapFields));
return new Field(name, FieldType.nullable(MinorType.LIST.getType()), childrenOfList);
default:
throw new IllegalArgumentException();
}
}
Use of org.apache.arrow.vector.types.pojo.FieldType in the Apache Hive project:
class Serializer, method writeStruct.
/**
 * Serializes a Hive struct column into an Arrow struct vector.
 *
 * <p>For each member field, obtains (or creates) the matching Arrow child
 * vector, allocates it, and recursively writes the Hive field column into it.
 * Afterwards, sets the struct vector's validity bit per row from the Hive
 * vector's null flags.
 *
 * @param isMapDataType true when called from the map-serialization path, in
 *        which case child field types are created non-nullable
 */
private void writeStruct(NonNullableStructVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative, boolean isMapDataType) {
final List<String> fieldNames = typeInfo.getAllStructFieldNames();
final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
final ColumnVector[] hiveFieldVectors = hiveVector == null ? null : hiveVector.fields;
final int fieldSize = fieldTypeInfos.size();
// Propagate struct-level nulls down to every member column before writing,
// so child writers see consistent null flags.
// see - https://issues.apache.org/jira/browse/HIVE-25243
if (hiveVector != null && hiveFieldVectors != null) {
for (int i = 0; i < size; i++) {
if (hiveVector.isNull[i]) {
for (ColumnVector fieldVector : hiveFieldVectors) {
fieldVector.isNull[i] = true;
fieldVector.noNulls = false;
}
}
}
}
// Write each struct member into its Arrow child vector.
for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) {
final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
final ColumnVector hiveFieldVector = hiveVector == null ? null : hiveFieldVectors[fieldIndex];
final String fieldName = fieldNames.get(fieldIndex);
// If the call is coming from writeMap(), then the structs within the list type should be non-nullable.
FieldType elementFieldType = (isMapDataType) ? (new FieldType(false, toArrowType(fieldTypeInfo), null)) : (toFieldType(fieldTypeInfos.get(fieldIndex)));
final FieldVector arrowFieldVector = arrowVector.addOrGet(fieldName, elementFieldType, FieldVector.class);
arrowFieldVector.setInitialCapacity(size);
arrowFieldVector.allocateNew();
write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
}
// Mark each row valid/invalid in the struct's validity buffer; a null Hive
// vector means every row is null.
for (int rowIndex = 0; rowIndex < size; rowIndex++) {
if (hiveVector == null || hiveVector.isNull[rowIndex]) {
BitVectorHelper.setValidityBit(arrowVector.getValidityBuffer(), rowIndex, 0);
} else {
BitVectorHelper.setValidityBitToOne(arrowVector.getValidityBuffer(), rowIndex);
}
}
}
Use of org.apache.arrow.vector.types.pojo.FieldType in the Apache Hive project:
class Serializer, method writeList.
/**
 * Serializes a Hive list column into an Arrow list vector.
 *
 * <p>Writes the flattened element column into the list's data vector, then
 * fills the list's offset buffer row by row from the Hive vector's per-row
 * lengths, marking non-null rows as set.
 *
 * @param isMapDataType true when called from the map-serialization path; the
 *        element type is then a non-nullable struct and is written via
 *        writeStruct() with the same flag
 */
private void writeList(ListVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative, boolean isMapDataType) {
final int OFFSET_WIDTH = 4;
final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
final ColumnVector hiveElementVector = hiveVector == null ? null : hiveVector.child;
// If the call is coming from writeMap(), then the List type should be non-nullable.
FieldType elementFieldType = (isMapDataType) ? (new FieldType(false, toArrowType(elementTypeInfo), null)) : (toFieldType(elementTypeInfo));
final FieldVector arrowElementVector = (FieldVector) arrowVector.addOrGetVector(elementFieldType).getVector();
// When the batch uses a selection vector, the child column must be
// re-projected so element counts line up with the selected rows.
VectorizedRowBatch correctedVrb = vectorizedRowBatch;
int correctedSize = hiveVector == null ? 0 : hiveVector.childCount;
if (vectorizedRowBatch.selectedInUse) {
correctedVrb = correctSelectedAndSize(vectorizedRowBatch, hiveVector);
correctedSize = correctedVrb.size;
}
arrowElementVector.setInitialCapacity(correctedSize);
arrowElementVector.allocateNew();
// writeStruct() with the same flag value, as the map is converted as a list of structs.
if (isMapDataType) {
writeStruct((NonNullableStructVector) arrowElementVector, (StructColumnVector) hiveElementVector, (StructTypeInfo) elementTypeInfo, correctedSize, correctedVrb, isNative, isMapDataType);
} else {
write(arrowElementVector, hiveElementVector, elementTypeInfo, correctedSize, correctedVrb, isNative);
}
// Build the offset buffer: each row stores the running element offset; null
// rows keep the previous offset (zero-length) and are left unset (null).
int nextOffset = 0;
for (int rowIndex = 0; rowIndex < size; rowIndex++) {
int selectedIndex = rowIndex;
if (vectorizedRowBatch.selectedInUse) {
selectedIndex = vectorizedRowBatch.selected[rowIndex];
}
if (hiveVector == null || hiveVector.isNull[selectedIndex]) {
arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
} else {
arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
nextOffset += (int) hiveVector.lengths[selectedIndex];
arrowVector.setNotNull(rowIndex);
}
}
// Trailing offset entry closes the last list.
arrowVector.getOffsetBuffer().setInt(size * OFFSET_WIDTH, nextOffset);
}
Use of org.apache.arrow.vector.types.pojo.FieldType in the Apache Hive project:
class Serializer, method serializeBatch.
// Used for both:
// 1. VectorizedRowBatch constructed by batching rows
// 2. VectorizedRowBatch provided from upstream (isNative)
/**
 * Serializes one VectorizedRowBatch into the reusable Arrow root vector and
 * wraps it for emission.
 *
 * <p>For each projected column, reuses the existing Arrow child vector when
 * one with the same name already exists (buffers are overwritten row by row),
 * otherwise allocates a fresh one. Non-native batches are reset afterwards
 * since this serde owns them. Resets {@code batchSize} to 0 as a side effect.
 *
 * @param isNative true when the batch comes from upstream; its own size is
 *        used instead of the serde-tracked batchSize, and it is not reset
 */
public ArrowWrapperWritable serializeBatch(VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
rootVector.setValueCount(0);
for (int fieldIndex = 0; fieldIndex < vectorizedRowBatch.projectionSize; fieldIndex++) {
final int projectedColumn = vectorizedRowBatch.projectedColumns[fieldIndex];
final ColumnVector hiveVector = vectorizedRowBatch.cols[projectedColumn];
final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
final String fieldName = fieldNames.get(fieldIndex);
final FieldType fieldType = toFieldType(fieldTypeInfo);
// Reuse existing FieldVector buffers
// since we always call setValue or setNull for each row
boolean fieldExists = false;
if (rootVector.getChild(fieldName) != null) {
fieldExists = true;
}
final FieldVector arrowVector = rootVector.addOrGet(fieldName, fieldType, FieldVector.class);
if (fieldExists) {
arrowVector.setValueCount(isNative ? vectorizedRowBatch.size : batchSize);
} else {
arrowVector.setInitialCapacity(isNative ? vectorizedRowBatch.size : batchSize);
arrowVector.allocateNew();
}
write(arrowVector, hiveVector, fieldTypeInfo, isNative ? vectorizedRowBatch.size : batchSize, vectorizedRowBatch, isNative);
}
if (!isNative) {
// Only mutate batches that are constructed by this serde
vectorizedRowBatch.reset();
rootVector.setValueCount(batchSize);
} else {
rootVector.setValueCount(vectorizedRowBatch.size);
}
// Row accumulation starts over for the next batch.
batchSize = 0;
VectorSchemaRoot vectorSchemaRoot = new VectorSchemaRoot(rootVector);
return new ArrowWrapperWritable(vectorSchemaRoot, allocator, rootVector);
}
Aggregations