Search in sources :

Example 1 with ValueVector

use of org.apache.arrow.vector.ValueVector in project iceberg by apache.

the class TestHelpers method assertEqualsBatch.

/**
 * Compares a columnar batch against expected records, one record per batch row.
 *
 * <p>For every row, each field of {@code struct} is read from the batch row (or taken as
 * {@code null} when the row reports the position as null) and compared with the matching
 * position of the expected record.
 *
 * @param struct the struct type whose fields drive the column-by-column comparison
 * @param expected supplier of expected records; one record is consumed per batch row
 * @param batch the columnar batch being verified
 * @param checkArrowValidityVector when {@code true}, additionally asserts that the null flag of
 *     the Arrow vector backing each column agrees with whether the expected value is null
 */
public static void assertEqualsBatch(Types.StructType struct, Iterator<Record> expected, ColumnarBatch batch, boolean checkArrowValidityVector) {
    for (int rowId = 0; rowId < batch.numRows(); rowId++) {
        final List<Types.NestedField> columns = struct.fields();
        final InternalRow actualRow = batch.getRow(rowId);
        final Record expectedRecord = expected.next();
        for (int col = 0; col < columns.size(); col++) {
            final Type columnType = columns.get(col).type();
            final Object expectedValue = expectedRecord.get(col);
            final Object actualValue;
            if (actualRow.isNullAt(col)) {
                actualValue = null;
            } else {
                actualValue = actualRow.get(col, convert(columnType));
            }
            assertEqualsUnsafe(columnType, expectedValue, actualValue);
            if (!checkArrowValidityVector) {
                continue;
            }
            // Reach through the Spark column wrapper to the Arrow vector that backs it.
            final ColumnVector sparkVector = batch.column(col);
            final IcebergArrowColumnVector icebergVector = (IcebergArrowColumnVector) sparkVector;
            final ValueVector arrowVector = icebergVector.vectorAccessor().getVector();
            final boolean expectedNull = expectedValue == null;
            // The assertion fails exactly when the two null flags disagree.
            Assert.assertFalse("Nullability doesn't match of " + sparkVector.dataType(), expectedNull != arrowVector.isNull(rowId));
        }
    }
}
Also used : ValueVector(org.apache.arrow.vector.ValueVector) BinaryType(org.apache.spark.sql.types.BinaryType) DataType(org.apache.spark.sql.types.DataType) StructType(org.apache.spark.sql.types.StructType) Type(org.apache.iceberg.types.Type) ArrayType(org.apache.spark.sql.types.ArrayType) MapType(org.apache.spark.sql.types.MapType) Record(org.apache.avro.generic.GenericData.Record) InternalRow(org.apache.spark.sql.catalyst.InternalRow) ColumnVector(org.apache.spark.sql.vectorized.ColumnVector) IcebergArrowColumnVector(org.apache.iceberg.spark.data.vectorized.IcebergArrowColumnVector)

Example 2 with ValueVector

use of org.apache.arrow.vector.ValueVector in project conquery by bakdata.

the class ArrowRenderer method generateVectorFiller.

/**
 * Builds the consumer that copies the cell at index {@code pos} of each result line into the
 * given Arrow vector, choosing the filler by the vector's concrete class.
 *
 * @param pos index into the result line from which the cell is read
 * @param vector the Arrow vector to be filled
 * @param settings print settings handed to {@code resultType} when rendering text cells
 * @param resultType result type used to print VarChar cells (may be {@code null} there, in which
 *     case {@code toString()} is used); for list vectors it is cast to {@code ResultType.ListT}
 * @return the consumer that fills {@code vector}
 * @throws IllegalArgumentException if the vector class is not one of the handled types
 */
private static RowConsumer generateVectorFiller(int pos, ValueVector vector, final PrintSettings settings, ResultType resultType) {
    // TODO When Pattern-matching lands, clean this up. (Think Java 12?)
    if (vector instanceof IntVector) {
        final IntVector ints = (IntVector) vector;
        return intVectorFiller(ints, (row) -> (Integer) row[pos]);
    }
    if (vector instanceof VarCharVector) {
        final VarCharVector texts = (VarCharVector) vector;
        return varCharVectorFiller(texts, (row) -> {
            final Object cell = row[pos];
            if (cell == null) {
                // A missing value must stay null rather than be rendered as an empty string.
                return null;
            }
            if (resultType == null) {
                return cell.toString();
            }
            return resultType.printNullable(settings, cell);
        });
    }
    if (vector instanceof BitVector) {
        final BitVector bits = (BitVector) vector;
        return bitVectorFiller(bits, (row) -> (Boolean) row[pos]);
    }
    if (vector instanceof Float4Vector) {
        final Float4Vector floats = (Float4Vector) vector;
        return float4VectorFiller(floats, (row) -> (Number) row[pos]);
    }
    if (vector instanceof Float8Vector) {
        final Float8Vector doubles = (Float8Vector) vector;
        return float8VectorFiller(doubles, (row) -> (Number) row[pos]);
    }
    if (vector instanceof DateDayVector) {
        final DateDayVector days = (DateDayVector) vector;
        return dateDayVectorFiller(days, (row) -> (Number) row[pos]);
    }
    if (vector instanceof StructVector) {
        final StructVector struct = (StructVector) vector;
        final List<ValueVector> children = struct.getPrimitiveVectors();
        final RowConsumer[] childFillers = new RowConsumer[children.size()];
        // Each child reads from the position equal to its index within the struct.
        for (int idx = 0; idx < children.size(); idx++) {
            childFillers[idx] = generateVectorFiller(idx, children.get(idx), settings, resultType);
        }
        return structVectorFiller(struct, childFillers, (row) -> (List<?>) row[pos]);
    }
    if (vector instanceof ListVector) {
        final ListVector list = (ListVector) vector;
        final ValueVector elements = list.getDataVector();
        final ResultType elementType = ((ResultType.ListT) resultType).getElementType();
        // pos = 0 is a workaround for now
        final RowConsumer elementFiller = generateVectorFiller(0, elements, settings, elementType);
        return listVectorFiller(list, elementFiller, (row) -> (List<?>) row[pos]);
    }
    throw new IllegalArgumentException("Unsupported vector type " + vector);
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) IntVector(org.apache.arrow.vector.IntVector) Float4Vector(org.apache.arrow.vector.Float4Vector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) ValueVector(org.apache.arrow.vector.ValueVector) ListVector(org.apache.arrow.vector.complex.ListVector) DateDayVector(org.apache.arrow.vector.DateDayVector)

Example 3 with ValueVector

use of org.apache.arrow.vector.ValueVector in project aws-athena-query-federation by awslabs.

the class BlockTest method ListOfListsTest.

/**
 * Round-trips a block holding a single list-of-lists-of-VARCHAR column through
 * {@code RecordBatchSerDe} / {@code SchemaSerDe} and checks that every row read back contains
 * the inner values "1000", "1001", "1002" in order.
 */
@Test
public void ListOfListsTest() throws Exception {
    BlockAllocatorImpl expectedAllocator = new BlockAllocatorImpl();
    // Build the schema: one outer list whose element is a list of VARCHAR.
    SchemaBuilder schemaBuilder = new SchemaBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("outerlist", new ArrowType.List()).addListField("innerList", Types.MinorType.VARCHAR.getType()).build());
    Schema origSchema = schemaBuilder.build();
    // Generate and write the block.
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    int expectedRows = 200;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        switch(vector.getMinorType()) {
            case LIST:
                Field child = vector.getField().getChildren().get(0);
                // The child type is the same for every row, so resolve it once.
                Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
                for (int i = 0; i < expectedRows; i++) {
                    // For each row
                    List<List<String>> value = new ArrayList<>();
                    switch(childType) {
                        case LIST:
                            List<String> values = new ArrayList<>();
                            values.add(String.valueOf(1000));
                            values.add(String.valueOf(1001));
                            values.add(String.valueOf(1002));
                            value.add(values);
                            break;
                        default:
                            // Report the child type: the outer vector is always LIST in this branch.
                            throw new UnsupportedOperationException(childType + " is not supported");
                    }
                    BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, value);
                }
                break;
            default:
                throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
        }
    }
    expectedBlock.setRowCount(expectedRows);
    // Serialize the record batch, then release the source batch and block.
    RecordBatchSerDe expectSerDe = new RecordBatchSerDe(expectedAllocator);
    ByteArrayOutputStream blockOut = new ByteArrayOutputStream();
    ArrowRecordBatch expectedBatch = expectedBlock.getRecordBatch();
    expectSerDe.serialize(expectedBatch, blockOut);
    assertSerializationOverhead(blockOut);
    expectedBatch.close();
    expectedBlock.close();
    // Round-trip the schema through its own serializer.
    ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
    SchemaSerDe schemaSerDe = new SchemaSerDe();
    schemaSerDe.serialize(origSchema, schemaOut);
    Schema actualSchema = schemaSerDe.deserialize(new ByteArrayInputStream(schemaOut.toByteArray()));
    BlockAllocatorImpl actualAllocator = new BlockAllocatorImpl();
    RecordBatchSerDe actualSerDe = new RecordBatchSerDe(actualAllocator);
    ArrowRecordBatch batch = actualSerDe.deserialize(blockOut.toByteArray());
    // Load the deserialized batch into a fresh block built from the deserialized schema.
    Block actualBlock = actualAllocator.createBlock(actualSchema);
    actualBlock.loadRecordBatch(batch);
    batch.close();
    for (int i = 0; i < actualBlock.getRowCount(); i++) {
        logger.info("ListOfList: util {}", BlockUtils.rowToString(actualBlock, i));
    }
    assertEquals("Row count missmatch", expectedRows, actualBlock.getRowCount());
    for (Field next : actualBlock.getFields()) {
        FieldReader vector = actualBlock.getFieldReader(next.getName());
        switch(vector.getMinorType()) {
            case LIST:
                // Counts the rows that yielded at least one inner value.
                int actual = 0;
                for (int i = 0; i < actualBlock.getRowCount(); i++) {
                    vector.setPosition(i);
                    int entryValues = 0;
                    while (vector.next()) {
                        FieldReader innerReader = vector.reader();
                        int j = 0;
                        while (innerReader.next()) {
                            entryValues++;
                            assertEquals(String.valueOf(1000 + j++), innerReader.reader().readText().toString());
                        }
                    }
                    if (entryValues > 0) {
                        actual++;
                    }
                }
                assertEquals("failed for " + vector.getField().getName(), actualBlock.getRowCount(), actual);
                break;
            default:
                throw new UnsupportedOperationException(next.getType().getTypeID() + " is not supported");
        }
    }
    actualBlock.close();
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ValueVector(org.apache.arrow.vector.ValueVector) Field(org.apache.arrow.vector.types.pojo.Field) ArrowRecordBatch(org.apache.arrow.vector.ipc.message.ArrowRecordBatch) ByteArrayInputStream(java.io.ByteArrayInputStream) List(java.util.List) ArrayList(java.util.ArrayList) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) Test(org.junit.Test)

Example 4 with ValueVector

use of org.apache.arrow.vector.ValueVector in project aws-athena-query-federation by awslabs.

the class BlockTest method generateTestBlock.

/**
 * Creates a block for {@code origSchema} and fills every field with deterministic test data.
 *
 * <p>Scalar columns receive values derived from the row index and the 1-based position of the
 * field in the schema. STRUCT and MAP columns are populated through a field resolver keyed on
 * nested field names. LIST columns are populated according to their element type; an element
 * type without a matching branch leaves the column unpopulated. MAP columns are additionally
 * checked for the expected entries/key/value layout.
 *
 * @param expectedAllocator allocator used to create the block
 * @param origSchema schema describing the fields to populate
 * @param expectedRows number of rows to write; also set as the block's row count
 * @return the populated block
 * @throws UnsupportedOperationException if a field's minor type has no generator here
 * @throws IllegalStateException if a MAP field does not have the expected child layout
 */
public static Block generateTestBlock(BlockAllocatorImpl expectedAllocator, Schema origSchema, int expectedRows) throws UnsupportedOperationException {
    // Generate and write the block.
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    // 1-based field position; scales the generated values so columns differ from one another.
    int fieldCount = 1;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        switch(vector.getMinorType()) {
            case DATEDAY:
                DateDayVector dateDayVector = (DateDayVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    dateDayVector.setSafe(i, i * fieldCount);
                }
                break;
            case UINT4:
                UInt4Vector uInt4Vector = (UInt4Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt4Vector.setSafe(i, i * fieldCount * 2);
                }
                break;
            case INT:
                IntVector intVector = (IntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    intVector.setSafe(i, i * fieldCount * 3);
                }
                break;
            case FLOAT8:
                Float8Vector fVector = (Float8Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    fVector.setSafe(i, i * fieldCount * 1.1);
                }
                break;
            case VARCHAR:
                VarCharVector vVector = (VarCharVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    vVector.setSafe(i, String.valueOf(i * fieldCount).getBytes(Charsets.UTF_8));
                }
                break;
            case DATEMILLI:
                DateMilliVector dateMilliVector = (DateMilliVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    dateMilliVector.setSafe(i, i * fieldCount * 4);
                }
                break;
            case TINYINT:
                TinyIntVector tinyIntVector = (TinyIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    tinyIntVector.setSafe(i, i * fieldCount * 5);
                }
                break;
            case UINT1:
                UInt1Vector uInt1Vector = (UInt1Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt1Vector.setSafe(i, i * fieldCount * 6);
                }
                break;
            case SMALLINT:
                SmallIntVector smallIntVector = (SmallIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    smallIntVector.setSafe(i, i * fieldCount * 7);
                }
                break;
            case UINT2:
                UInt2Vector uInt2Vector = (UInt2Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt2Vector.setSafe(i, i * fieldCount * 8);
                }
                break;
            case UINT8:
                UInt8Vector uInt8Vector = (UInt8Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt8Vector.setSafe(i, i * fieldCount * 9);
                }
                break;
            case BIGINT:
                BigIntVector bigIntVector = (BigIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    bigIntVector.setSafe(i, i * fieldCount * 10);
                }
                break;
            case DECIMAL:
                DecimalVector decimalVector = (DecimalVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    BigDecimal bigDecimal = new BigDecimal((double) (i * fieldCount) * 1.01);
                    bigDecimal = bigDecimal.setScale(2, RoundingMode.HALF_UP);
                    decimalVector.setSafe(i, bigDecimal);
                }
                break;
            case FLOAT4:
                Float4Vector float4Vector = (Float4Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    float4Vector.setSafe(i, i * fieldCount * 9);
                }
                break;
            case VARBINARY:
                VarBinaryVector varBinaryVector = (VarBinaryVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    // Explicit charset, matching the other byte conversions in this method,
                    // so the generated bytes do not depend on the platform default.
                    byte[] data = String.valueOf(i * fieldCount).getBytes(Charsets.UTF_8);
                    varBinaryVector.setSafe(i, data);
                }
                break;
            case BIT:
                BitVector bitVector = (BitVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    bitVector.setSafe(i, i % 2);
                }
                break;
            case STRUCT:
                StructVector sVector = (StructVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    final int seed = i;
                    BlockUtils.setComplexValue(sVector, i, (Field field, Object value) -> {
                        switch(field.getName()) {
                            case "nestedBigInt":
                                return (long) seed;
                            case "nestedString":
                                return String.valueOf(1000 + seed);
                            default:
                                return sharedNestedTestValue(field, seed);
                        }
                    }, new Object());
                }
                break;
            case LIST:
                Field child = vector.getField().getChildren().get(0);
                // Resolve the element type once instead of once per branch.
                Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
                if (childType == Types.MinorType.BIGINT) {
                    for (int i = 0; i < expectedRows; i++) {
                        List<Long> values = new ArrayList<>();
                        values.add(Long.valueOf(i));
                        values.add(i + 1L);
                        values.add(i + 2L);
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, values);
                    }
                } else if (childType == Types.MinorType.VARCHAR) {
                    for (int i = 0; i < expectedRows; i++) {
                        List<String> values = new ArrayList<>();
                        values.add(String.valueOf(1000 + i));
                        values.add(String.valueOf(1000 + i + 1));
                        values.add(String.valueOf(1000 + i + 2));
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, values);
                    }
                } else if (childType == Types.MinorType.SMALLINT) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((short) (i + 1)));
                    }
                } else if (childType == Types.MinorType.INT) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList(i));
                    }
                } else if (childType == Types.MinorType.TINYINT) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((byte) i));
                    }
                } else if (childType == Types.MinorType.FLOAT4) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((i * 1.0F)));
                    }
                } else if (childType == Types.MinorType.FLOAT8) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((i * 1.0D)));
                    }
                } else if (childType == Types.MinorType.DECIMAL) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((i * 1.0D)));
                    }
                } else if (childType == Types.MinorType.VARBINARY) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList(String.valueOf(i).getBytes(Charsets.UTF_8)));
                    }
                } else if (childType == Types.MinorType.BIT) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList(i % 2 == 1));
                    }
                }
                // Any other element type is intentionally left unpopulated, as before.
                break;
            case MAP:
                MapVector mapVector = (MapVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    final int seed = i;
                    BlockUtils.setComplexValue(mapVector, i, (Field field, Object value) -> {
                        switch(field.getName()) {
                            case "key":
                                return String.valueOf(1000 + seed);
                            case "value":
                                return seed;
                            default:
                                return sharedNestedTestValue(field, seed);
                        }
                    }, new Object());
                }
                // Validate the map layout: one struct child named ENTRIES holding KEY and VALUE.
                List<Field> children = vector.getField().getChildren();
                if (children.size() != 1) {
                    throw new IllegalStateException("Invalid Arrow Map schema: " + vector.getField());
                }
                Field keyValueStructField = children.get(0);
                if (!ENTRIES.equals(keyValueStructField.getName()) || !(keyValueStructField.getType() instanceof ArrowType.Struct)) {
                    throw new IllegalStateException("Invalid Arrow Map schema: " + vector.getField());
                }
                List<Field> keyValueChildren = keyValueStructField.getChildren();
                if (keyValueChildren.size() != 2) {
                    throw new IllegalStateException("Invalid Arrow Map schema: " + vector.getField());
                }
                Field keyField = keyValueChildren.get(0);
                Field valueField = keyValueChildren.get(1);
                if (!KEY.equals(keyField.getName()) || !VALUE.equals(valueField.getName())) {
                    throw new IllegalStateException("Invalid Arrow Map schema: " + vector.getField());
                }
                break;
            default:
                throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
        }
        fieldCount++;
    }
    expectedBlock.setRowCount(expectedRows);
    return expectedBlock;
}

/**
 * Returns the test value for the nested field names that STRUCT and MAP columns have in common.
 *
 * @param field the nested field being resolved; only its name is inspected
 * @param seed the row index the value is derived from
 * @return the value to store for {@code field}
 * @throws RuntimeException if the field name is not one of the known nested names
 */
private static Object sharedNestedTestValue(Field field, int seed) {
    switch(field.getName()) {
        case "tinyintcol":
            return (byte) seed;
        case "smallintcol":
            return (short) seed;
        case "nestedList": {
            List<String> values = new ArrayList<>();
            values.add("val1");
            values.add("val2");
            return values;
        }
        case "nestedListDec": {
            List<Double> values = new ArrayList<>();
            values.add(2.0D);
            values.add(2.2D);
            return values;
        }
        case "float4Col":
            return seed * 1.0F;
        case "float8Col":
            return seed * 2.0D;
        case "shortDecCol":
            return seed * 3.0D;
        case "longDecCol":
            return seed * 4.0D;
        case "binaryCol":
            return String.valueOf(seed).getBytes(Charsets.UTF_8);
        case "bitCol":
            return seed % 2 == 1;
        case "nestedStruct":
            // it just needs to be non-null
            return new Object();
        default:
            throw new RuntimeException("Unexpected field " + field.getName());
    }
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) ArrayList(java.util.ArrayList) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) Field(org.apache.arrow.vector.types.pojo.Field) DateDayVector(org.apache.arrow.vector.DateDayVector) UInt1Vector(org.apache.arrow.vector.UInt1Vector) List(java.util.List) ArrayList(java.util.ArrayList) DecimalVector(org.apache.arrow.vector.DecimalVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) UInt8Vector(org.apache.arrow.vector.UInt8Vector) UInt4Vector(org.apache.arrow.vector.UInt4Vector) BigDecimal(java.math.BigDecimal) BigIntVector(org.apache.arrow.vector.BigIntVector) ValueVector(org.apache.arrow.vector.ValueVector) ListVector(org.apache.arrow.vector.complex.ListVector) DateMilliVector(org.apache.arrow.vector.DateMilliVector) UInt2Vector(org.apache.arrow.vector.UInt2Vector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) MapVector(org.apache.arrow.vector.complex.MapVector)

Example 5 with ValueVector

use of org.apache.arrow.vector.ValueVector in project TileDB-Spark by TileDB-Inc.

the class TileDBDataReaderPartitionScan method createValueVectors.

/**
 * Creates the value vectors, later to be used to create the Arrow buffers for the query.
 *
 * <p>Previously held vectors are released first. Then, for every entry of {@code fieldNames},
 * a data vector matching the field's type info and a {@link UInt1Vector} validity helper are
 * created, sized from {@code readBufferSize}, allocated, and handed to
 * {@code createAndSetArrowBuffers}. The work is timed under
 * {@code queryAllocBufferTimerName}.
 *
 * @param readBufferSize the read buffer size; divided by the vector's default record byte
 *     count to derive the initial row capacity
 * @throws TileDBError propagated from the calls made here (presumably {@code getTypeInfo} or
 *     {@code createAndSetArrowBuffers} — confirm against their signatures)
 * @throws RuntimeException if a field has a datatype with no Arrow mapping here, or is a
 *     fixed-length char/ascii attribute
 */
private void createValueVectors(long readBufferSize) throws TileDBError {
    metricsUpdater.startTimer(queryAllocBufferTimerName);
    releaseArrowVectors();
    for (String fieldName : fieldNames) {
        // get the spark column name and match to array schema
        TypeInfo typeInfo = getTypeInfo(fieldName);
        RootAllocator allocator = ArrowUtils.rootAllocator();
        ValueVector valueVector;
        ValueVector validityValueVector = new UInt1Vector(fieldName, allocator);
        switch(typeInfo.datatype) {
            case CHAR:
            case ASCII:
                if (!typeInfo.isVarLen) {
                    throw new RuntimeException("Unhandled fixed-len char buffer for attribute " + fieldName);
                }
                valueVector = new VarCharVector(fieldName, allocator);
                break;
            case UINT8:
            case INT8:
                if (typeInfo.isVarLen) {
                    valueVector = emptyListVectorOf(fieldName, allocator, new ArrowType.Int(8, true));
                } else {
                    valueVector = new TinyIntVector(fieldName, allocator);
                }
                break;
            case INT32:
                // NOTE(review): the only case that also checks isArray — confirm whether the
                // other numeric cases should do the same.
                if (typeInfo.isVarLen || typeInfo.isArray) {
                    valueVector = emptyListVectorOf(fieldName, allocator, new ArrowType.Int(32, true));
                } else {
                    valueVector = new IntVector(fieldName, allocator);
                }
                break;
            case FLOAT32:
                if (typeInfo.isVarLen) {
                    valueVector = emptyListVectorOf(fieldName, allocator, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
                } else {
                    valueVector = new Float4Vector(fieldName, allocator);
                }
                break;
            case FlOAT64:
                if (typeInfo.isVarLen) {
                    valueVector = emptyListVectorOf(fieldName, allocator, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
                } else {
                    valueVector = new Float8Vector(fieldName, allocator);
                }
                break;
            case INT16:
            case UINT16:
                if (typeInfo.isVarLen) {
                    valueVector = emptyListVectorOf(fieldName, allocator, new ArrowType.Int(16, true));
                } else {
                    valueVector = new SmallIntVector(fieldName, allocator);
                }
                break;
            case LONG:
            case DATE:
                if (typeInfo.isVarLen) {
                    valueVector = emptyListVectorOf(fieldName, allocator, new ArrowType.Int(64, true));
                } else {
                    valueVector = new BigIntVector(fieldName, allocator);
                }
                break;
            default:
                throw new RuntimeException("Unhandled datatype for Arrow buffer, attribute " + fieldName);
        }
        // Max number of rows is nbytes / sizeof(int32_t), i.e. the max number of offsets that can be
        // stored.
        long maxRowsL = (readBufferSize / util.getDefaultRecordByteCount(valueVector.getClass()));
        int maxNumRows = util.longToInt(maxRowsL);
        // rare case when readbuffer size is set to a value smaller than the type
        if (maxNumRows == 0) {
            maxNumRows = 1;
        }
        if (valueVector instanceof ListVector) {
            ((ListVector) valueVector).setInitialCapacity(maxNumRows, 1);
        } else {
            valueVector.setInitialCapacity(maxNumRows);
        }
        validityValueVector.setInitialCapacity(maxNumRows);
        // The valueVector is the one holding the data and the corresponding validity and
        // offsetBuffers.
        // The validityValueVector is a help valueVector that holds the validity values in a byte
        // format which is the one expected from TileDB. The validity buffers in the main valueVector
        // is a bitmap instead!
        // A conversion between the two is needed when retrieving the data. See the code in the get()
        // method.
        valueVector.allocateNew();
        validityValueVector.allocateNew();
        createAndSetArrowBuffers(valueVector, validityValueVector, typeInfo, fieldName);
    }
    metricsUpdater.finish(queryAllocBufferTimerName);
}

/**
 * Creates an empty list vector whose element vector is a nullable vector of {@code elementType}.
 *
 * @param fieldName name given to the list vector
 * @param allocator allocator backing the vector
 * @param elementType Arrow type of the list elements
 * @return the new, not yet allocated list vector
 */
private static ValueVector emptyListVectorOf(String fieldName, RootAllocator allocator, ArrowType elementType) {
    ListVector listVector = ListVector.empty(fieldName, allocator);
    listVector.addOrGetVector(FieldType.nullable(elementType));
    return listVector;
}
Also used : BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float4Vector(org.apache.arrow.vector.Float4Vector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) VarCharVector(org.apache.arrow.vector.VarCharVector) BigIntVector(org.apache.arrow.vector.BigIntVector) ValueVector(org.apache.arrow.vector.ValueVector) RootAllocator(org.apache.arrow.memory.RootAllocator) ListVector(org.apache.arrow.vector.complex.ListVector) UInt1Vector(org.apache.arrow.vector.UInt1Vector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Aggregations

ValueVector (org.apache.arrow.vector.ValueVector)10 ArrayList (java.util.ArrayList)5 Float4Vector (org.apache.arrow.vector.Float4Vector)4 Float8Vector (org.apache.arrow.vector.Float8Vector)4 IntVector (org.apache.arrow.vector.IntVector)4 VarCharVector (org.apache.arrow.vector.VarCharVector)4 ListVector (org.apache.arrow.vector.complex.ListVector)4 Field (org.apache.arrow.vector.types.pojo.Field)4 Schema (org.apache.arrow.vector.types.pojo.Schema)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 List (java.util.List)3 BigIntVector (org.apache.arrow.vector.BigIntVector)3 SmallIntVector (org.apache.arrow.vector.SmallIntVector)3 TinyIntVector (org.apache.arrow.vector.TinyIntVector)3 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)3 ArrowRecordBatch (org.apache.arrow.vector.ipc.message.ArrowRecordBatch)3 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)3 HashMap (java.util.HashMap)2 RootAllocator (org.apache.arrow.memory.RootAllocator)2