Search in sources :

Example 6 with ValueVector

use of org.apache.arrow.vector.ValueVector in project Mycat2 by MyCATApache.

the class TopKPlan method buildCompare.

/**
 * Builds a null-aware comparator for a single sort column.
 *
 * <p>Null checks are performed against the column's own value vector, and null placement
 * follows {@code options.nullsFirst}. Two non-null entries are compared by the default
 * comparator that Arrow provides for the column vector, which is attached to that same
 * vector on both sides.
 *
 * @param p the sort column supplying the value vector and its sort options
 * @return a comparator over row indices of the column's value vector
 */
private VectorValueComparator buildCompare(SortPlan.SortColumn p) {
    final FieldVector column = p.getValues();
    final SortOptions sortOptions = p.getOptions();
    final VectorValueComparator<FieldVector> delegate = DefaultVectorComparators.createDefaultComparator(column);
    delegate.attachVectors(column, column);
    return new VectorValueComparator() {

        @Override
        public int compare(int index1, int index2) {
            final boolean leftNull = column.isNull(index1);
            final boolean rightNull = column.isNull(index2);
            if (!leftNull && !rightNull) {
                return compareNotNull(index1, index2);
            }
            if (leftNull && rightNull) {
                return 0;
            }
            // Exactly one side is null: with nullsFirst the null side sorts lower, otherwise higher.
            final int nullSideResult = sortOptions.nullsFirst ? -1 : 1;
            return leftNull ? nullSideResult : -nullSideResult;
        }

        @Override
        public int compareNotNull(int index1, int index2) {
            return delegate.compareNotNull(index1, index2);
        }

        @Override
        public VectorValueComparator createNew() {
            // The same instance is handed back rather than a copy.
            return this;
        }
    };
}
Also used : ValueVector(org.apache.arrow.vector.ValueVector) VectorValueComparator(org.apache.arrow.algorithm.sort.VectorValueComparator) FieldVector(org.apache.arrow.vector.FieldVector) SortOptions(io.ordinate.engine.builder.SortOptions)

Example 7 with ValueVector

use of org.apache.arrow.vector.ValueVector in project TileDB-Spark by TileDB-Inc.

the class TileDBPartitionReader method createValueVectors.

/**
 * Creates the value vectors, later to be used to create the arrowBuffers for the query.
 *
 * <p>For every entry in {@code fieldNames} a data vector matching the field's TileDB datatype is
 * created, together with a helper {@link UInt2Vector} that carries the validity values. Both are
 * sized from {@code readBufferSize}, allocated, and handed to {@code createAndSetArrowBuffers}.
 * Previously held vectors are released first via {@code releaseArrowVectors()}.
 *
 * @param readBufferSize the readBufferSize; it is divided by the per-record byte count of the
 *     chosen vector class to obtain the row capacity (at least one row is always reserved)
 * @throws TileDBError if the TileDB schema or type information cannot be accessed
 * @throws RuntimeException if a field has a datatype with no Arrow mapping here, or is a
 *     fixed-length CHAR/ASCII attribute
 */
private void createValueVectors(long readBufferSize) throws TileDBError {
    metricsUpdater.startTimer(queryAllocBufferTimerName);
    // NOTE(review): nothing read from the domain is used below; the handles are still fetched and
    // closed to keep the native-handle lifecycle unchanged. Confirm whether this can be dropped.
    Domain domain = arraySchema.getDomain();
    try {
        for (Dimension dimension : domain.getDimensions()) {
            dimension.close();
        }
    } finally {
        // Release the domain handle even when iterating the dimensions fails.
        domain.close();
    }
    releaseArrowVectors();
    for (String fieldName : fieldNames) {
        // get the spark column name and match to array schema
        TypeInfo typeInfo = getTypeInfo(fieldName);
        RootAllocator allocator = ArrowUtils.rootAllocator();
        // In theory we could try to replace the following UInt2Vector with Uint1Vector. However,
        // TileDB will throw an error that more validity cells are needed for the query. This
        // happens because apache-arrow rounds up the size of the data buffers, thus making it
        // necessary for us to provide more validity cells. This implementation provides double
        // the amount of validity cells necessary which makes it safe.
        ValueVector validityValueVector = new UInt2Vector(fieldName, allocator);
        ValueVector valueVector;
        switch (typeInfo.datatype) {
            case CHAR:
            case ASCII:
                if (!typeInfo.isVarLen) {
                    throw new RuntimeException("Unhandled fixed-len char buffer for attribute " + fieldName);
                }
                valueVector = new VarCharVector(fieldName, allocator);
                break;
            case UINT8:
            case INT8:
                if (typeInfo.isVarLen) {
                    valueVector = newNullableListVector(fieldName, allocator, new ArrowType.Int(8, true));
                } else {
                    valueVector = new TinyIntVector(fieldName, allocator);
                }
                break;
            case INT32:
                // INT32 is the only datatype here that also becomes a list when flagged as an array.
                if (typeInfo.isVarLen || typeInfo.isArray) {
                    valueVector = newNullableListVector(fieldName, allocator, new ArrowType.Int(32, true));
                } else {
                    valueVector = new IntVector(fieldName, allocator);
                }
                break;
            case FLOAT32:
                if (typeInfo.isVarLen) {
                    valueVector = newNullableListVector(
                        fieldName, allocator, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
                } else {
                    valueVector = new Float4Vector(fieldName, allocator);
                }
                break;
            case FlOAT64:
                if (typeInfo.isVarLen) {
                    valueVector = newNullableListVector(
                        fieldName, allocator, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
                } else {
                    valueVector = new Float8Vector(fieldName, allocator);
                }
                break;
            case INT16:
            case UINT16:
                if (typeInfo.isVarLen) {
                    valueVector = newNullableListVector(fieldName, allocator, new ArrowType.Int(16, true));
                } else {
                    valueVector = new SmallIntVector(fieldName, allocator);
                }
                break;
            case LONG:
            case DATE:
                if (typeInfo.isVarLen) {
                    valueVector = newNullableListVector(fieldName, allocator, new ArrowType.Int(64, true));
                } else {
                    valueVector = new BigIntVector(fieldName, allocator);
                }
                break;
            default:
                throw new RuntimeException("Unhandled datatype for Arrow buffer, attribute " + fieldName);
        }
        // The row capacity is the read buffer size divided by the default per-record byte count
        // of the chosen vector class.
        long maxRowsL = (readBufferSize / util.getDefaultRecordByteCount(valueVector.getClass()));
        int maxNumRows = util.longToInt(maxRowsL);
        // rare case when readbuffer size is set to a value smaller than the type
        if (maxNumRows == 0) {
            maxNumRows = 1;
        }
        if (valueVector instanceof ListVector) {
            ((ListVector) valueVector).setInitialCapacity(maxNumRows, 1);
        } else {
            valueVector.setInitialCapacity(maxNumRows);
        }
        validityValueVector.setInitialCapacity(maxNumRows);
        // The valueVector is the one holding the data and the corresponding validity and
        // offsetBuffers.
        // The validityValueVector is a help valueVector that holds the validity values in a byte
        // format which is the one expected from TileDB. The validity buffers in the main valueVector
        // is a bitmap instead!
        // A conversion between the two is needed when retrieving the data. See the code in the get()
        // method.
        valueVector.allocateNew();
        validityValueVector.allocateNew();
        createAndSetArrowBuffers(valueVector, validityValueVector, typeInfo, fieldName);
    }
    metricsUpdater.finish(queryAllocBufferTimerName);
}

/**
 * Creates an empty list vector whose child data vector is nullable and of the given element type.
 *
 * @param fieldName the name given to the list vector
 * @param allocator the allocator backing the vector
 * @param elementType the Arrow type of the list elements
 * @return the new, not yet allocated, list vector
 */
private ListVector newNullableListVector(String fieldName, RootAllocator allocator, ArrowType elementType) {
    ListVector listVector = ListVector.empty(fieldName, allocator);
    listVector.addOrGetVector(FieldType.nullable(elementType));
    return listVector;
}
Also used : BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float4Vector(org.apache.arrow.vector.Float4Vector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) VarCharVector(org.apache.arrow.vector.VarCharVector) BigIntVector(org.apache.arrow.vector.BigIntVector) ValueVector(org.apache.arrow.vector.ValueVector) RootAllocator(org.apache.arrow.memory.RootAllocator) ListVector(org.apache.arrow.vector.complex.ListVector) UInt2Vector(org.apache.arrow.vector.UInt2Vector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Example 8 with ValueVector

use of org.apache.arrow.vector.ValueVector in project deephaven-core by deephaven.

the class VectorSchemaRootAdapter method of.

/**
 * Convert a {@code table} into a {@link VectorSchemaRoot}.
 *
 * <p>Each column is adapted into a {@link FieldVector} using {@code allocator}; the schema is
 * built from the fields of those vectors, in column order, and the row count is taken from
 * {@code table.size()}.
 *
 * @param table the table
 * @param allocator the allocator
 * @return the vector schema root
 */
public static VectorSchemaRoot of(NewTable table, BufferAllocator allocator) {
    final int columnCount = table.numColumns();
    final List<FieldVector> vectors = new ArrayList<>(columnCount);
    for (Column<?> col : table) {
        final FieldVector adapted = FieldVectorAdapter.of(col, allocator);
        vectors.add(adapted);
    }
    final Schema schema = new Schema(
        vectors.stream()
            .map(FieldVector::getField)
            .collect(Collectors.toList()));
    return new VectorSchemaRoot(schema, vectors, table.size());
}
Also used : ValueVector(org.apache.arrow.vector.ValueVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) FieldVector(org.apache.arrow.vector.FieldVector)

Example 9 with ValueVector

use of org.apache.arrow.vector.ValueVector in project aws-athena-query-federation by awslabs.

the class BlockTest method ListOfStructsTest.

/**
 * Round-trips a block holding a single list-of-structs column through record batch and schema
 * serialization, then verifies that every row of the deserialized block contains the struct
 * that was written.
 */
@Test
public void ListOfStructsTest() throws Exception {
    BlockAllocatorImpl expectedAllocator = new BlockAllocatorImpl();
    /**
     * Generate and write the schema
     */
    SchemaBuilder schemaBuilder = new SchemaBuilder();
    schemaBuilder.addField(
        FieldBuilder.newBuilder("outerlist", new ArrowType.List())
            .addField(
                FieldBuilder.newBuilder("innerStruct", Types.MinorType.STRUCT.getType())
                    .addStringField("varchar")
                    .addBigIntField("bigint")
                    .build())
            .build());
    Schema origSchema = schemaBuilder.build();
    /**
     * Generate and write the block
     */
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    int expectedRows = 200;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        switch (vector.getMinorType()) {
            case LIST:
                Field child = vector.getField().getChildren().get(0);
                Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
                for (int i = 0; i < expectedRows; i++) {
                    // For each row: a list holding exactly one struct
                    List<Map<String, Object>> value = new ArrayList<>();
                    switch (childType) {
                        case STRUCT:
                            Map<String, Object> values = new HashMap<>();
                            values.put("varchar", "chars");
                            values.put("bigint", 100L);
                            value.add(values);
                            break;
                        default:
                            // Report the child type that could not be handled, not the enclosing list type.
                            throw new UnsupportedOperationException(childType + " is not supported");
                    }
                    BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, value);
                }
                break;
            default:
                throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
        }
    }
    expectedBlock.setRowCount(expectedRows);
    /**
     * Serialize the block and the schema
     */
    RecordBatchSerDe expectSerDe = new RecordBatchSerDe(expectedAllocator);
    ByteArrayOutputStream blockOut = new ByteArrayOutputStream();
    ArrowRecordBatch expectedBatch = expectedBlock.getRecordBatch();
    expectSerDe.serialize(expectedBatch, blockOut);
    assertSerializationOverhead(blockOut);
    expectedBatch.close();
    expectedBlock.close();
    ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
    SchemaSerDe schemaSerDe = new SchemaSerDe();
    schemaSerDe.serialize(origSchema, schemaOut);
    Schema actualSchema = schemaSerDe.deserialize(new ByteArrayInputStream(schemaOut.toByteArray()));
    BlockAllocatorImpl actualAllocator = new BlockAllocatorImpl();
    RecordBatchSerDe actualSerDe = new RecordBatchSerDe(actualAllocator);
    ArrowRecordBatch batch = actualSerDe.deserialize(blockOut.toByteArray());
    /**
     * Load the deserialized batch into a fresh block
     */
    Block actualBlock = actualAllocator.createBlock(actualSchema);
    actualBlock.loadRecordBatch(batch);
    batch.close();
    for (int i = 0; i < actualBlock.getRowCount(); i++) {
        logger.info("ListOfStructsTest: util {}", BlockUtils.rowToString(actualBlock, i));
    }
    assertEquals("Row count missmatch", expectedRows, actualBlock.getRowCount());
    /**
     * Verify the contents of every row
     */
    for (Field next : actualBlock.getFields()) {
        FieldReader vector = actualBlock.getFieldReader(next.getName());
        switch (vector.getMinorType()) {
            case LIST:
                // Number of rows that contained at least one list entry
                int actual = 0;
                for (int i = 0; i < actualBlock.getRowCount(); i++) {
                    vector.setPosition(i);
                    int entryValues = 0;
                    while (vector.next()) {
                        entryValues++;
                        assertEquals("chars", vector.reader().reader("varchar").readText().toString());
                        assertEquals(Long.valueOf(100), vector.reader().reader("bigint").readLong());
                    }
                    if (entryValues > 0) {
                        actual++;
                    }
                }
                assertEquals("failed for " + vector.getField().getName(), actualBlock.getRowCount(), actual);
                break;
            default:
                throw new UnsupportedOperationException(next.getType().getTypeID() + " is not supported");
        }
    }
    actualBlock.close();
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ValueVector(org.apache.arrow.vector.ValueVector) Field(org.apache.arrow.vector.types.pojo.Field) ArrowRecordBatch(org.apache.arrow.vector.ipc.message.ArrowRecordBatch) ByteArrayInputStream(java.io.ByteArrayInputStream) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) HashMap(java.util.HashMap) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) Test(org.junit.Test)

Example 10 with ValueVector

use of org.apache.arrow.vector.ValueVector in project aws-athena-query-federation by awslabs.

the class BlockTest method structOfListsTest.

/**
 * Round-trips a block holding a single struct column (a varchar plus a list of varchar) through
 * record batch and schema serialization. Even-indexed rows are written with a two-element list
 * and odd-indexed rows with a {@code null} list; the test verifies the values and the number of
 * populated versus empty lists read back.
 */
@Test
public void structOfListsTest() throws Exception {
    BlockAllocatorImpl expectedAllocator = new BlockAllocatorImpl();
    /**
     * Generate and write the schema
     */
    SchemaBuilder schemaBuilder = new SchemaBuilder();
    schemaBuilder.addField(
        FieldBuilder.newBuilder("innerStruct", Types.MinorType.STRUCT.getType())
            .addStringField("varchar")
            .addListField("list", Types.MinorType.VARCHAR.getType())
            .build());
    Schema origSchema = schemaBuilder.build();
    /**
     * Generate and write the block
     */
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    int expectedRows = 200;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        for (int i = 0; i < expectedRows; i++) {
            switch (vector.getMinorType()) {
                case STRUCT:
                    Map<String, Object> value = new HashMap<>();
                    value.put("varchar", "chars");
                    if (i % 2 == 0) {
                        List<String> listVal = new ArrayList<>();
                        listVal.add("value_0_" + i);
                        listVal.add("value_1_" + i);
                        value.put("list", listVal);
                    } else {
                        value.put("list", null);
                    }
                    BlockUtils.setComplexValue((StructVector) vector, i, FieldResolver.DEFAULT, value);
                    break;
                default:
                    throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
            }
        }
    }
    expectedBlock.setRowCount(expectedRows);
    /**
     * Serialize the block and the schema
     */
    RecordBatchSerDe expectSerDe = new RecordBatchSerDe(expectedAllocator);
    ByteArrayOutputStream blockOut = new ByteArrayOutputStream();
    ArrowRecordBatch expectedBatch = expectedBlock.getRecordBatch();
    expectSerDe.serialize(expectedBatch, blockOut);
    assertSerializationOverhead(blockOut);
    expectedBatch.close();
    expectedBlock.close();
    ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
    SchemaSerDe schemaSerDe = new SchemaSerDe();
    schemaSerDe.serialize(origSchema, schemaOut);
    Schema actualSchema = schemaSerDe.deserialize(new ByteArrayInputStream(schemaOut.toByteArray()));
    BlockAllocatorImpl actualAllocator = new BlockAllocatorImpl();
    RecordBatchSerDe actualSerDe = new RecordBatchSerDe(actualAllocator);
    ArrowRecordBatch batch = actualSerDe.deserialize(blockOut.toByteArray());
    /**
     * Load the deserialized batch into a fresh block
     */
    Block actualBlock = actualAllocator.createBlock(actualSchema);
    actualBlock.loadRecordBatch(batch);
    batch.close();
    for (int i = 0; i < actualBlock.getRowCount(); i++) {
        logger.info("structOfListsTest: util {}", BlockUtils.rowToString(actualBlock, i));
    }
    assertEquals("Row count missmatch", expectedRows, actualBlock.getRowCount());
    /**
     * Verify the contents of every row
     */
    int actualListValues = 0;
    int emptyListValues = 0;
    for (Field next : actualBlock.getFields()) {
        FieldReader vector = actualBlock.getFieldReader(next.getName());
        for (int i = 0; i < actualBlock.getRowCount(); i++) {
            switch (vector.getMinorType()) {
                case STRUCT:
                    vector.setPosition(i);
                    assertEquals("chars", vector.reader("varchar").readText().toString());
                    FieldReader listReader = vector.reader("list");
                    int found = 0;
                    while (listReader.next()) {
                        assertEquals("value_" + found + "_" + i, listReader.reader().readText().toString());
                        found++;
                        actualListValues++;
                    }
                    if (found == 0) {
                        emptyListValues++;
                    }
                    break;
                default:
                    throw new UnsupportedOperationException(next.getType().getTypeID() + " is not supported");
            }
        }
    }
    actualBlock.close();
    // Log before asserting so the observed counts are visible when an assertion fails.
    logger.info("structOfListsTest: actualListValues[{}] emptyListValues[{}]", actualListValues, emptyListValues);
    // Even-indexed rows were written with a two-element list, odd-indexed rows with a null list.
    final int rowsWithList = (expectedRows + 1) / 2;
    assertEquals(rowsWithList * 2, actualListValues);
    assertEquals(expectedRows - rowsWithList, emptyListValues);
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ValueVector(org.apache.arrow.vector.ValueVector) Field(org.apache.arrow.vector.types.pojo.Field) ArrowRecordBatch(org.apache.arrow.vector.ipc.message.ArrowRecordBatch) ByteArrayInputStream(java.io.ByteArrayInputStream) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader) Test(org.junit.Test)

Aggregations

ValueVector (org.apache.arrow.vector.ValueVector)10 ArrayList (java.util.ArrayList)5 Float4Vector (org.apache.arrow.vector.Float4Vector)4 Float8Vector (org.apache.arrow.vector.Float8Vector)4 IntVector (org.apache.arrow.vector.IntVector)4 VarCharVector (org.apache.arrow.vector.VarCharVector)4 ListVector (org.apache.arrow.vector.complex.ListVector)4 Field (org.apache.arrow.vector.types.pojo.Field)4 Schema (org.apache.arrow.vector.types.pojo.Schema)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 List (java.util.List)3 BigIntVector (org.apache.arrow.vector.BigIntVector)3 SmallIntVector (org.apache.arrow.vector.SmallIntVector)3 TinyIntVector (org.apache.arrow.vector.TinyIntVector)3 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)3 ArrowRecordBatch (org.apache.arrow.vector.ipc.message.ArrowRecordBatch)3 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)3 HashMap (java.util.HashMap)2 RootAllocator (org.apache.arrow.memory.RootAllocator)2