Search in sources :

Example 1 with RepeatedMapVector

use of org.apache.drill.exec.vector.complex.RepeatedMapVector in project drill by axbaretto.

the class FlattenRecordBatch method getFlattenFieldTransferPair.

/**
 * Creates the {@link TransferPair} used to move the values of the column being flattened into the
 * output of the flatten operation.
 *
 * <p>The actual data within a repeated field is laid out the same way as in a scalar vector of the same
 * SQL type. For example, a repeated int vector has a vector of offsets into a regular int vector to
 * represent the lists. Because the layout of the values is the same in the repeated vector as in the
 * scalar vector of the same type, we can avoid making individual copies for the column being flattened
 * and instead use vector copies from the inner vector of the repeated field to the resulting scalar
 * vector. This is completed after we determine how many records will fit (we will hit either a batch
 * end, or the end of one of the other vectors, while copying the data of the other vectors alongside
 * each new flattened value coming out of the repeated field).
 *
 * @param reference reference whose name part names the target of the transfer
 * @return the transfer pair, or {@code null} when the column is a repeated vector (other than a
 *         repeated map) whose data vector is {@code null}
 * @throws UserException if the column is not a repeated vector and the incoming batch has records
 */
@SuppressWarnings("resource")
private TransferPair getFlattenFieldTransferPair(FieldReference reference) {
    final TypedFieldId columnId = incoming.getValueVectorId(popConfig.getColumn());
    final Class<?> columnClass = incoming.getSchema().getColumn(columnId.getFieldIds()[0]).getValueClass();
    final ValueVector column = incoming.getValueAccessorById(columnClass, columnId.getFieldIds()).getValueVector();

    // Repeated maps get a dedicated transfer pair that targets a single (non-repeated) map.
    if (column instanceof RepeatedMapVector) {
        final RepeatedMapVector repeatedMap = (RepeatedMapVector) column;
        return repeatedMap.getTransferPairToSingleMap(reference.getAsNamePart().getName(), oContext.getAllocator());
    }

    // Any other repeated vector: transfer straight from its inner data vector.
    if (column instanceof RepeatedValueVector) {
        final ValueVector innerData = ((RepeatedValueVector) column).getDataVector();
        // The data vector may be null because of fast schema return for repeated list vectors.
        if (innerData == null) {
            return null;
        }
        return innerData.getTransferPair(reference.getAsNamePart().getName(), oContext.getAllocator());
    }

    // Not a repeated vector at all: only tolerated for an empty batch.
    if (incoming.getRecordCount() != 0) {
        throw UserException.unsupportedError().message("Flatten does not support inputs of non-list values.").build(logger);
    }
    logger.error("Cannot cast {} to RepeatedValueVector", column);
    // With a record count of 0 the type seen here is not solid, so substitute an empty
    // repeated map vector instead of failing.
    final ValueVector placeholder = new RepeatedMapVector(column.getField(), oContext.getAllocator(), null);
    return ((RepeatedValueVector) placeholder).getTransferPair(reference.getAsNamePart().getName(), oContext.getAllocator());
}
Also used : RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) TransferPair(org.apache.drill.exec.record.TransferPair) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) TypedFieldId(org.apache.drill.exec.record.TypedFieldId)

Example 2 with RepeatedMapVector

use of org.apache.drill.exec.vector.complex.RepeatedMapVector in project drill by axbaretto.

the class FlattenRecordBatch method setFlattenVector.

/**
 * Looks up the vector of the column being flattened in the incoming batch and hands it to the
 * flattener.
 *
 * <p>If the vector is not a {@link RepeatedValueVector} and the incoming batch is empty, an empty
 * {@link RepeatedMapVector} built from the column's field is used in its place.
 *
 * @throws UserException if the column is not a repeated vector and the incoming batch has records
 */
@SuppressWarnings("resource")
private void setFlattenVector() {
    final TypedFieldId columnId = incoming.getValueVectorId(popConfig.getColumn());
    final MaterializedField columnField = incoming.getSchema().getColumn(columnId.getFieldIds()[0]);
    final ValueVector source = incoming.getValueAccessorById(columnField.getValueClass(), columnId.getFieldIds()).getValueVector();

    final RepeatedValueVector flattenTarget;
    if (source instanceof RepeatedValueVector) {
        flattenTarget = (RepeatedValueVector) source;
    } else {
        if (incoming.getRecordCount() != 0) {
            throw UserException.unsupportedError().message("Flatten does not support inputs of non-list values.").build(logger);
        }
        // With a record count of 0 the type seen here is not solid, so do not fail;
        // fall back to an empty repeated map vector.
        logger.error("setFlattenVector cast failed and recordcount is 0, create empty vector anyway.");
        flattenTarget = new RepeatedMapVector(columnField, oContext.getAllocator(), null);
    }
    flattener.setFlattenField(flattenTarget);
}
Also used : RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) MaterializedField(org.apache.drill.exec.record.MaterializedField)

Example 3 with RepeatedMapVector

use of org.apache.drill.exec.vector.complex.RepeatedMapVector in project drill by axbaretto.

the class VectorContainerBuilder method buildMap.

/**
 * Creates a map vector (single or repeated) for the given map column, adds it and its schema to
 * the parent tuple, then populates the new map with the column's children.
 *
 * @param parentTuple tuple proxy that receives the new map vector and its column schema
 * @param colModel    state of the map column to build
 */
@SuppressWarnings("resource")
private void buildMap(TupleProxy parentTuple, BaseMapColumnState colModel) {
    // Creating the map vector will create its contained vectors if we
    // give it a materialized field with children. So, instead pass a clone
    // without children so we can add them.
    ColumnMetadata mapColSchema = colModel.schema().cloneEmpty();
    // Don't get the map vector from the vector cache. Map vectors may
    // have content that varies from batch to batch. Only the leaf
    // vectors can be cached.
    AbstractMapVector mapVector;
    if (mapColSchema.isArray()) {
        // A repeated map shares an offset vector with the internal
        // repeated map.
        UInt4Vector offsets = (UInt4Vector) colModel.vector();
        mapVector = new RepeatedMapVector(mapColSchema.schema(), offsets, null);
    } else {
        mapVector = new MapVector(mapColSchema.schema(), allocator(), null);
    }
    // Add the map vector and schema to the parent tuple
    parentTuple.add(mapVector);
    int index = parentTuple.schema.addColumn(mapColSchema);
    // The vector and its column schema are added as a pair, so the tuple's size is
    // expected to equal the number of columns in its schema. (Comparing
    // parentTuple.size() with itself would always pass and verify nothing.)
    assert parentTuple.size() == parentTuple.schema.size();
    // Update the tuple, which will add the new columns in the map
    updateTuple(colModel.mapState(), parentTuple.mapProxy(index));
}
Also used : ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) AbstractMapVector(org.apache.drill.exec.vector.complex.AbstractMapVector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) MapVector(org.apache.drill.exec.vector.complex.MapVector) AbstractMapVector(org.apache.drill.exec.vector.complex.AbstractMapVector)

Example 4 with RepeatedMapVector

use of org.apache.drill.exec.vector.complex.RepeatedMapVector in project drill by axbaretto.

the class TestValueVector method testVectors.

/**
 * Runs the supplied verifier against one freshly created instance of each of several
 * {@link ValueVector vector} types, then closes every vector whether or not verification succeeded.
 *
 * @param test test function to execute on each vector
 * @throws Exception if the verifier or closing the vectors fails
 */
@SuppressWarnings("resource")
private void testVectors(VectorVerifier test) throws Exception {
    // One field per vector type under test; vectorsUnderTest[i] is built from vectorFields[i].
    final MaterializedField[] vectorFields = {
        MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE),
        MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE),
        MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE),
        MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE),
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE),
        MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE),
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE)
    };
    final ValueVector[] vectorsUnderTest = {
        new UInt4Vector(vectorFields[0], allocator),
        new BitVector(vectorFields[1], allocator),
        new VarCharVector(vectorFields[2], allocator),
        new NullableVarCharVector(vectorFields[3], allocator),
        new RepeatedListVector(vectorFields[4], allocator, null),
        new MapVector(vectorFields[5], allocator, null),
        new RepeatedMapVector(vectorFields[6], allocator, null)
    };
    try {
        for (final ValueVector candidate : vectorsUnderTest) {
            test.verify(candidate);
        }
    } finally {
        // Release all vectors even when a verification fails.
        AutoCloseables.close(vectorsUnderTest);
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) BaseValueVector(org.apache.drill.exec.vector.BaseValueVector) BitVector(org.apache.drill.exec.vector.BitVector) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) RepeatedListVector(org.apache.drill.exec.vector.complex.RepeatedListVector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) VarCharVector(org.apache.drill.exec.vector.VarCharVector) MaterializedField(org.apache.drill.exec.record.MaterializedField) NullableUInt4Vector(org.apache.drill.exec.vector.NullableUInt4Vector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) MapVector(org.apache.drill.exec.vector.complex.MapVector)

Example 5 with RepeatedMapVector

use of org.apache.drill.exec.vector.complex.RepeatedMapVector in project drill by axbaretto.

the class RowSetTest method testRepeatedMapStructure.

/**
 * Exercises the writer and reader structure for a row set whose schema is an INT column "a"
 * followed by a map array "m" with INT members "b" and "c". Writes three rows of two map
 * entries each, reads them back three different ways, checks the repeated map vector's value
 * count, and finally compares against a row set built with the row set builder.
 */
@Test
public void testRepeatedMapStructure() {
    // Schema: a INT, m MAP ARRAY { b INT, c INT }
    TupleMetadata schema = new SchemaBuilder().add("a", MinorType.INT).addMapArray("m").add("b", MinorType.INT).add("c", MinorType.INT).resumeSchema().buildSchema();
    ExtendableRowSet rowSet = fixture.rowSet(schema);
    RowSetWriter writer = rowSet.writer();
    // Map and Int
    // Pick out components and lightly test. (Assumes structure
    // tested earlier is still valid, so no need to exhaustively
    // test again.)
    assertEquals(ObjectType.SCALAR, writer.column("a").type());
    assertEquals(ObjectType.ARRAY, writer.column("m").type());
    // Column 1 is "m": an array whose entries are tuples (maps).
    ArrayWriter maWriter = writer.column(1).array();
    assertEquals(ObjectType.TUPLE, maWriter.entryType());
    TupleWriter mapWriter = maWriter.tuple();
    assertEquals(ObjectType.SCALAR, mapWriter.column("b").type());
    assertEquals(ObjectType.SCALAR, mapWriter.column("c").type());
    // Scalar writers for the top-level column and the two map members.
    ScalarWriter aWriter = writer.column("a").scalar();
    ScalarWriter bWriter = mapWriter.scalar("b");
    ScalarWriter cWriter = mapWriter.scalar("c");
    assertEquals(ValueType.INTEGER, aWriter.valueType());
    assertEquals(ValueType.INTEGER, bWriter.valueType());
    assertEquals(ValueType.INTEGER, cWriter.valueType());
    // Write data
    // Row 1: a = 10, m = [{101, 102}, {111, 112}]
    aWriter.setInt(10);
    bWriter.setInt(101);
    cWriter.setInt(102);
    // Advance to next array position
    maWriter.save();
    bWriter.setInt(111);
    cWriter.setInt(112);
    maWriter.save();
    writer.save();
    // Row 2: a = 20, m = [{201, 202}, {211, 212}]
    aWriter.setInt(20);
    bWriter.setInt(201);
    cWriter.setInt(202);
    maWriter.save();
    bWriter.setInt(211);
    cWriter.setInt(212);
    maWriter.save();
    writer.save();
    // Row 3: a = 30, m = [{301, 302}, {311, 312}]
    aWriter.setInt(30);
    bWriter.setInt(301);
    cWriter.setInt(302);
    maWriter.save();
    bWriter.setInt(311);
    cWriter.setInt(312);
    maWriter.save();
    writer.save();
    // Finish the row set and get a reader.
    SingleRowSet actual = writer.done();
    RowSetReader reader = actual.reader();
    // Verify reader structure
    assertEquals(ObjectType.SCALAR, reader.column("a").type());
    assertEquals(ObjectType.ARRAY, reader.column("m").type());
    ArrayReader maReader = reader.column(1).array();
    assertEquals(ObjectType.TUPLE, maReader.entryType());
    TupleReader mapReader = maReader.tuple();
    assertEquals(ObjectType.SCALAR, mapReader.column("b").type());
    assertEquals(ObjectType.SCALAR, mapReader.column("c").type());
    // Scalar readers mirroring the scalar writers above.
    ScalarReader aReader = reader.column("a").scalar();
    ScalarReader bReader = mapReader.scalar("b");
    ScalarReader cReader = mapReader.scalar("c");
    assertEquals(ValueType.INTEGER, aReader.valueType());
    assertEquals(ValueType.INTEGER, bReader.valueType());
    assertEquals(ValueType.INTEGER, cReader.valueType());
    // Row 1: use index accessors
    assertTrue(reader.next());
    assertEquals(10, aReader.getInt());
    TupleReader ixReader = maReader.tuple(0);
    assertEquals(101, ixReader.scalar(0).getInt());
    assertEquals(102, ixReader.scalar(1).getInt());
    ixReader = maReader.tuple(1);
    assertEquals(111, ixReader.scalar(0).getInt());
    assertEquals(112, ixReader.scalar(1).getInt());
    // Row 2: use common accessor with explicit positioning,
    // but access scalars through the map reader.
    assertTrue(reader.next());
    assertEquals(20, aReader.getInt());
    maReader.setPosn(0);
    assertEquals(201, mapReader.scalar(0).getInt());
    assertEquals(202, mapReader.scalar(1).getInt());
    maReader.setPosn(1);
    assertEquals(211, mapReader.scalar(0).getInt());
    assertEquals(212, mapReader.scalar(1).getInt());
    // Row 3: use common accessor for scalars
    assertTrue(reader.next());
    assertEquals(30, aReader.getInt());
    maReader.setPosn(0);
    assertEquals(301, bReader.getInt());
    assertEquals(302, cReader.getInt());
    maReader.setPosn(1);
    assertEquals(311, bReader.getInt());
    assertEquals(312, cReader.getInt());
    // Only three rows were written, so the reader must now be exhausted.
    assertFalse(reader.next());
    // Verify that the map accessor's value count was set.
    @SuppressWarnings("resource") RepeatedMapVector mapVector = (RepeatedMapVector) actual.container().getValueVector(1).getValueVector();
    assertEquals(3, mapVector.getAccessor().getValueCount());
    // Verify the readers and writers again using the testing tools.
    SingleRowSet expected = fixture.rowSetBuilder(schema).addRow(10, objArray(objArray(101, 102), objArray(111, 112))).addRow(20, objArray(objArray(201, 202), objArray(211, 212))).addRow(30, objArray(objArray(301, 302), objArray(311, 312))).build();
    new RowSetComparison(expected).verifyAndClearAll(actual);
}
Also used : RowSetWriter(org.apache.drill.test.rowSet.RowSetWriter) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) ScalarReader(org.apache.drill.exec.vector.accessor.ScalarReader) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) ArrayWriter(org.apache.drill.exec.vector.accessor.ArrayWriter) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) ExtendableRowSet(org.apache.drill.test.rowSet.RowSet.ExtendableRowSet) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

RepeatedMapVector (org.apache.drill.exec.vector.complex.RepeatedMapVector)22 ValueVector (org.apache.drill.exec.vector.ValueVector)11 Test (org.junit.Test)10 MaterializedField (org.apache.drill.exec.record.MaterializedField)9 RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector)9 UInt4Vector (org.apache.drill.exec.vector.UInt4Vector)7 SubOperatorTest (org.apache.drill.test.SubOperatorTest)7 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)6 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)5 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)5 TransferPair (org.apache.drill.exec.record.TransferPair)4 TypedFieldId (org.apache.drill.exec.record.TypedFieldId)4 TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter)4 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)3 ColumnSize (org.apache.drill.exec.record.RecordBatchSizer.ColumnSize)3 MapVector (org.apache.drill.exec.vector.complex.MapVector)3 BaseTest (org.apache.drill.test.BaseTest)3 RowSetBuilder (org.apache.drill.exec.physical.rowSet.RowSetBuilder)2 ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata)2 BaseValueVector (org.apache.drill.exec.vector.BaseValueVector)2