Search in sources :

Example 16 with DictVector

use of org.apache.drill.exec.vector.complex.DictVector in project drill by apache.

the class TestDictVector method testLoadValueVector.

/**
 * Writes several maps into a {@link DictVector} through a dict writer,
 * serializes the vector, loads the bytes into a second vector and checks that
 * every key/value pair read back matches what was written.
 *
 * <p>The loaded vector, the serialized buffer and the writable batch are
 * released in {@code finally} blocks so that a failed assertion does not skip
 * their cleanup.
 */
@Test
public void testLoadValueVector() {
    MaterializedField field = MaterializedField.create("map", DictVector.TYPE);
    try (DictVector mapVector = new DictVector(field, allocator, null)) {
        mapVector.allocateNew();
        List<Map<Object, Object>> maps = Arrays.asList(TestBuilder.mapOfObject(4f, 1L, 5.3f, 2L, 0.3f, 3L, -0.2f, 4L, 102.07f, 5L), TestBuilder.mapOfObject(45f, 6L, 9.2f, 7L), TestBuilder.mapOfObject(4.01f, 8L, 9.2f, 9L, -2.3f, 10L), TestBuilder.mapOfObject(), TestBuilder.mapOfObject(11f, 11L, 9.73f, 12L, 0.03f, 13L));
        BaseWriter.DictWriter mapWriter = new SingleDictWriter(mapVector, null);
        // Write one dict per row; each map entry becomes a key/value pair.
        int index = 0;
        for (Map<Object, Object> map : maps) {
            mapWriter.setPosition(index++);
            mapWriter.start();
            for (Map.Entry<Object, Object> entry : map.entrySet()) {
                mapWriter.startKeyValuePair();
                mapWriter.float4(DictVector.FIELD_KEY_NAME).writeFloat4((float) entry.getKey());
                mapWriter.bigInt(DictVector.FIELD_VALUE_NAME).writeBigInt((long) entry.getValue());
                mapWriter.endKeyValuePair();
            }
            mapWriter.end();
        }
        UserBitShared.SerializedField oldField = mapVector.getMetadata();
        WritableBatch writableBatch = WritableBatch.getBatchNoHV(oldField.getValueCount(), Collections.singletonList(mapVector), false);
        try {
            DrillBuf byteBuf = TestLoad.serializeBatch(allocator, writableBatch);
            try {
                DictVector newMapVector = new DictVector(field.clone(), allocator, null);
                try {
                    newMapVector.load(oldField, byteBuf);
                    BaseReader.DictReader mapReader = newMapVector.getReader();
                    // Read the loaded vector back and compare against the source maps.
                    index = 0;
                    for (Map<Object, Object> map : maps) {
                        mapReader.setPosition(index++);
                        for (Map.Entry<Object, Object> entry : map.entrySet()) {
                            mapReader.next();
                            Float actualKey = mapReader.reader(DictVector.FIELD_KEY_NAME).readFloat();
                            Long actualValue = mapReader.reader(DictVector.FIELD_VALUE_NAME).readLong();
                            assertEquals(entry.getKey(), actualKey);
                            assertEquals(entry.getValue(), actualValue);
                        }
                    }
                } finally {
                    newMapVector.clear();
                }
            } finally {
                byteBuf.release();
            }
        } finally {
            writableBatch.clear();
        }
    }
}
Also used : DictVector(org.apache.drill.exec.vector.complex.DictVector) BaseWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter) MaterializedField(org.apache.drill.exec.record.MaterializedField) BaseReader(org.apache.drill.exec.vector.complex.reader.BaseReader) SingleDictWriter(org.apache.drill.exec.vector.complex.impl.SingleDictWriter) WritableBatch(org.apache.drill.exec.record.WritableBatch) Map(java.util.Map) UserBitShared(org.apache.drill.exec.proto.UserBitShared) DrillBuf(io.netty.buffer.DrillBuf) ExecTest(org.apache.drill.exec.ExecTest) Test(org.junit.Test) VectorTest(org.apache.drill.categories.VectorTest)

Example 17 with DictVector

use of org.apache.drill.exec.vector.complex.DictVector in project drill by apache.

the class SingleSchemaInference method inferDictSchema.

/**
 * Infers the schema of a dict vector's children.
 *
 * @param vector a dict vector, or a repeated dict vector when the field's
 *               data mode is {@code REPEATED}
 * @return a tuple schema built from the inferred metadata of each child, in
 *         ordinal order
 */
private TupleSchema inferDictSchema(ValueVector vector) {
    final List<ColumnMetadata> childColumns = new ArrayList<>();
    // For the repeated form the dict itself is the wrapped data vector.
    final boolean repeated = vector.getField().getType().getMode() == DataMode.REPEATED;
    final DictVector dict = repeated
            ? (DictVector) ((RepeatedDictVector) vector).getDataVector()
            : (DictVector) vector;
    int ordinal = 0;
    while (ordinal < dict.size()) {
        childColumns.add(inferVector(dict.getChildByOrdinal(ordinal)));
        ordinal++;
    }
    return MetadataUtils.fromColumns(childColumns);
}
Also used : DictVector(org.apache.drill.exec.vector.complex.DictVector) RepeatedDictVector(org.apache.drill.exec.vector.complex.RepeatedDictVector) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) ArrayList(java.util.ArrayList)

Example 18 with DictVector

use of org.apache.drill.exec.vector.complex.DictVector in project drill by apache.

the class BuildVectorsFromMetadata method populateDict.

/**
 * Adds one child vector to the dict inside {@code vector} for every column
 * described by {@code dictMetadata}.
 *
 * @param vector       repeated dict vector whose data vector receives the children
 * @param dictMetadata metadata listing the child columns to build
 */
private void populateDict(RepeatedDictVector vector, TupleMetadata dictMetadata) {
    int ordinal = 0;
    while (ordinal < dictMetadata.size()) {
        final ColumnMetadata member = dictMetadata.metadata(ordinal);
        final DictVector dict = (DictVector) vector.getDataVector();
        final String memberName = member.name();
        // Register the newly built vector under the child column's name.
        dict.putChild(memberName, buildVector(member));
        ordinal++;
    }
}
Also used : DictVector(org.apache.drill.exec.vector.complex.DictVector) RepeatedDictVector(org.apache.drill.exec.vector.complex.RepeatedDictVector) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata)

Example 19 with DictVector

use of org.apache.drill.exec.vector.complex.DictVector in project drill by apache.

the class ColumnBuilder method buildDictArray.

/**
 * Builds the column state for a dict array column.
 *
 * <p>When the column is projected, a {@link RepeatedDictVector} is created and
 * its two offset vectors are wrapped in offset vector states. When it is not
 * projected, no vector is created and null vector states are used instead.
 *
 * @param parent       container that supplies the projection filter, loader
 *                     and vector cache
 * @param columnSchema metadata of the dict array column
 * @return the assembled dict column state
 */
private ColumnState buildDictArray(ContainerState parent, ColumnMetadata columnSchema) {
    final ProjResult projResult = parent.projection().projection(columnSchema);
    final boolean projected = projResult.isProjected;

    // Unprojected columns get neither a vector nor an offset vector.
    RepeatedDictVector dictArrayVector = null;
    UInt4Vector arrayOffsets = null;
    if (projected) {
        // Creating the dict vector will create its contained vectors if we
        // give it a materialized field with children. So, instead pass a clone
        // without children so we can add them.
        final ColumnMetadata emptyDictSchema = columnSchema.cloneEmpty();
        assert columnSchema.tupleSchema().isEmpty();
        dictArrayVector = new RepeatedDictVector(emptyDictSchema.schema(), parent.loader().allocator(), null);
        arrayOffsets = dictArrayVector.getOffsetVector();
    }

    // The writer is built in both cases; the vector is null when unprojected.
    final AbstractObjectWriter writer = ObjectDictWriter.buildDictArray(columnSchema, dictArrayVector, new ArrayList<>());

    // Wrap the array-level and dict-level offset vectors in vector states.
    final VectorState arrayOffsetState;
    final VectorState dictOffsetState;
    if (projected) {
        final AbstractArrayWriter outerArrayWriter = (AbstractArrayWriter) writer.array();
        arrayOffsetState = new OffsetVectorState(
                outerArrayWriter.offsetWriter(),
                arrayOffsets,
                writer.array().entry().events());
        dictOffsetState = new OffsetVectorState(
                ((AbstractArrayWriter) outerArrayWriter.array()).offsetWriter(),
                ((DictVector) dictArrayVector.getDataVector()).getOffsetVector(),
                writer.array().entry().dict().entry().events());
    } else {
        arrayOffsetState = new NullVectorState();
        dictOffsetState = new NullVectorState();
    }
    final VectorState dictArrayVectorState =
            new TupleState.DictArrayVectorState(dictArrayVector, arrayOffsetState, dictOffsetState);

    // Assemble it all into the column state.
    final TupleState.DictArrayState dictArrayState = new TupleState.DictArrayState(
            parent.loader(),
            parent.vectorCache().childCache(columnSchema.name()),
            projResult.mapFilter);
    return new TupleState.DictColumnState(dictArrayState, writer, dictArrayVectorState, parent.isVersioned());
}
Also used : DictVector(org.apache.drill.exec.vector.complex.DictVector) RepeatedDictVector(org.apache.drill.exec.vector.complex.RepeatedDictVector) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) PrimitiveColumnMetadata(org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata) OffsetVectorState(org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.OffsetVectorState) ProjResult(org.apache.drill.exec.physical.resultSet.impl.ProjectionFilter.ProjResult) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) AbstractArrayWriter(org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter) RepeatedDictVector(org.apache.drill.exec.vector.complex.RepeatedDictVector) AbstractObjectWriter(org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter) MapVectorState(org.apache.drill.exec.physical.resultSet.impl.TupleState.MapVectorState) RepeatedListVectorState(org.apache.drill.exec.physical.resultSet.impl.RepeatedListState.RepeatedListVectorState) UnionVectorState(org.apache.drill.exec.physical.resultSet.impl.UnionState.UnionVectorState) OffsetVectorState(org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.OffsetVectorState) ListVectorState(org.apache.drill.exec.physical.resultSet.impl.ListState.ListVectorState) SimpleVectorState(org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.SimpleVectorState)

Aggregations

DictVector (org.apache.drill.exec.vector.complex.DictVector)19 Test (org.junit.Test)13 MaterializedField (org.apache.drill.exec.record.MaterializedField)10 Map (java.util.Map)8 VectorTest (org.apache.drill.categories.VectorTest)8 ExecTest (org.apache.drill.exec.ExecTest)8 RepeatedDictVector (org.apache.drill.exec.vector.complex.RepeatedDictVector)8 SingleDictWriter (org.apache.drill.exec.vector.complex.impl.SingleDictWriter)8 BaseWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter)8 BaseReader (org.apache.drill.exec.vector.complex.reader.BaseReader)6 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)5 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)5 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)5 DictWriter (org.apache.drill.exec.vector.accessor.DictWriter)5 SubOperatorTest (org.apache.drill.test.SubOperatorTest)5 ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata)4 ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader)3 RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader)3 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)3 ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter)3