Search in sources :

Example 1 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by axbaretto.

the class HBaseRecordReader method setup.

/**
 * Prepares the reader for scanning: stores the operator context and output
 * mutator, opens the HBase table, registers the output vectors for the
 * requested columns and finally opens the result scanner.
 *
 * @param context operator context kept for later use by this reader
 * @param output  mutator through which output vectors are registered
 * @throws ExecutionSetupException wrapping any {@link SchemaChangeException}
 *         or {@link IOException} raised while setting up
 */
@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    this.operatorContext = context;
    this.outputMutator = output;
    familyVectorMap = new HashMap<>();
    try {
        hTable = connection.getTable(hbaseTableName);

        // Walk the reader's columns in the order returned by getColumns():
        // the row key gets its own VarBinary vector, every other column
        // gets (or reuses) the vector of its root segment, i.e. its family.
        for (SchemaPath projected : getColumns()) {
            if (!projected.equals(ROW_KEY_PATH)) {
                getOrCreateFamilyVector(projected.getRootSegment().getPath(), false);
                continue;
            }
            MaterializedField rowKeyField =
                MaterializedField.create(projected.getAsNamePart().getName(), ROW_KEY_TYPE);
            rowKeyVector = outputMutator.addField(rowKeyField, VarBinaryVector.class);
        }

        // Add map and child vectors for any HBase columns that are requested (in
        // order to avoid later creation of dummy NullableIntVectors for them).
        for (Map.Entry<byte[], NavigableSet<byte[]>> requested
                : hbaseScanColumnsOnly.getFamilyMap().entrySet()) {
            final MapVector familyVector =
                getOrCreateFamilyVector(new String(requested.getKey(), StandardCharsets.UTF_8), false);
            final Set<byte[]> qualifiers = requested.getValue();
            if (qualifiers == null) {
                // No individual columns listed for this family.
                continue;
            }
            for (byte[] qualifierBytes : qualifiers) {
                getOrCreateColumnVector(familyVector, new String(qualifierBytes, StandardCharsets.UTF_8));
            }
        }

        // Add map vectors for any HBase column families that are requested.
        for (String wholeFamily : completeFamilies) {
            getOrCreateFamilyVector(wholeFamily, false);
        }

        resultScanner = hTable.getScanner(hbaseScan);
    } catch (SchemaChangeException | IOException e) {
        throw new ExecutionSetupException(e);
    }
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) NavigableSet(java.util.NavigableSet) MaterializedField(org.apache.drill.exec.record.MaterializedField) IOException(java.io.IOException) VarBinaryVector(org.apache.drill.exec.vector.VarBinaryVector) NullableVarBinaryVector(org.apache.drill.exec.vector.NullableVarBinaryVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) SchemaPath(org.apache.drill.common.expression.SchemaPath) HashMap(java.util.HashMap) Map(java.util.Map) MapVector(org.apache.drill.exec.vector.complex.MapVector)

Example 2 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by axbaretto.

the class VectorContainerBuilder method buildMap.

/**
 * Creates a (possibly repeated) map vector for the given map column, adds it
 * and its schema to the parent tuple, then updates the new map's own tuple
 * from the column model's map state.
 *
 * @param parentTuple proxy for the tuple that receives the new map vector
 * @param colModel    state of the map column being materialized
 */
@SuppressWarnings("resource")
private void buildMap(TupleProxy parentTuple, BaseMapColumnState colModel) {
    // Creating the map vector will create its contained vectors if we
    // give it a materialized field with children. So, instead pass a clone
    // without children so we can add them.
    ColumnMetadata mapColSchema = colModel.schema().cloneEmpty();
    // Don't get the map vector from the vector cache. Map vectors may
    // have content that varies from batch to batch. Only the leaf
    // vectors can be cached.
    AbstractMapVector mapVector;
    if (mapColSchema.isArray()) {
        // A repeated map shares an offset vector with the internal
        // repeated map.
        UInt4Vector offsets = (UInt4Vector) colModel.vector();
        mapVector = new RepeatedMapVector(mapColSchema.schema(), offsets, null);
    } else {
        mapVector = new MapVector(mapColSchema.schema(), allocator(), null);
    }
    // Add the map vector and schema to the parent tuple
    parentTuple.add(mapVector);
    int index = parentTuple.schema.addColumn(mapColSchema);
    // The schema index is used below to look up the map proxy, so it must
    // refer to the vector just added, i.e. the last one in the tuple. (The
    // previous assertion compared parentTuple.size() with itself and could
    // never fail.)
    assert index == parentTuple.size() - 1;
    // Update the tuple, which will add the new columns in the map
    updateTuple(colModel.mapState(), parentTuple.mapProxy(index));
}
Also used : ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) AbstractMapVector(org.apache.drill.exec.vector.complex.AbstractMapVector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) MapVector(org.apache.drill.exec.vector.complex.MapVector) AbstractMapVector(org.apache.drill.exec.vector.complex.AbstractMapVector)

Example 3 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by axbaretto.

the class TestValueVector method testVectors.

/**
 * Runs the supplied verifier against one instance of each of several
 * {@link ValueVector vector} kinds, then closes all of the vectors.
 *
 * @param test test function to execute on every vector
 * @throws Exception if the verifier or closing the vectors fails
 */
@SuppressWarnings("resource")
private void testVectors(VectorVerifier test) throws Exception {
    // One field per vector kind under test; all use the empty schema path.
    final MaterializedField uint4Field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
    final MaterializedField bitField = MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE);
    final MaterializedField varCharField = MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE);
    final MaterializedField nullableVarCharField = MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);
    final MaterializedField repeatedListField = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE);
    final MaterializedField mapField = MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE);
    final MaterializedField repeatedMapField = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE);

    final ValueVector[] underTest = {
        new UInt4Vector(uint4Field, allocator),
        new BitVector(bitField, allocator),
        new VarCharVector(varCharField, allocator),
        new NullableVarCharVector(nullableVarCharField, allocator),
        new RepeatedListVector(repeatedListField, allocator, null),
        new MapVector(mapField, allocator, null),
        new RepeatedMapVector(repeatedMapField, allocator, null)
    };

    try {
        for (int i = 0; i < underTest.length; i++) {
            test.verify(underTest[i]);
        }
    } finally {
        // Close every vector even when verification fails.
        AutoCloseables.close(underTest);
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) BaseValueVector(org.apache.drill.exec.vector.BaseValueVector) BitVector(org.apache.drill.exec.vector.BitVector) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) RepeatedListVector(org.apache.drill.exec.vector.complex.RepeatedListVector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) VarCharVector(org.apache.drill.exec.vector.VarCharVector) MaterializedField(org.apache.drill.exec.record.MaterializedField) NullableUInt4Vector(org.apache.drill.exec.vector.NullableUInt4Vector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) MapVector(org.apache.drill.exec.vector.complex.MapVector)

Example 4 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by axbaretto.

the class TestRecordBatchSizer method testSizerMap.

/**
 * Sizes a batch holding a single map column ("key" INT, "value" VARCHAR)
 * and then checks the capacities of the map's child vectors after
 * allocating them for several row counts.
 */
@Test
public void testSizerMap() {
    BatchSchema mapSchema = new SchemaBuilder()
        .addMap("map")
        .add("key", MinorType.INT)
        .add("value", MinorType.VARCHAR)
        .resumeSchema()
        .build();

    // Ten identical rows; each row's single column is the map {10, "a"}.
    RowSetBuilder rowBuilder = fixture.rowSetBuilder(mapSchema);
    for (int row = 0; row < 10; row++) {
        rowBuilder.addRow((Object) (new Object[] { 10, "a" }));
    }
    RowSet populated = rowBuilder.build();

    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer batchSizer = new RecordBatchSizer(populated.container());
    assertEquals(1, batchSizer.columns().size());

    /*
     * Expected figures, in argument order (names as given in the original
     * note for this test):
     * stdDataSize 54 (50+4), stdNetSize 58 (50+4+4),
     * dataSizePerEntry 5 (4+1), netSizePerEntry 9 (4+1+4),
     * totalDataSize 50 (5*10), totalNetSize 90 (4*10+4*10+1*10),
     * valueCount 10, elementCount 10, estElementCountPerArray 1,
     * isVariableWidth false
     */
    verifyColumnValues(batchSizer.columns().get("map"), 54, 58, 5, 9, 50, 90, 10, 10, 1, false);

    SingleRowSet emptySet = fixture.rowSet(mapSchema);
    VectorAccessible emptyBatch = emptySet.vectorAccessible();
    for (VectorWrapper<?> wrapper : emptyBatch) {
        ValueVector vector = wrapper.getValueVector();
        RecordBatchSizer.ColumnSize columnSize = batchSizer.getColumn(vector.getField().getName());

        // Allocates to nearest power of two
        columnSize.allocateVector(vector, testRowCount);
        MapVector map = (MapVector) vector;
        ValueVector keys = map.getChild("key");
        ValueVector values = map.getChild("value");
        assertEquals((Integer.highestOneBit(testRowCount) << 1), keys.getValueCapacity());
        UInt4Vector offsets = ((VariableWidthVector) values).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1), offsets.getValueCapacity());
        assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, values.getValueCapacity());

        // Allocates the same as value passed since it is already power of two.
        columnSize.allocateVector(vector, testRowCountPowerTwo - 1);
        map = (MapVector) vector;
        keys = map.getChild("key");
        values = map.getChild("value");
        assertEquals((Integer.highestOneBit(testRowCountPowerTwo - 1) << 1), keys.getValueCapacity());
        offsets = ((VariableWidthVector) values).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsets.getValueCapacity());
        assertEquals(Integer.highestOneBit(testRowCountPowerTwo) - 1, values.getValueCapacity());

        // Allocate for max rows.
        columnSize.allocateVector(vector, ValueVector.MAX_ROW_COUNT - 1);
        map = (MapVector) vector;
        keys = map.getChild("key");
        values = map.getChild("value");
        assertEquals(ValueVector.MAX_ROW_COUNT, keys.getValueCapacity());
        offsets = ((VariableWidthVector) values).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsets.getValueCapacity());
        assertEquals(ValueVector.MAX_ROW_COUNT - 1, values.getValueCapacity());

        // Allocate for 0 rows; this should still allocate for at least 1 row.
        columnSize.allocateVector(vector, 0);
        map = (MapVector) vector;
        keys = map.getChild("key");
        values = map.getChild("value");
        assertEquals(ValueVector.MIN_ROW_COUNT, keys.getValueCapacity());
        offsets = ((VariableWidthVector) values).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsets.getValueCapacity());
        assertEquals(ValueVector.MIN_ROW_COUNT, values.getValueCapacity());

        vector.clear();
    }

    emptySet.clear();
    populated.clear();
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) VariableWidthVector(org.apache.drill.exec.vector.VariableWidthVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.test.rowSet.RowSetBuilder) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) MapVector(org.apache.drill.exec.vector.complex.MapVector) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 5 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by axbaretto.

the class RowSetTest method testMapStructure.

/**
 * Test a simple map structure at the top level of a row: an INT column "a"
 * followed by a map "m" that holds an INT array "b". Checks the writer and
 * reader object types, writes three rows, reads them back and compares the
 * result with an equivalent row set built directly.
 */
@Test
public void testMapStructure() {
    TupleMetadata rowSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMap("m")
        .addArray("b", MinorType.INT)
        .resumeSchema()
        .buildSchema();
    ExtendableRowSet target = fixture.rowSet(rowSchema);
    RowSetWriter rowWriter = target.writer();

    // Map and Int
    // Test Invariants
    assertEquals(ObjectType.SCALAR, rowWriter.column("a").type());
    assertEquals(ObjectType.SCALAR, rowWriter.column(0).type());
    assertEquals(ObjectType.TUPLE, rowWriter.column("m").type());
    assertEquals(ObjectType.TUPLE, rowWriter.column(1).type());
    assertSame(rowWriter.column(1).tuple(), rowWriter.tuple(1));

    TupleWriter mWriter = rowWriter.column(1).tuple();
    assertEquals(ObjectType.SCALAR, mWriter.column("b").array().entry().type());
    assertEquals(ObjectType.SCALAR, mWriter.column("b").array().entryType());

    ScalarWriter aWriter = rowWriter.column("a").scalar();
    ScalarWriter bWriter = rowWriter.column("m").tuple().column("b").array().entry().scalar();
    assertSame(bWriter, rowWriter.tuple(1).array(0).scalar());
    assertEquals(ValueType.INTEGER, bWriter.valueType());

    // The map column must reject being treated as a scalar ...
    try {
        rowWriter.column(1).scalar();
        fail();
    } catch (UnsupportedOperationException expected) {
        // Expected
    }
    // ... and as an array.
    try {
        rowWriter.column(1).array();
        fail();
    } catch (UnsupportedOperationException expected) {
        // Expected
    }

    // Write data: rows (10, [11, 12]), (20, [21, 22]), (30, [31, 32]).
    for (int base = 10; base <= 30; base += 10) {
        aWriter.setInt(base);
        bWriter.setInt(base + 1);
        bWriter.setInt(base + 2);
        rowWriter.save();
    }

    // Finish the row set and get a reader.
    SingleRowSet actual = rowWriter.done();
    RowSetReader rowReader = actual.reader();
    assertEquals(ObjectType.SCALAR, rowReader.column("a").type());
    assertEquals(ObjectType.SCALAR, rowReader.column(0).type());
    assertEquals(ObjectType.TUPLE, rowReader.column("m").type());
    assertEquals(ObjectType.TUPLE, rowReader.column(1).type());
    assertSame(rowReader.column(1).tuple(), rowReader.tuple(1));

    ScalarReader aReader = rowReader.column(0).scalar();
    TupleReader mReader = rowReader.column(1).tuple();
    assertEquals(ObjectType.SCALAR, mReader.column("b").array().entryType());
    ScalarElementReader bReader = mReader.column(0).elements();
    assertEquals(ValueType.INTEGER, bReader.valueType());

    // Read back the three rows in the order they were written.
    for (int base = 10; base <= 30; base += 10) {
        assertTrue(rowReader.next());
        assertEquals(base, aReader.getInt());
        assertEquals(base + 1, bReader.getInt(0));
        assertEquals(base + 2, bReader.getInt(1));
    }
    assertFalse(rowReader.next());

    // Verify that the map accessor's value count was set.
    @SuppressWarnings("resource")
    MapVector mVector = (MapVector) actual.container().getValueVector(1).getValueVector();
    assertEquals(actual.rowCount(), mVector.getAccessor().getValueCount());

    SingleRowSet expected = fixture.rowSetBuilder(rowSchema)
        .addRow(10, objArray(intArray(11, 12)))
        .addRow(20, objArray(intArray(21, 22)))
        .addRow(30, objArray(intArray(31, 32)))
        .build();
    new RowSetComparison(expected).verifyAndClearAll(actual);
}
Also used : RowSetWriter(org.apache.drill.test.rowSet.RowSetWriter) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) ScalarElementReader(org.apache.drill.exec.vector.accessor.ScalarElementReader) ScalarReader(org.apache.drill.exec.vector.accessor.ScalarReader) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) ExtendableRowSet(org.apache.drill.test.rowSet.RowSet.ExtendableRowSet) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) MapVector(org.apache.drill.exec.vector.complex.MapVector) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

MapVector (org.apache.drill.exec.vector.complex.MapVector)40 ValueVector (org.apache.drill.exec.vector.ValueVector)21 Test (org.junit.Test)16 SubOperatorTest (org.apache.drill.test.SubOperatorTest)14 RepeatedMapVector (org.apache.drill.exec.vector.complex.RepeatedMapVector)13 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)12 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)11 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)10 MaterializedField (org.apache.drill.exec.record.MaterializedField)10 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)9 UInt4Vector (org.apache.drill.exec.vector.UInt4Vector)9 RowSetBuilder (org.apache.drill.exec.physical.rowSet.RowSetBuilder)7 ColumnSize (org.apache.drill.exec.record.RecordBatchSizer.ColumnSize)6 VectorContainer (org.apache.drill.exec.record.VectorContainer)6 VarCharVector (org.apache.drill.exec.vector.VarCharVector)6 RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector)6 TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter)5 ArrayList (java.util.ArrayList)4 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)4 SchemaPath (org.apache.drill.common.expression.SchemaPath)4