Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class TestRowSet, the method testMapStructure.
/**
* Test a simple map structure at the top level of a row.
*
* @throws VectorOverflowException should never occur
*/
@Test
public void testMapStructure() {
final TupleMetadata schema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .addMap("m")
      .addArray("b", MinorType.INT)
      .resumeSchema()
    .buildSchema();
final ExtendableRowSet rowSet = fixture.rowSet(schema);
final RowSetWriter writer = rowSet.writer();
// Schema: an INT column "a" and a map "m" containing a repeated INT "b".
// Verify invariants of the writer structure.
assertEquals(ObjectType.SCALAR, writer.column("a").type());
assertEquals(ObjectType.SCALAR, writer.column(0).type());
assertEquals(ObjectType.TUPLE, writer.column("m").type());
assertEquals(ObjectType.TUPLE, writer.column(1).type());
assertSame(writer.column(1).tuple(), writer.tuple(1));
final TupleWriter mapWriter = writer.column(1).tuple();
assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entry().type());
assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entryType());
final ScalarWriter aWriter = writer.column("a").scalar();
final ScalarWriter bWriter = writer.column("m").tuple().column("b").array().entry().scalar();
assertSame(bWriter, writer.tuple(1).array(0).scalar());
assertEquals(ValueType.INTEGER, bWriter.valueType());
try {
writer.column(1).scalar();
fail();
} catch (final UnsupportedOperationException e) {
// Expected
}
try {
writer.column(1).array();
fail();
} catch (final UnsupportedOperationException e) {
// Expected
}
// Write data
aWriter.setInt(10);
bWriter.setInt(11);
bWriter.setInt(12);
writer.save();
aWriter.setInt(20);
bWriter.setInt(21);
bWriter.setInt(22);
writer.save();
aWriter.setInt(30);
bWriter.setInt(31);
bWriter.setInt(32);
writer.save();
// Finish the row set and get a reader.
final SingleRowSet actual = writer.done();
final RowSetReader reader = actual.reader();
assertEquals(ObjectType.SCALAR, reader.column("a").type());
assertEquals(ObjectType.SCALAR, reader.column(0).type());
assertEquals(ObjectType.TUPLE, reader.column("m").type());
assertEquals(ObjectType.TUPLE, reader.column(1).type());
assertSame(reader.column(1).tuple(), reader.tuple(1));
final ScalarReader aReader = reader.column(0).scalar();
final TupleReader mReader = reader.column(1).tuple();
final ArrayReader bArray = mReader.column("b").array();
assertEquals(ObjectType.SCALAR, bArray.entryType());
final ScalarReader bReader = bArray.scalar();
assertEquals(ValueType.INTEGER, bReader.valueType());
// Row 1: (10, {[11, 12]})
assertTrue(reader.next());
assertEquals(10, aReader.getInt());
assertFalse(mReader.isNull());
assertTrue(bArray.next());
assertFalse(bReader.isNull());
assertEquals(11, bReader.getInt());
assertTrue(bArray.next());
assertFalse(bReader.isNull());
assertEquals(12, bReader.getInt());
assertFalse(bArray.next());
// Row 2: (20, {[21, 22]})
assertTrue(reader.next());
assertEquals(20, aReader.getInt());
assertFalse(mReader.isNull());
assertTrue(bArray.next());
assertEquals(21, bReader.getInt());
assertTrue(bArray.next());
assertEquals(22, bReader.getInt());
// Row 3: (30, {[31, 32]})
assertTrue(reader.next());
assertEquals(30, aReader.getInt());
assertFalse(mReader.isNull());
assertTrue(bArray.next());
assertEquals(31, bReader.getInt());
assertTrue(bArray.next());
assertEquals(32, bReader.getInt());
assertFalse(reader.next());
// Verify that the map accessor's value count was set.
final MapVector mapVector = (MapVector) actual.container().getValueVector(1).getValueVector();
assertEquals(actual.rowCount(), mapVector.getAccessor().getValueCount());
final SingleRowSet expected = fixture.rowSetBuilder(schema)
    .addRow(10, objArray(intArray(11, 12)))
    .addRow(20, objArray(intArray(21, 22)))
    .addRow(30, objArray(intArray(31, 32)))
    .build();
RowSetUtilities.verify(expected, actual);
}
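The same structure can also be inspected directly through the MapVector API once the row set is built. A minimal sketch, reusing the actual row set from the test above; it assumes getChild("b") returns the repeated INT child that was populated through bWriter.
// Direct inspection of the map's child vector (illustrative only).
final MapVector m = (MapVector) actual.container().getValueVector(1).getValueVector();
final ValueVector b = m.getChild("b");
// The child created by the schema builder should exist, and the map's value count
// should match the row count, mirroring the assertion in the test above.
assertNotNull(b);
assertEquals(actual.rowCount(), m.getAccessor().getValueCount());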
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class TestRecordBatchSizer, the method testEmptyBatchMap.
@Test
public void testEmptyBatchMap() {
TupleMetadata schema = new SchemaBuilder()
    .addMap("map")
      .add("key", MinorType.INT)
      .add("value", MinorType.VARCHAR)
      .resumeSchema()
    .buildSchema();
RowSetBuilder builder = fixture.rowSetBuilder(schema);
RowSet rows = builder.build();
// Run the record batch sizer on the resulting batch.
RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
assertEquals(1, sizer.columns().size());
/**
 * stdDataSize:50+4, stdNetSize:50+4+4, dataSizePerEntry:0,
 * netSizePerEntry:0,
 * totalDataSize:0, totalNetSize:0,
 * valueCount:0,
 * elementCount:0, cardinality:0, isVariableWidth:false
 */
verifyColumnValues(sizer.columns().get("map"), 54, 58, 0, 0, 0, 0, 0, 0, 0, false);
// Verify memory allocation is done correctly based on std size for empty batch.
SingleRowSet empty = fixture.rowSet(schema);
VectorAccessible accessible = empty.vectorAccessible();
for (VectorWrapper<?> vw : accessible) {
ValueVector v = vw.getValueVector();
RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
// Allocates to nearest power of two
colSize.allocateVector(v, testRowCount);
MapVector mapVector = (MapVector) v;
ValueVector keyVector = mapVector.getChild("key");
ValueVector valueVector1 = mapVector.getChild("value");
assertEquals((Integer.highestOneBit(testRowCount) << 1), keyVector.getValueCapacity());
UInt4Vector offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, valueVector1.getValueCapacity());
// Allocating one less than a power of two rounds the capacity up to that power of two.
colSize.allocateVector(v, testRowCountPowerTwo - 1);
mapVector = (MapVector) v;
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals((Integer.highestOneBit(testRowCountPowerTwo - 1) << 1), keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
assertEquals(Integer.highestOneBit(testRowCountPowerTwo) - 1, valueVector1.getValueCapacity());
// Allocate for max rows.
colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
mapVector = (MapVector) v;
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals(ValueVector.MAX_ROW_COUNT, keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
assertEquals(ValueVector.MAX_ROW_COUNT - 1, valueVector1.getValueCapacity());
// Allocate for 0 rows; should allocate for at least 1 row.
colSize.allocateVector(v, 0);
mapVector = (MapVector) v;
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity());
v.clear();
}
empty.clear();
rows.clear();
}
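The power-of-two assertions above reduce to plain JDK arithmetic. A small sketch of that rounding; the literal 100 is a hypothetical stand-in for the fixture's testRowCount.
// Integer.highestOneBit(n) << 1 is the next power of two above n, which is what the
// capacity assertions above check after allocateVector(v, n).
int rowCount = 100;
int capacity = Integer.highestOneBit(rowCount) << 1;
System.out.println(capacity); // prints 128
// Passing one less than a power of two keeps that power of two as the capacity,
// which is why the test calls allocateVector(v, testRowCountPowerTwo - 1).
System.out.println(Integer.highestOneBit(128 - 1) << 1); // prints 128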
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class StatisticsAggBatch, the method createAggregatorInternal.
@Override
protected StreamingAggregator createAggregatorInternal() {
List<LogicalExpression> keyExprs = Lists.newArrayList();
List<LogicalExpression> valueExprs = Lists.newArrayList();
List<TypedFieldId> keyOutputIds = Lists.newArrayList();
String[] colMeta = new String[] { Statistic.COLNAME, Statistic.COLTYPE };
container.clear();
// For each implicit metadata column (column name, column type), build a map with one
// nested key column per incoming field, skipping implicit file/partition columns.
for (String col : colMeta) {
MapVector parent = new MapVector(col, oContext.getAllocator(), null);
container.add(parent);
for (MaterializedField mf : incoming.getSchema()) {
LogicalExpression expr;
if (col.equals(colMeta[0])) {
expr = ValueExpressions.getChar(SchemaPath.getSimplePath(mf.getName()).toString(), 0);
} else {
try {
expr = ValueExpressions.getChar(DrillStatsTable.getMapper().writeValueAsString(mf.getType()), 0);
} catch (JsonProcessingException e) {
throw UserException.dataWriteError(e).addContext("Failed to write statistics to JSON").build();
}
}
// Ignore implicit columns
if (!isImplicitFileOrPartitionColumn(mf, incoming.getContext().getOptions())) {
createNestedKeyColumn(parent, SchemaPath.getSimplePath(mf.getName()).toString(), expr, keyExprs, keyOutputIds);
}
}
}
// For each statistics function, build a map whose entries associate each eligible column
// with the computed statistic, e.g. for NDV: employee NDV = 500, salary NDV = 10.
for (String func : functions) {
MapVector parent = new MapVector(func, oContext.getAllocator(), null);
container.add(parent);
for (MaterializedField mf : incoming.getSchema()) {
// Skip columns whose minor type is unsupported; complex types such as MAP and LIST are not supported.
if (isColMinorTypeValid(mf) && !isImplicitFileOrPartitionColumn(mf, incoming.getContext().getOptions())) {
List<LogicalExpression> args = Lists.newArrayList();
args.add(SchemaPath.getSimplePath(mf.getName()));
LogicalExpression call = FunctionCallFactory.createExpression(func, args);
addMapVector(SchemaPath.getSimplePath(mf.getName()).toString(), parent, call, valueExprs);
}
}
}
// Now generate the code for the statistics aggregate
return codegenAggregator(keyExprs, valueExprs, keyOutputIds);
}
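For a single incoming column and one statistic, the expressions built by the loops above reduce to the following sketch. It reuses only calls already visible in the method; the column name "salary" and the function name "ndv" are illustrative assumptions, since the real names come from the incoming schema and the configured functions list.
// Key expression for the implicit column-name map: the column name as a CHAR literal.
String colName = SchemaPath.getSimplePath("salary").toString();
LogicalExpression keyExpr = ValueExpressions.getChar(colName, 0);
// Value expression for a statistics map: the aggregate function applied to the column.
List<LogicalExpression> args = Lists.newArrayList();
args.add(SchemaPath.getSimplePath("salary"));
LogicalExpression statCall = FunctionCallFactory.createExpression("ndv", args);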
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class ColumnMergedStatistic, the method setOutput.
@Override
public void setOutput(MapVector output) {
// Check that the vector passed in is a MAP vector.
assert (output.getField().getType().getMinorType() == TypeProtos.MinorType.MAP);
MapVector outputMap = (MapVector) output;
for (ValueVector outMapCol : outputMap) {
String colName = outMapCol.getField().getName();
VarCharVector vv = (VarCharVector) outMapCol;
vv.allocateNewSafe();
// Set column name in ValueVector
vv.getMutator().setSafe(0, colName.getBytes(), 0, colName.length());
}
// Now moving to COMPLETE state
state = State.COMPLETE;
}
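A short sketch of how the values written above could be read back for verification; it assumes the VarCharVector accessor's get(0) returns the stored bytes and that column names are ASCII, so the charset round-trip is lossless.
// Read back the column name written at index 0 of each child vector (illustrative only).
for (ValueVector outMapCol : outputMap) {
VarCharVector vv = (VarCharVector) outMapCol;
String stored = new String(vv.getAccessor().get(0), StandardCharsets.UTF_8);
assert stored.equals(outMapCol.getField().getName());
}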
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class HBaseRecordReader, the method setup.
@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
this.operatorContext = context;
this.outputMutator = output;
familyVectorMap = new HashMap<>();
try {
hTable = connection.getTable(hbaseTableName);
// Add top-level column-family map vectors to the output in the order specified when
// creating the reader (order of first appearance in the query).
for (SchemaPath column : getColumns()) {
if (column.equals(ROW_KEY_PATH)) {
MaterializedField field = MaterializedField.create(column.getAsNamePart().getName(), ROW_KEY_TYPE);
rowKeyVector = outputMutator.addField(field, VarBinaryVector.class);
} else {
getOrCreateFamilyVector(column.getRootSegment().getPath(), false);
}
}
// Add map and child vectors for any HBase columns that are requested (in
// order to avoid later creation of dummy NullableIntVectors for them).
final Set<Map.Entry<byte[], NavigableSet<byte[]>>> familiesEntries = hbaseScanColumnsOnly.getFamilyMap().entrySet();
for (Map.Entry<byte[], NavigableSet<byte[]>> familyEntry : familiesEntries) {
final String familyName = new String(familyEntry.getKey(), StandardCharsets.UTF_8);
final MapVector familyVector = getOrCreateFamilyVector(familyName, false);
final Set<byte[]> children = familyEntry.getValue();
if (null != children) {
for (byte[] childNameBytes : children) {
final String childName = new String(childNameBytes, StandardCharsets.UTF_8);
getOrCreateColumnVector(familyVector, childName);
}
}
}
// Add map vectors for any HBase column families that are requested.
for (String familyName : completeFamilies) {
getOrCreateFamilyVector(familyName, false);
}
resultScanner = hTable.getScanner(hbaseScan);
} catch (SchemaChangeException | IOException e) {
throw new ExecutionSetupException(e);
}
}
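The helpers getOrCreateFamilyVector and getOrCreateColumnVector are not shown above. The following sketch shows what creating a child vector for one HBase qualifier is presumed to look like, based on MapVector's addOrGet API; the VARBINARY type and the allocate-only-on-first-creation behavior are assumptions, not code taken from this reader.
// Presumed child-vector creation for one qualifier inside a column-family map.
int oldSize = familyVector.size();
NullableVarBinaryVector child = familyVector.addOrGet(childName, Types.optional(MinorType.VARBINARY), NullableVarBinaryVector.class);
if (oldSize != familyVector.size()) {
// Allocate only when the child was newly created by addOrGet.
child.allocateNew();
}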