Search in sources :

Example 16 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.

the class BatchValidator method validateUnionVector.

/**
 * Validates the member vectors of a union vector.
 * <p>
 * Each subtype declared by the union is looked up in the union's internal
 * type map, and every member found there is passed to
 * {@code validateVector} using the union's value count. A declared subtype
 * of {@code LATE} is reported through {@code error} and otherwise skipped.
 *
 * @param name name under which problems are reported; also used as the
 *             prefix of the name given to each member vector
 * @param vector the union vector to check
 */
private void validateUnionVector(String name, UnionVector vector) {
    final int rowCount = vector.getAccessor().getValueCount();
    final MapVector typeMap = vector.getTypeMap();
    for (MinorType subType : vector.getSubTypes()) {
        final String typeName = subType.name();
        if (subType == MinorType.LATE) {
            error(name, vector, String.format("Union vector includes illegal type LATE %s", typeName));
            continue;
        }
        // Warning: look the member up in the internal map only. Do not call
        // getMember(type); doing so will create an empty map vector that
        // causes validation to fail.
        final ValueVector member = typeMap.getChild(typeName);
        if (member != null) {
            validateVector(name + "-type-" + typeName, rowCount, member);
        }
        // A declared type without a matching member in the internal map is
        // deliberately not reported for now: TopNBatch, SortBatch and perhaps
        // others will create vectors with a set of types, but won't actually
        // populate some of the types.
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) BaseRepeatedValueVector(org.apache.drill.exec.vector.complex.BaseRepeatedValueVector) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) MapVector(org.apache.drill.exec.vector.complex.MapVector) AbstractRepeatedMapVector(org.apache.drill.exec.vector.complex.AbstractRepeatedMapVector)

Example 17 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.

the class StatisticsMergeBatch method addVectorToOutgoingContainer.

/**
 * Creates the outgoing map vector for one statistic and registers it with
 * the outgoing container. The MajorType of the new map vector is taken from
 * the incoming value vector. Please look at the comments above the class
 * definition, which describe the incoming/outgoing batch schema.
 *
 * @param outStatName name of the statistic; used as the name of the output
 *                    map vector (e.g. statcount)
 * @param vw wrapper around the incoming map vector for this statistic
 */
private void addVectorToOutgoingContainer(String outStatName, VectorWrapper<?> vw) {
    // The incoming vector is a map with one child per column.
    final MapVector incomingMap = (MapVector) vw.getValueVector();
    assert incomingMap.getPrimitiveVectors().size() > 0;

    // Build the output map vector under the statistic's name, reusing the
    // major type of the incoming map.
    final MajorType mapType = incomingMap.getField().getType();
    final MaterializedField outField = MaterializedField.create(outStatName, mapType);
    final ValueVector created = TypeHelper.getNewVector(outField, oContext.getAllocator());
    container.add(created);
    final MapVector outgoingMap = (MapVector) created;

    for (ValueVector column : incomingMap) {
        final String columnName = column.getField().getName();

        // The column-name and column-type statistics keep the type and the
        // vector class of the incoming child vector.
        if (outStatName.equals(Statistic.COLNAME) || outStatName.equals(Statistic.COLTYPE)) {
            outgoingMap.addOrGet(columnName, column.getField().getType(), column.getClass());
            continue;
        }

        // Every other statistic gets an OPTIONAL child whose minor type
        // depends on the statistic: FLOAT8 for the average width, VARBINARY
        // for the HLL and t-digest merges, BIGINT otherwise.
        final TypeProtos.MinorType statType =
            outStatName.equals(Statistic.AVG_WIDTH)
                ? TypeProtos.MinorType.FLOAT8
                : (outStatName.equals(Statistic.HLL_MERGE) || outStatName.equals(Statistic.TDIGEST_MERGE))
                    ? TypeProtos.MinorType.VARBINARY
                    : TypeProtos.MinorType.BIGINT;
        final Class<? extends ValueVector> vectorClass =
            TypeHelper.getValueVectorClass(statType, TypeProtos.DataMode.OPTIONAL);
        outgoingMap.addOrGet(columnName, Types.optional(statType), vectorClass);
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) MaterializedField(org.apache.drill.exec.record.MaterializedField) TypeProtos(org.apache.drill.common.types.TypeProtos) MapVector(org.apache.drill.exec.vector.complex.MapVector)

Example 18 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.

the class TestVariantAccessors method testBuildRowSetUnionArray.

/**
 * Checks the vectors created for a list column whose elements are a union
 * of BIGINT, a map and a nested single-type list.
 */
@Test
public void testBuildRowSetUnionArray() {
    // One column: list1 = list of union(BIGINT, map(a: INT, b: VARCHAR), list(FLOAT8))
    final TupleMetadata schema = new SchemaBuilder()
        .addList("list1")
            .addType(MinorType.BIGINT)
            .addMap()
                .addNullable("a", MinorType.INT)
                .addNullable("b", MinorType.VARCHAR)
                .resumeUnion()
            .addList()
                .addType(MinorType.FLOAT8)
                .resumeUnion()
            .resumeSchema()
        .buildSchema();
    final ExtendableRowSet rowSet = fixture.rowSet(schema);
    final VectorContainer container = rowSet.container();
    assertEquals(1, container.getNumberOfColumns());

    // The only column is a list with complex internal structure
    final ValueVector column = container.getValueVector(0).getValueVector();
    assertTrue(column instanceof ListVector);
    final ListVector outerList = (ListVector) column;
    assertTrue(outerList.getDataVector() instanceof UnionVector);
    final UnionVector elementUnion = (UnionVector) outerList.getDataVector();

    // The union inside the list declares exactly the three subtypes
    final MajorType elementType = elementUnion.getField().getType();
    final List<MinorType> subTypes = elementType.getSubTypeList();
    assertEquals(3, subTypes.size());
    assertTrue(subTypes.contains(MinorType.BIGINT));
    assertTrue(subTypes.contains(MinorType.MAP));
    assertTrue(subTypes.contains(MinorType.LIST));

    // Members of the union are children of its internal type map,
    // keyed by the name of the minor type
    final MapVector members = elementUnion.getTypeMap();
    final ValueVector bigIntMember = members.getChild(MinorType.BIGINT.name());
    assertTrue(bigIntMember instanceof NullableBigIntVector);

    // Map inside the list
    final ValueVector mapMember = members.getChild(MinorType.MAP.name());
    assertTrue(mapMember instanceof MapVector);
    final MapVector innerMap = (MapVector) mapMember;
    final ValueVector columnA = innerMap.getChild("a");
    assertNotNull(columnA);
    assertTrue(columnA instanceof NullableIntVector);
    final ValueVector columnB = innerMap.getChild("b");
    assertNotNull(columnB);
    assertTrue(columnB instanceof NullableVarCharVector);

    // Single-type list inside the outer list
    final ValueVector listMember = members.getChild(MinorType.LIST.name());
    assertTrue(listMember instanceof ListVector);
    final ListVector innerList = (ListVector) listMember;
    assertTrue(innerList.getDataVector() instanceof NullableFloat8Vector);

    rowSet.clear();
}
Also used : NullableFloat8Vector(org.apache.drill.exec.vector.NullableFloat8Vector) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) UnionVector(org.apache.drill.exec.vector.complex.UnionVector) VectorContainer(org.apache.drill.exec.record.VectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) NullableIntVector(org.apache.drill.exec.vector.NullableIntVector) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) NullableBigIntVector(org.apache.drill.exec.vector.NullableBigIntVector) ListVector(org.apache.drill.exec.vector.complex.ListVector) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) ExtendableRowSet(org.apache.drill.exec.physical.rowSet.RowSet.ExtendableRowSet) MapVector(org.apache.drill.exec.vector.complex.MapVector) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 19 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.

the class TestVariantAccessors method testBuildRowSetUnion.

/**
 * Checks the vectors created for a top-level union column "u" whose
 * subtypes are INT, a map (c: BIGINT, d: VARCHAR) and a list of VARCHAR.
 */
@Test
public void testBuildRowSetUnion() {
    final TupleMetadata schema = new SchemaBuilder().addUnion("u").addType(MinorType.INT).addMap().addNullable("c", MinorType.BIGINT).addNullable("d", MinorType.VARCHAR).resumeUnion().addList().addType(MinorType.VARCHAR).resumeUnion().resumeSchema().buildSchema();
    final ExtendableRowSet rowSet = fixture.rowSet(schema);
    final VectorContainer vc = rowSet.container();
    assertEquals(1, vc.getNumberOfColumns());
    // Single union
    final ValueVector vector = vc.getValueVector(0).getValueVector();
    assertTrue(vector instanceof UnionVector);
    final UnionVector union = (UnionVector) vector;
    // Union members are children of the internal type map, keyed by the
    // name of their minor type.
    final MapVector typeMap = union.getTypeMap();
    ValueVector member = typeMap.getChild(MinorType.INT.name());
    assertTrue(member instanceof NullableIntVector);
    // Inner map (looked up and checked once; the lookup was previously
    // repeated verbatim)
    member = typeMap.getChild(MinorType.MAP.name());
    assertTrue(member instanceof MapVector);
    final MapVector childMap = (MapVector) member;
    ValueVector mapMember = childMap.getChild("c");
    assertNotNull(mapMember);
    assertTrue(mapMember instanceof NullableBigIntVector);
    mapMember = childMap.getChild("d");
    assertNotNull(mapMember);
    assertTrue(mapMember instanceof NullableVarCharVector);
    // Inner list
    member = typeMap.getChild(MinorType.LIST.name());
    assertTrue(member instanceof ListVector);
    final ListVector list = (ListVector) member;
    assertTrue(list.getDataVector() instanceof NullableVarCharVector);
    rowSet.clear();
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) NullableIntVector(org.apache.drill.exec.vector.NullableIntVector) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) NullableBigIntVector(org.apache.drill.exec.vector.NullableBigIntVector) ListVector(org.apache.drill.exec.vector.complex.ListVector) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) UnionVector(org.apache.drill.exec.vector.complex.UnionVector) ExtendableRowSet(org.apache.drill.exec.physical.rowSet.RowSet.ExtendableRowSet) VectorContainer(org.apache.drill.exec.record.VectorContainer) MapVector(org.apache.drill.exec.vector.complex.MapVector) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 20 with MapVector

use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.

the class TestUnnestWithLateralCorrectness method testNestedUnnest.

/**
 *     Run a plan like the following for various input batches :
 *             Lateral1
 *               /    \
 *              /    Lateral2
 *            Scan      / \
 *                     /   \
 *                Project1 Project2
 *                   /       \
 *                  /         \
 *              Unnest1      Unnest2
 *
 * The output batches of Lateral1 are collected and compared, value by
 * value, against {@code baseline}. The two original input columns
 * ("unnestColumn" and "colB") are present in the output but are skipped
 * during the comparison.
 *
 * @param incomingSchemas schema used to build each incoming batch; indexed
 *                        in step with {@code data}
 * @param iterOutcomes outcomes handed to the mock incoming batch
 * @param execKill number of batches after which to kill the execution (!)
 * @param data input rows, one array per incoming batch; each element is
 *             added as a row together with a running row number
 * @param baseline expected output, indexed as [batch][vector][value]; does
 *                 not include the original unnest column(s)
 * @param <T> element type of the input data and of the baseline
 * @throws Exception declared for the operator calls made here; a
 *                   {@code UserException} raised while running or
 *                   validating is rethrown unchanged, any other exception
 *                   fails the test
 */
private <T> void testNestedUnnest(TupleMetadata[] incomingSchemas, RecordBatch.IterOutcome[] iterOutcomes,
        int execKill, T[][] data, T[][][] baseline) throws Exception {
    // Get the incoming container with dummy data for LJ
    final List<VectorContainer> incomingContainer = new ArrayList<>(data.length);
    // Create data
    ArrayList<RowSet.SingleRowSet> rowSets = new ArrayList<>();
    int rowNumber = 0;
    int batchNum = 0;
    for (Object[] recordBatch : data) {
        RowSetBuilder rowSetBuilder = fixture.rowSetBuilder(incomingSchemas[batchNum]);
        for (Object rowData : recordBatch) {
            rowSetBuilder.addRow(++rowNumber, rowData);
        }
        RowSet.SingleRowSet rowSet = rowSetBuilder.build();
        rowSets.add(rowSet);
        incomingContainer.add(rowSet.container());
        batchNum++;
    }
    // Get the unnest POPConfig
    final UnnestPOP unnestPopConfig1 = new UnnestPOP(null, SchemaPath.getSimplePath("unnestColumn"), DrillUnnestRelBase.IMPLICIT_COLUMN);
    final UnnestPOP unnestPopConfig2 = new UnnestPOP(null, SchemaPath.getSimplePath("colB"), DrillUnnestRelBase.IMPLICIT_COLUMN);
    // Get the IterOutcomes for LJ
    final List<RecordBatch.IterOutcome> outcomes = new ArrayList<>(iterOutcomes.length);
    for (RecordBatch.IterOutcome o : iterOutcomes) {
        outcomes.add(o);
    }
    // Create incoming MockRecordBatch
    final MockRecordBatch incomingMockBatch = new MockRecordBatch(fixture.getFragmentContext(), operatorContext, incomingContainer, outcomes, incomingContainer.get(0).getSchema());
    // setup Unnest record batch
    final UnnestRecordBatch unnestBatch1 = new UnnestRecordBatch(unnestPopConfig1, fixture.getFragmentContext());
    final UnnestRecordBatch unnestBatch2 = new UnnestRecordBatch(unnestPopConfig2, fixture.getFragmentContext());
    // Create intermediate Project
    final Project projectPopConfig1 = new Project(DrillLogicalTestUtils.parseExprs("unnestColumn.colB", "colB", unnestPopConfig1.getImplicitColumn(), unnestPopConfig1.getImplicitColumn()), unnestPopConfig1);
    final ProjectRecordBatch projectBatch1 = new ProjectRecordBatch(projectPopConfig1, unnestBatch1, fixture.getFragmentContext());
    final Project projectPopConfig2 = new Project(DrillLogicalTestUtils.parseExprs("colB", "unnestColumn2", unnestPopConfig2.getImplicitColumn(), unnestPopConfig2.getImplicitColumn()), unnestPopConfig2);
    final ProjectRecordBatch projectBatch2 = new ProjectRecordBatch(projectPopConfig2, unnestBatch2, fixture.getFragmentContext());
    // Lateral2 joins the two projects; Lateral1 joins the mock scan with Lateral2
    final LateralJoinPOP ljPopConfig2 = new LateralJoinPOP(projectPopConfig1, projectPopConfig2, JoinRelType.INNER, DrillLateralJoinRelBase.IMPLICIT_COLUMN, Lists.newArrayList());
    final LateralJoinPOP ljPopConfig1 = new LateralJoinPOP(mockPopConfig, ljPopConfig2, JoinRelType.INNER, DrillLateralJoinRelBase.IMPLICIT_COLUMN, Lists.newArrayList());
    final LateralJoinBatch lateralJoinBatch2 = new LateralJoinBatch(ljPopConfig2, fixture.getFragmentContext(), projectBatch1, projectBatch2);
    final LateralJoinBatch lateralJoinBatch1 = new LateralJoinBatch(ljPopConfig1, fixture.getFragmentContext(), incomingMockBatch, lateralJoinBatch2);
    // set pointer to Lateral in unnest
    unnestBatch1.setIncoming((LateralContract) lateralJoinBatch1);
    unnestBatch2.setIncoming((LateralContract) lateralJoinBatch2);
    // Simulate the pipeline by calling next on the incoming
    // results is an array of batches, each batch being an array of output vectors.
    List<List<ValueVector>> resultList = new ArrayList<>();
    List<List<ValueVector>> results = null;
    int batchesProcessed = 0;
    try {
        try {
            while (!isTerminal(lateralJoinBatch1.next())) {
                if (lateralJoinBatch1.getRecordCount() > 0) {
                    addBatchToResults(resultList, lateralJoinBatch1);
                }
                batchesProcessed++;
                if (batchesProcessed == execKill) {
                    lateralJoinBatch1.getContext().getExecutorState().fail(new DrillException("Testing failure of execution."));
                    lateralJoinBatch1.cancel();
                }
            // else nothing to do
            }
        } catch (UserException e) {
            throw e;
        } catch (Exception e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new Exception("Test failed to execute lateralJoinBatch.next() because: " + e.getMessage(), e);
        }
        // Check results against baseline
        results = resultList;
        int batchIndex = 0;
        int vectorIndex = 0;
        for (List<ValueVector> batch : results) {
            int vectorCount = batch.size();
            if (vectorCount != baseline[batchIndex].length + 2) {
                // baseline does not include the original unnest column(s)
                fail("Test failed in validating unnest output. Batch column count mismatch.");
            }
            for (ValueVector vv : batch) {
                if (vv.getField().getName().equals("unnestColumn") || vv.getField().getName().equals("colB")) {
                    // skip the original input column
                    continue;
                }
                int valueCount = vv.getAccessor().getValueCount();
                if (valueCount != baseline[batchIndex][vectorIndex].length) {
                    fail("Test failed in validating unnest output. Value count mismatch in batch number " + (batchIndex + 1) + ".");
                }
                for (int valueIndex = 0; valueIndex < valueCount; valueIndex++) {
                    if (vv instanceof MapVector) {
                        if (!compareMapBaseline(baseline[batchIndex][vectorIndex][valueIndex], vv.getAccessor().getObject(valueIndex))) {
                            fail("Test failed in validating unnest(Map) output. Value mismatch");
                        }
                    } else if (vv instanceof VarCharVector) {
                        Object val = vv.getAccessor().getObject(valueIndex);
                        if (((String) baseline[batchIndex][vectorIndex][valueIndex]).compareTo(val.toString()) != 0) {
                            // Report the same baseline element that was compared (batch, vector, value).
                            fail("Test failed in validating unnest output. Value mismatch. Baseline value[" + vectorIndex + "][" + valueIndex + "]" + ": " + baseline[batchIndex][vectorIndex][valueIndex] + "   VV.getObject(valueIndex): " + val);
                        }
                    } else {
                        Object val = vv.getAccessor().getObject(valueIndex);
                        if (!baseline[batchIndex][vectorIndex][valueIndex].equals(val)) {
                            fail("Test failed in validating unnest output. Value mismatch. Baseline value[" + vectorIndex + "][" + valueIndex + "]" + ": " + baseline[batchIndex][vectorIndex][valueIndex] + "   VV.getObject(valueIndex): " + val);
                        }
                    }
                }
                vectorIndex++;
            }
            // vectorIndex counts only the compared vectors of one batch
            vectorIndex = 0;
            batchIndex++;
        }
    } catch (UserException e) {
        // Valid exception
        throw e;
    } catch (Exception e) {
        // Fail the test, keeping the triggering exception attached as the cause.
        throw new AssertionError("Test failed. Exception : " + e.getMessage(), e);
    } finally {
        // Close all the resources for this test case
        unnestBatch1.close();
        lateralJoinBatch1.close();
        unnestBatch2.close();
        lateralJoinBatch2.close();
        incomingMockBatch.close();
        if (results != null) {
            for (List<ValueVector> batch : results) {
                for (ValueVector vv : batch) {
                    vv.clear();
                }
            }
        }
        for (RowSet.SingleRowSet rowSet : rowSets) {
            rowSet.clear();
        }
    }
}
Also used : MockRecordBatch(org.apache.drill.exec.physical.impl.MockRecordBatch) ProjectRecordBatch(org.apache.drill.exec.physical.impl.project.ProjectRecordBatch) RecordBatch(org.apache.drill.exec.record.RecordBatch) ArrayList(java.util.ArrayList) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) UnnestPOP(org.apache.drill.exec.physical.config.UnnestPOP) DrillException(org.apache.drill.common.exceptions.DrillException) RowSetBuilder(org.apache.drill.exec.physical.rowSet.RowSetBuilder) ArrayList(java.util.ArrayList) List(java.util.List) UserException(org.apache.drill.common.exceptions.UserException) LateralJoinBatch(org.apache.drill.exec.physical.impl.join.LateralJoinBatch) VarCharVector(org.apache.drill.exec.vector.VarCharVector) UserException(org.apache.drill.common.exceptions.UserException) DrillException(org.apache.drill.common.exceptions.DrillException) VectorContainer(org.apache.drill.exec.record.VectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) Project(org.apache.drill.exec.physical.config.Project) LateralJoinPOP(org.apache.drill.exec.physical.config.LateralJoinPOP) MockRecordBatch(org.apache.drill.exec.physical.impl.MockRecordBatch) ProjectRecordBatch(org.apache.drill.exec.physical.impl.project.ProjectRecordBatch) MapVector(org.apache.drill.exec.vector.complex.MapVector)

Aggregations

MapVector (org.apache.drill.exec.vector.complex.MapVector)40 ValueVector (org.apache.drill.exec.vector.ValueVector)21 Test (org.junit.Test)16 SubOperatorTest (org.apache.drill.test.SubOperatorTest)14 RepeatedMapVector (org.apache.drill.exec.vector.complex.RepeatedMapVector)13 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)12 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)11 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)10 MaterializedField (org.apache.drill.exec.record.MaterializedField)10 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)9 UInt4Vector (org.apache.drill.exec.vector.UInt4Vector)9 RowSetBuilder (org.apache.drill.exec.physical.rowSet.RowSetBuilder)7 ColumnSize (org.apache.drill.exec.record.RecordBatchSizer.ColumnSize)6 VectorContainer (org.apache.drill.exec.record.VectorContainer)6 VarCharVector (org.apache.drill.exec.vector.VarCharVector)6 RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector)6 TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter)5 ArrayList (java.util.ArrayList)4 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)4 SchemaPath (org.apache.drill.common.expression.SchemaPath)4