Search in sources :

Example 11 with MapJoinTableContainerSerDe

use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in project hive by apache.

From the class TestMapJoinOperator, method executeTestImplementation:

/**
 * Runs one map join implementation over the supplied test data and compares the rows it
 * emits with {@code expectedTestRowMultiSet}.
 *
 * <p>The operator is created through {@code MapJoinTestConfig}, connected to a collector
 * operator that gathers output rows, and then fed the big-table data. For the FULL OUTER
 * variation without dynamic partition hash join, a FULL OUTER intercept is wired in instead
 * of initializing the operator directly; in that case {@code ROW_MODE_HASH_MAP} is not
 * supported and the method returns without driving any data.
 *
 * <p>A collector that is not closed, or that was aborted, fails the test through
 * {@code Assert.fail}. A row mismatch is only reported on standard output together with the
 * differences; it does not throw.
 *
 * @param mapJoinImplementation the implementation under test
 * @param testDesc description of the join being tested
 * @param testData input data for the join
 * @param expectedTestRowMultiSet the rows the join is expected to produce
 * @param title label included in the printed progress and result lines
 * @throws Exception propagated from the operator setup and data-driving calls
 */
private void executeTestImplementation(MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet, String title) throws Exception {
    System.out.println("*BENCHMARK* Starting implementation " + mapJoinImplementation + " variation " + testDesc.vectorMapJoinVariation + " title " + title);

    // UNDONE: Parameterize for implementation variation?
    MapJoinDesc joinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc);
    final boolean vectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation);
    RowTestObjectsMultiSet actualRows = new RowTestObjectsMultiSet();

    CreateMapJoinResult created = MapJoinTestConfig.createMapJoinImplementation(mapJoinImplementation, testDesc, testData, joinDesc);
    MapJoinOperator joinOp = created.mapJoinOperator;
    MapJoinTableContainer tableContainer = created.mapJoinTableContainer;
    MapJoinTableContainerSerDe tableContainerSerDe = created.mapJoinTableContainerSerDe;

    // Pick the collector that matches the operator's output mode.
    CountCollectorTestOperator collector;
    if (vectorOutput) {
        VectorizationContext outputContext = ((VectorizationContextRegion) joinOp).getOutputVectorizationContext();
        Integer[] boxedColumns = outputContext.getProjectedColumns().toArray(new Integer[0]);
        collector = new TestMultiSetVectorCollectorOperator(ArrayUtils.toPrimitive(boxedColumns), testDesc.outputTypeInfos, testDesc.outputObjectInspectors, actualRows);
    } else {
        collector = new TestMultiSetCollectorOperator(testDesc.outputObjectInspectors, actualRows);
    }
    MapJoinTestConfig.connectOperators(joinOp, collector);

    CountCollectorTestOperator interceptCollector = null;
    if (testDesc.vectorMapJoinVariation != VectorMapJoinVariation.FULL_OUTER || joinDesc.isDynamicPartitionHashJoin()) {
        // Invoke initializeOp methods.
        joinOp.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors);
        // Fixup the mapJoinTables.
        joinOp.setTestMapJoinTableContainer(1, tableContainer, tableContainerSerDe);
    } else {
        if (mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) {
            // Not supported.
            return;
        }
        // Wire in FULL OUTER Intercept.
        interceptCollector = MapJoinTestConfig.addFullOuterIntercept(mapJoinImplementation, testDesc, actualRows, testData, joinOp, tableContainer, tableContainerSerDe);
    }

    if (vectorOutput) {
        MapJoinTestData.driveVectorBigTableData(testDesc, testData, joinOp);
    } else {
        MapJoinTestData.driveBigTableData(testDesc, testData, joinOp);
    }

    failIfOpenOrAborted(collector, "");
    if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && !joinDesc.isDynamicPartitionHashJoin()) {
        failIfOpenOrAborted(interceptCollector, "intercept ");
    }
    System.out.println("*BENCHMARK* executeTestImplementation row count " + collector.getRowCount());

    // Verify the output!
    String planSuffix = testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER
        ? " mapJoinPlanVariation " + testDesc.mapJoinPlanVariation.name()
        : "";
    final boolean rowsMatch = expectedTestRowMultiSet.verify(actualRows, "expected", "actual");
    String outcome = rowsMatch ? " verify succeeded " : " verify failed";
    System.out.println("*BENCHMARK* " + title + outcome + " for implementation " + mapJoinImplementation + " variation " + testDesc.vectorMapJoinVariation + planSuffix);
    if (!rowsMatch) {
        expectedTestRowMultiSet.displayDifferences(actualRows, "expected", "actual");
    }
}

// Fails the test when the given collector was not closed or was aborted; messagePrefix is
// prepended to the failure message.
private static void failIfOpenOrAborted(CountCollectorTestOperator collector, String messagePrefix) {
    if (!collector.getIsClosed()) {
        Assert.fail(messagePrefix + "collector operator not closed");
    }
    if (collector.getIsAborted()) {
        Assert.fail(messagePrefix + "collector operator aborted");
    }
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) TestMultiSetVectorCollectorOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) CountCollectorTestOperator(org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator) CreateMapJoinResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) RowTestObjectsMultiSet(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) TestMultiSetCollectorOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)

Example 12 with MapJoinTableContainerSerDe

use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in project hive by apache.

From the class MapJoinTestConfig, method createMapJoinTableContainerSerDe:

/**
 * Builds the {@link MapJoinTableContainerSerDe} for the small table at position 1 of the
 * given map join description.
 *
 * <p>The key side uses a {@code BinarySortableSerDe} initialized with the key table
 * properties. The value side uses the SerDe class named by the value table descriptor,
 * taken from the plain value descriptors when {@code getNoOuterJoin()} is true and from the
 * filtered value descriptors otherwise.
 *
 * @param mapJoinDesc the map join description supplying the key and value table descriptors
 * @return a SerDe pair wrapping the key and value contexts
 * @throws SerDeException declared for the SerDe setup calls made here
 */
public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoinDesc mapJoinDesc) throws SerDeException {
    final Byte smallTablePosition = 1;

    // Key side.
    TableDesc keyDesc = mapJoinDesc.getKeyTblDesc();
    AbstractSerDe keySerDe = (AbstractSerDe) ReflectionUtil.newInstance(BinarySortableSerDe.class, null);
    keySerDe.initialize(null, keyDesc.getProperties(), null);
    MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerDe, false);

    // Value side: outer joins use the filtered value descriptors.
    final List<TableDesc> valueDescs = mapJoinDesc.getNoOuterJoin()
        ? mapJoinDesc.getValueTblDescs()
        : mapJoinDesc.getValueFilteredTblDescs();
    TableDesc valueDesc = valueDescs.get(smallTablePosition);
    AbstractSerDe valueSerializer = (AbstractSerDe) ReflectionUtil.newInstance(valueDesc.getSerDeClass(), null);
    valueSerializer.initialize(null, valueDesc.getProperties(), null);
    boolean valueHasFilter = hasFilter(mapJoinDesc, smallTablePosition);
    MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerializer, valueHasFilter);

    return new MapJoinTableContainerSerDe(keyContext, valueContext);
}
Also used : MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe)

Aggregations

MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe): 12; MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer): 8; VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 6; VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc): 6; ArrayList (java.util.ArrayList): 5; List (java.util.List): 5; MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext): 5; MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 4; MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer): 4; VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator): 4; VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 4; HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper): 3; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3; ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 3; TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 3; AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 3; ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair): 2; Pair (org.apache.commons.lang3.tuple.Pair): 2; CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 2; CountCollectorTestOperator (org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator): 2