Search in sources :

Example 26 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

the class TestVectorStringUnary method doVectorIfTest.

/**
 * Vectorizes {@code genericUdf} applied to {@code children}, evaluates the resulting expression
 * over every batch supplied by {@code batchSource}, and stores the extracted results into
 * {@code resultObjects}.
 *
 * <p>When {@code stringUnaryTestMode} is {@code ADAPTOR}, the test-only adaptor override flag is
 * switched on in the {@link HiveConf} handed to the {@link VectorizationContext}. When the mode
 * is {@code VECTOR_EXPRESSION} but the produced expression is a {@link VectorUDFAdaptor}, a
 * diagnostic line is printed to standard output.
 *
 * @param typeInfo input type; only used in the diagnostic message
 * @param targetTypeInfo result type of the expression and of the extracted column
 * @param columns column names given to the vectorization context; the result is read from the
 *     column at index {@code columns.size()}
 * @param typeInfos column types given to the vectorization context
 * @param dataTypePhysicalVariations physical variations given to the vectorization context
 * @param children child expressions of the generated function descriptor
 * @param stringUnaryTestMode which execution mode is being exercised
 * @param batchSource source of input batches; its iteration is reset before use
 * @param batchContext used to create the batch that is filled and evaluated
 * @param genericUdf the UDF under test
 * @param resultObjects destination passed to {@code extractResultObjects}
 * @throws Exception if vectorization, evaluation, or extraction fails
 */
private void doVectorIfTest(TypeInfo typeInfo, TypeInfo targetTypeInfo, List<String> columns, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, StringUnaryTestMode stringUnaryTestMode, VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext, GenericUDF genericUdf, Object[] resultObjects) throws Exception {
    final ExprNodeGenericFuncDesc funcDesc = new ExprNodeGenericFuncDesc(targetTypeInfo, genericUdf, children);

    final HiveConf conf = new HiveConf();
    if (stringUnaryTestMode == StringUnaryTestMode.ADAPTOR) {
        conf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
    }

    final VectorizationContext vContext = new VectorizationContext(
        "name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), conf);
    final VectorExpression expression = vContext.getVectorExpression(funcDesc);

    // A native expression was requested but only the adaptor is available: report it.
    final boolean wantedNative = stringUnaryTestMode == StringUnaryTestMode.VECTOR_EXPRESSION;
    if (wantedNative && expression instanceof VectorUDFAdaptor) {
        System.out.println(
            "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString()
                + " stringUnaryTestMode " + stringUnaryTestMode
                + " vectorExpression " + expression.toString());
    }

    final VectorizedRowBatch rowBatch = batchContext.createVectorizedRowBatch();

    // The single result column is read from the column index right after the input columns.
    final VectorExtractRow resultExtractor = new VectorExtractRow();
    resultExtractor.init(new TypeInfo[] { targetTypeInfo }, new int[] { columns.size() });
    final Object[] scratchRow = new Object[1];

    batchSource.resetBatchIteration();
    // firstRow tracks where the current batch's rows start within resultObjects.
    for (int firstRow = 0; batchSource.fillNextBatch(rowBatch); firstRow += rowBatch.size) {
        expression.evaluate(rowBatch);
        extractResultObjects(rowBatch, firstRow, resultExtractor, scratchRow, targetTypeInfo, resultObjects);
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)

Example 27 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

the class TestVectorTimestampExtract method doVectorCastTest.

/**
 * Vectorizes {@code exprDesc}, evaluates it over every batch supplied by {@code batchSource},
 * and stores the extracted {@code int} results into {@code resultObjects}.
 *
 * <p>When {@code timestampExtractTestMode} is {@code ADAPTOR}, the test-only adaptor override
 * flag is switched on in the {@link HiveConf}. When the mode is {@code VECTOR_EXPRESSION} but
 * the produced expression is a {@link VectorUDFAdaptor}, a diagnostic line is printed to
 * standard output.
 *
 * @param dateTimeStringTypeInfo input type; only used in the diagnostic message
 * @param columns column names given to the vectorization context
 * @param columnNames column names given to the row batch context
 * @param typeInfos column types given to the vectorization context
 * @param dataTypePhysicalVariations physical variations given to the vectorization context
 * @param children not read by this method
 * @param exprDesc expression to vectorize and evaluate
 * @param timestampExtractTestMode which execution mode is being exercised
 * @param batchSource source of input batches and of the row source describing their types
 * @param resultObjects destination passed to {@code extractResultObjects}
 * @return always {@code true}
 * @throws Exception if vectorization, evaluation, or extraction fails
 */
private boolean doVectorCastTest(TypeInfo dateTimeStringTypeInfo, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, ExprNodeGenericFuncDesc exprDesc, TimestampExtractTestMode timestampExtractTestMode, VectorRandomBatchSource batchSource, Object[] resultObjects) throws Exception {
    final HiveConf conf = new HiveConf();
    if (timestampExtractTestMode == TimestampExtractTestMode.ADAPTOR) {
        conf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
    }

    final VectorizationContext vContext = new VectorizationContext(
        "name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), conf);
    final VectorExpression expression = vContext.getVectorExpression(exprDesc);
    expression.transientInit(conf);

    // A native expression was requested but only the adaptor is available: report it.
    final boolean wantedNative = timestampExtractTestMode == TimestampExtractTestMode.VECTOR_EXPRESSION;
    if (wantedNative && expression instanceof VectorUDFAdaptor) {
        System.out.println(
            "*NO NATIVE VECTOR EXPRESSION* dateTimeStringTypeInfo " + dateTimeStringTypeInfo.toString()
                + " timestampExtractTestMode " + timestampExtractTestMode
                + " vectorExpression " + expression.toString());
    }

    // Build the batch context after vectorization so it receives the scratch columns the
    // vectorization context reports.
    final VectorRandomRowSource randomRows = batchSource.getRowSource();
    final VectorizedRowBatchCtx batchCtx = new VectorizedRowBatchCtx(
        columnNames,
        randomRows.typeInfos(),
        randomRows.dataTypePhysicalVariations(),
        null, // dataColumnNums
        0,    // partitionColumnCount
        0,    // virtualColumnCount
        null, // neededVirtualColumns
        vContext.getScratchColumnTypeNames(),
        vContext.getScratchDataTypePhysicalVariations());
    final VectorizedRowBatch rowBatch = batchCtx.createVectorizedRowBatch();

    // The single int result is read from the expression's own output column.
    final VectorExtractRow resultExtractor = new VectorExtractRow();
    resultExtractor.init(
        new TypeInfo[] { TypeInfoFactory.intTypeInfo },
        new int[] { expression.getOutputColumnNum() });
    final Object[] scratchRow = new Object[1];

    batchSource.resetBatchIteration();
    // firstRow tracks where the current batch's rows start within resultObjects.
    for (int firstRow = 0; batchSource.fillNextBatch(rowBatch); firstRow += rowBatch.size) {
        expression.evaluate(rowBatch);
        extractResultObjects(rowBatch, firstRow, resultExtractor, scratchRow, TypeInfoFactory.intTypeInfo, resultObjects);
    }
    return true;
}
Also used : VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) VectorRandomRowSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)

Example 28 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

the class MapJoinTestConfig method createNativeVectorMapJoin.

/**
 * Creates a native vector map join operator for the test, together with a loaded small-table
 * container of the requested hash table implementation type.
 *
 * <p>Side effects visible here: a new {@link VectorMapJoinDesc} is attached to
 * {@code mapJoinDesc}, the big-table key and value expressions are stored on its
 * {@link VectorMapJoinInfo}, and {@code HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD} is set to
 * {@code true} on {@code testDesc.hiveConf}.
 *
 * @param testDesc test description supplying the configuration and join variation
 * @param testData test data; the small-table key map size sizes the container
 * @param mapJoinDesc map join descriptor to attach the vector descriptor to
 * @param hashTableImplementationType {@code OPTIMIZED} or {@code FAST}
 * @param shareMapJoinTableContainer currently unused; the code path that shared an existing
 *     container is disabled, so the table data is always loaded afresh
 * @return the operator, its table container, and the container SerDe ({@code null} for
 *     {@code FAST})
 * @throws RuntimeException if the hash table implementation type is neither {@code OPTIMIZED}
 *     nor {@code FAST}
 */
public static CreateMapJoinResult createNativeVectorMapJoin(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType, MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException {
    final VectorMapJoinDesc vectorMapJoinDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    mapJoinDesc.setVectorDesc(vectorMapJoinDesc);
    vectorMapJoinDesc.setHashTableImplementationType(hashTableImplementationType);
    final VectorMapJoinInfo joinInfo = vectorMapJoinDesc.getVectorMapJoinInfo();

    // Only the OPTIMIZED container gets a SerDe; for FAST it stays null.
    MapJoinTableContainerSerDe containerSerDe = null;
    final MapJoinTableContainer container;
    switch (vectorMapJoinDesc.getHashTableImplementationType()) {
        case OPTIMIZED:
            container = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
            containerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
            container.setSerde(containerSerDe.getKeyContext(), containerSerDe.getValueContext());
            break;
        case FAST:
            container = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size(), 1);
            break;
        default:
            throw new RuntimeException("Unexpected hash table implementation type " + vectorMapJoinDesc.getHashTableImplementationType());
    }

    // Always load fresh data; sharing via shareMapJoinTableContainer is disabled.
    loadTableContainerData(testDesc, testData, container);

    final VectorizationContext vectorizationContext = MapJoinTestConfig.createVectorizationContext(testDesc);

    // Kept as a byte so that it boxes to the Byte key type used by the descriptor's maps.
    final byte bigTablePos = (byte) mapJoinDesc.getPosBigTable();

    // Key expressions are vectorized before value expressions, as in the original ordering.
    joinInfo.setSlimmedBigTableKeyExpressions(
        vectorizationContext.getVectorExpressions(mapJoinDesc.getKeys().get(bigTablePos)));

    final Map<Byte, List<ExprNodeDesc>> valueExprsByPos = mapJoinDesc.getExprs();
    joinInfo.setSlimmedBigTableValueExpressions(
        vectorizationContext.getVectorExpressions(valueExprsByPos.get(bigTablePos)));

    final VectorMapJoinCommonOperator nativeOperator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorMapJoinDesc, vectorizationContext);

    HiveConf.setBoolVar(testDesc.hiveConf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
    return new CreateMapJoinResult(nativeOperator, container, containerSerDe);
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) VectorMapJoinFastTableContainer(org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) List(java.util.List) ArrayList(java.util.ArrayList) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)

Example 29 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

the class TestMapJoinOperator method executeTestImplementation.

/**
 * Runs one map join implementation against the test data and compares the collected output
 * rows with {@code expectedTestRowMultiSet}.
 *
 * <p>The method builds the operator, attaches a row-mode or vector-mode collector, optionally
 * wires in the FULL OUTER intercept, drives the big-table data through the operator, and then
 * checks that the collector(s) were closed and not aborted. The outcome of the multiset
 * comparison is only printed to standard output (with the differences on mismatch); a mismatch
 * does not fail via {@code Assert}.
 *
 * <p>For FULL OUTER without dynamic partition hash join, the {@code ROW_MODE_HASH_MAP}
 * implementation is not supported and the method returns without driving any data.
 *
 * @param mapJoinImplementation implementation under test
 * @param testDesc test description (variation, configuration, output types and inspectors)
 * @param testData input data for the small and big tables
 * @param expectedTestRowMultiSet rows the join is expected to produce
 * @param title label used in the printed progress lines
 * @throws Exception if building, initializing, or driving the operator fails
 */
private void executeTestImplementation(MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet, String title) throws Exception {
    System.out.println("*BENCHMARK* Starting implementation " + mapJoinImplementation + " variation " + testDesc.vectorMapJoinVariation + " title " + title);

    // UNDONE: Parameterize for implementation variation?
    final MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc);
    final boolean vectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation);
    final RowTestObjectsMultiSet actualRows = new RowTestObjectsMultiSet();

    final CreateMapJoinResult created = MapJoinTestConfig.createMapJoinImplementation(mapJoinImplementation, testDesc, testData, mapJoinDesc);
    final MapJoinOperator joinOperator = created.mapJoinOperator;
    final MapJoinTableContainer tableContainer = created.mapJoinTableContainer;
    final MapJoinTableContainerSerDe tableContainerSerDe = created.mapJoinTableContainerSerDe;

    // Pick the collector that matches the operator's output form.
    final CountCollectorTestOperator collector;
    if (vectorOutput) {
        VectorizationContext outputContext = ((VectorizationContextRegion) joinOperator).getOutputVectorizationContext();
        collector = new TestMultiSetVectorCollectorOperator(ArrayUtils.toPrimitive(outputContext.getProjectedColumns().toArray(new Integer[0])), testDesc.outputTypeInfos, testDesc.outputObjectInspectors, actualRows);
    } else {
        collector = new TestMultiSetCollectorOperator(testDesc.outputObjectInspectors, actualRows);
    }
    MapJoinTestConfig.connectOperators(joinOperator, collector);

    CountCollectorTestOperator interceptCollector = null;
    if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && !mapJoinDesc.isDynamicPartitionHashJoin()) {
        if (mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) {
            // Not supported.
            return;
        }
        // Wire in FULL OUTER Intercept.
        interceptCollector = MapJoinTestConfig.addFullOuterIntercept(mapJoinImplementation, testDesc, actualRows, testData, joinOperator, tableContainer, tableContainerSerDe);
    } else {
        // Invoke initializeOp methods.
        joinOperator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors);
        // Fixup the mapJoinTables.
        joinOperator.setTestMapJoinTableContainer(1, tableContainer, tableContainerSerDe);
    }

    // Push the big-table rows through the operator in the matching mode.
    if (vectorOutput) {
        MapJoinTestData.driveVectorBigTableData(testDesc, testData, joinOperator);
    } else {
        MapJoinTestData.driveBigTableData(testDesc, testData, joinOperator);
    }

    if (!collector.getIsClosed()) {
        Assert.fail("collector operator not closed");
    }
    if (collector.getIsAborted()) {
        Assert.fail("collector operator aborted");
    }

    // Same condition as the intercept wiring above, so the intercept collector was assigned.
    if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && !mapJoinDesc.isDynamicPartitionHashJoin()) {
        if (!interceptCollector.getIsClosed()) {
            Assert.fail("intercept collector operator not closed");
        }
        if (interceptCollector.getIsAborted()) {
            Assert.fail("intercept collector operator aborted");
        }
    }

    System.out.println("*BENCHMARK* executeTestImplementation row count " + collector.getRowCount());

    // Verify the output!
    final String planSuffix = (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER)
        ? " mapJoinPlanVariation " + testDesc.mapJoinPlanVariation.name()
        : "";
    if (expectedTestRowMultiSet.verify(actualRows, "expected", "actual")) {
        System.out.println("*BENCHMARK* " + title + " verify succeeded " + " for implementation " + mapJoinImplementation + " variation " + testDesc.vectorMapJoinVariation + planSuffix);
    } else {
        System.out.println("*BENCHMARK* " + title + " verify failed" + " for implementation " + mapJoinImplementation + " variation " + testDesc.vectorMapJoinVariation + planSuffix);
        expectedTestRowMultiSet.displayDifferences(actualRows, "expected", "actual");
    }
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) TestMultiSetVectorCollectorOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) CountCollectorTestOperator(org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator) CreateMapJoinResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) RowTestObjectsMultiSet(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) TestMultiSetCollectorOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)

Example 30 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

the class MapJoinTestConfig method createVectorizationContext.

/**
 * Builds the {@link VectorizationContext} used by the map join tests: the big-table columns
 * followed by scratch columns for the small-table results.
 *
 * <p>For {@code OUTER} and {@code FULL_OUTER} variations, one scratch column is first allocated
 * per retained small-table key column; after that, one scratch column is allocated per retained
 * small-table value column.
 *
 * @param testDesc test description supplying column names, join variation, and type infos
 * @return the populated vectorization context
 * @throws HiveException declared by the signature; presumably raised by scratch column
 *     allocation (not visible here)
 */
public static VectorizationContext createVectorizationContext(MapJoinTestDescription testDesc) throws HiveException {
    final VectorizationContext context = new VectorizationContext("test", testDesc.bigTableColumnNameList);

    final boolean outerVariation =
        testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER
            || testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER;
    if (outerVariation) {
        // We need physical columns for the retained small-table keys.
        for (int retainedKeyColumn : testDesc.smallTableRetainKeyColumnNums) {
            context.allocateScratchColumn(testDesc.smallTableKeyTypeInfos[retainedKeyColumn]);
        }
    }

    // Create scratch columns to hold small table results.
    // NOTE(review): unlike the key loop, the type is looked up by loop position rather than by
    // the retained column number; this matches only if the retained value columns are
    // 0..n-1 in order — confirm against MapJoinTestDescription.
    final int retainedValueCount = testDesc.smallTableRetainValueColumnNums.length;
    for (int position = 0; position < retainedValueCount; position++) {
        context.allocateScratchColumn(testDesc.smallTableValueTypeInfos[position]);
    }
    return context;
}
Also used : VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)

Aggregations

VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 36, VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 19, HiveConf (org.apache.hadoop.hive.conf.HiveConf): 18, VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow): 18, VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 18, VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor): 16, VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx): 12, ArrayList (java.util.ArrayList): 8, CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 8, VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource): 7, VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc): 7, DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 6, MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer): 6, MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe): 6, VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator): 6, HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 6, ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 6, PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 6, TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 6, List (java.util.List): 5