Search in sources :

Example 26 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorSelectOperatorBench method setup.

@Setup
public void setup(Blackhole bh) throws HiveException {
    HiveConf hconf = new HiveConf();
    List<String> columns = new ArrayList<String>();
    columns.add("a");
    columns.add("b");
    columns.add("c");
    VectorizationContext vc = new VectorizationContext("name", columns);
    selDesc = new SelectDesc(false);
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Long.class, "a", "table", false);
    ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Long.class, "b", "table", false);
    ExprNodeColumnDesc colDesc3 = new ExprNodeColumnDesc(Long.class, "c", "table", false);
    ExprNodeGenericFuncDesc plusDesc = new ExprNodeGenericFuncDesc();
    GenericUDF gudf = new GenericUDFOPPlus();
    plusDesc.setGenericUDF(gudf);
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(colDesc1);
    children.add(colDesc2);
    plusDesc.setChildren(children);
    plusDesc.setTypeInfo(TypeInfoFactory.longTypeInfo);
    colList.add(plusDesc);
    colList.add(colDesc3);
    selDesc.setColList(colList);
    List<String> outputColNames = new ArrayList<String>();
    outputColNames.add("_col0");
    outputColNames.add("_col1");
    selDesc.setOutputColumnNames(outputColNames);
    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
    selDesc.setVectorDesc(vectorSelectDesc);
    List<ExprNodeDesc> selectColList = selDesc.getColList();
    VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()];
    for (int i = 0; i < selectColList.size(); i++) {
        ExprNodeDesc expr = selectColList.get(i);
        VectorExpression ve = vc.getVectorExpression(expr);
        vectorSelectExprs[i] = ve;
    }
    vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
    vectorSelectDesc.setProjectedOutputColumns(new int[] { 3, 2 });
    CompilationOpContext opContext = new CompilationOpContext();
    vso = new VectorSelectOperator(opContext, selDesc, vc, vectorSelectDesc);
    // to trigger vectorForward
    child = new ArrayList<>();
    child.add(new BlackholeOperator(opContext, bh));
    child.add(new BlackholeOperator(opContext, bh));
    vso.initialize(hconf, null);
    vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(VectorizedRowBatch.DEFAULT_SIZE, 4, 17);
}
Also used : VectorSelectOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) BlackholeOperator(org.apache.hive.benchmark.vectorization.BlackholeOperator) GenericUDFOPPlus(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Setup(org.openjdk.jmh.annotations.Setup)

Example 27 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinLeftSemiMultiKeyOperator method processBatch.

@Override
public void processBatch(VectorizedRowBatch batch) throws HiveException {
    try {
        // For left semi joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
            ve.evaluate(batch);
        }
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
       * Multi-Key specific declarations.
       */
        // None.
        /*
       * Multi-Key Long check for repeating.
       */
        // If all BigTable input columns to key expressions are isRepeating, then
        // calculate key once; lookup once.
        boolean allKeyInputColumnsRepeating;
        if (bigTableKeyColumnMap.length == 0) {
            allKeyInputColumnsRepeating = false;
        } else {
            allKeyInputColumnsRepeating = true;
            for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
                if (!batch.cols[bigTableKeyColumnMap[i]].isRepeating) {
                    allKeyInputColumnsRepeating = false;
                    break;
                }
            }
        }
        if (allKeyInputColumnsRepeating) {
            /*
         * Repeating.
         */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
         * Multi-Key specific repeated lookup.
         */
            keyVectorSerializeWrite.setOutput(currentKeyOutput);
            keyVectorSerializeWrite.serializeWrite(batch, 0);
            JoinUtil.JoinResult joinResult;
            if (keyVectorSerializeWrite.getHasAnyNulls()) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                byte[] keyBytes = currentKeyOutput.getData();
                int keyLength = currentKeyOutput.getLength();
                // LOG.debug(CLASS_NAME + " processOp all " + displayBytes(keyBytes, 0, keyLength));
                joinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[0]);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            // We remember any matching rows in matchs / matchSize.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for outer join.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashSetResultCount = 0;
            int allMatchCount = 0;
            int spillCount = 0;
            /*
         * Multi-Key specific variables.
         */
            Output temp;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                /*
           * Multi-Key get key.
           */
                // Generate binary sortable key for current row in vectorized row batch.
                keyVectorSerializeWrite.setOutput(currentKeyOutput);
                keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
                boolean isAnyNull = keyVectorSerializeWrite.getHasAnyNulls();
                if (isAnyNull || !haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the existence from the hash set result, so we don't keep it.
                                break;
                            case SPILL:
                                // We keep the hash set result for its spill information.
                                hashSetResultCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                    if (isAnyNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
               * Multi-Key specific save key and lookup.
               */
                        temp = saveKeyOutput;
                        saveKeyOutput = currentKeyOutput;
                        currentKeyOutput = temp;
                        /*
               * Multi-key specific lookup key.
               */
                        byte[] keyBytes = saveKeyOutput.getData();
                        int keyLength = saveKeyOutput.getLength();
                        saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]);
                    }
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                            break;
                    }
                } else {
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                            break;
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the existence from the hash set result, so we don't keep it.
                        break;
                    case SPILL:
                        // We keep the hash set result for its spill information.
                        hashSetResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
            }
            finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 28 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinOuterGenerateResultOperator method doValueExprOnInputSelected.

/**
 * Apply the value expression to rows in the (original) input selected array.
 *
 * @param batch
 *          The vectorized row batch.
 * @param inputSelectedInUse
 *          Whether the (original) input batch is selectedInUse.
 * @param inputLogicalSize
 *          The (original) input batch size.
 */
private void doValueExprOnInputSelected(VectorizedRowBatch batch, boolean inputSelectedInUse, int inputLogicalSize) throws HiveException {
    int saveBatchSize = batch.size;
    int[] saveSelected = batch.selected;
    boolean saveSelectedInUse = batch.selectedInUse;
    batch.size = inputLogicalSize;
    batch.selected = inputSelected;
    batch.selectedInUse = inputSelectedInUse;
    if (bigTableValueExpressions != null) {
        for (VectorExpression ve : bigTableValueExpressions) {
            ve.evaluate(batch);
        }
    }
    batch.size = saveBatchSize;
    batch.selected = saveSelected;
    batch.selectedInUse = saveSelectedInUse;
}
Also used : VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 29 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinLeftSemiLongOperator method processBatch.

@Override
public void processBatch(VectorizedRowBatch batch) throws HiveException {
    try {
        // For left semi joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
            ve.evaluate(batch);
        }
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
       * Single-Column Long specific declarations.
       */
        // The one join column for this specialized class.
        LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
        long[] vector = joinColVector.vector;
        /*
       * Single-Column Long check for repeating.
       */
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
            /*
         * Repeating.
         */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
         * Single-Column Long specific repeated lookup.
         */
            JoinUtil.JoinResult joinResult;
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                long key = vector[0];
                if (useMinMax && (key < min || key > max)) {
                    // Out of range for whole batch.
                    joinResult = JoinUtil.JoinResult.NOMATCH;
                } else {
                    joinResult = hashSet.contains(key, hashSetResults[0]);
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            // We remember any matching rows in matchs / matchSize.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for outer join.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashSetResultCount = 0;
            int allMatchCount = 0;
            int spillCount = 0;
            /*
         * Single-Column Long specific variables.
         */
            long saveKey = 0;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                /*
           * Single-Column Long get key.
           */
                long currentKey;
                boolean isNull;
                if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) {
                    currentKey = 0;
                    isNull = true;
                } else {
                    currentKey = vector[batchIndex];
                    isNull = false;
                }
                if (isNull || !haveSaveKey || currentKey != saveKey) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the existence from the hash set result, so we don't keep it.
                                break;
                            case SPILL:
                                // We keep the hash set result for its spill information.
                                hashSetResultCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                    if (isNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
               * Single-Column Long specific save key.
               */
                        saveKey = currentKey;
                        if (useMinMax && (currentKey < min || currentKey > max)) {
                            // Key out of range for whole hash table.
                            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        } else {
                            saveJoinResult = hashSet.contains(currentKey, hashSetResults[hashSetResultCount]);
                        }
                    }
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                            break;
                    }
                } else {
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                            break;
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the existence from the hash set result, so we don't keep it.
                        break;
                    case SPILL:
                        // We keep the hash set result for its spill information.
                        hashSetResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
            }
            finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 30 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinOuterLongOperator method processBatch.

@Override
public void processBatch(VectorizedRowBatch batch) throws HiveException {
    try {
        final int inputLogicalSize = batch.size;
        // Do the per-batch setup for an outer join.
        outerPerBatchSetup(batch);
        // For outer join, remember our input rows before ON expression filtering or before
        // hash table matching so we can generate results for all rows (matching and non matching)
        // later.
        boolean inputSelectedInUse = batch.selectedInUse;
        if (inputSelectedInUse) {
            System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
        }
        // Filtering for outer join just removes rows available for hash table matching.
        boolean someRowsFilteredOut = false;
        if (bigTableFilterExpressions.length > 0) {
            // Since the input
            for (VectorExpression ve : bigTableFilterExpressions) {
                ve.evaluate(batch);
            }
            someRowsFilteredOut = (batch.size != inputLogicalSize);
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
       * Single-Column Long specific declarations.
       */
        // The one join column for this specialized class.
        LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
        long[] vector = joinColVector.vector;
        /*
       * Single-Column Long check for repeating.
       */
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
            /*
         * Repeating.
         */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
         * Single-Column Long specific repeated lookup.
         */
            JoinUtil.JoinResult joinResult;
            if (batch.size == 0) {
                // Whole repeated key batch was filtered out.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                // Any (repeated) null key column is no match for whole batch.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                // Handle *repeated* join key, if found.
                long key = vector[0];
                if (useMinMax && (key < min || key > max)) {
                    // Out of range for whole batch.
                    joinResult = JoinUtil.JoinResult.NOMATCH;
                } else {
                    joinResult = hashMap.lookup(key, hashMapResults[0], matchTracker);
                }
            }
            /*
         * Common repeated join result processing.
         */
            finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
        } else {
            /*
         * NOT Repeating.
         */
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMapResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
            boolean atLeastOneNonMatch = someRowsFilteredOut;
            /*
         * Single-Column Long specific variables.
         */
            long saveKey = 0;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < batch.size; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                /*
           * Single-Column Long outer null detection.
           */
                boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
                if (isNull) {
                    // Have that the NULL does not interfere with the current equal key series, if there
                    // is one. We do not set saveJoinResult.
                    // 
                    // Let a current MATCH equal key series keep going, or
                    // Let a current SPILL equal key series keep going, or
                    // Let a current NOMATCH keep not matching.
                    atLeastOneNonMatch = true;
                } else {
                    /*
             * Single-Column Long outer get key.
             */
                    long currentKey = vector[batchIndex];
                    if (!haveSaveKey || currentKey != saveKey) {
                        if (haveSaveKey) {
                            // Move on with our counts.
                            switch(saveJoinResult) {
                                case MATCH:
                                    hashMapResultCount++;
                                    equalKeySeriesCount++;
                                    break;
                                case SPILL:
                                    hashMapResultCount++;
                                    break;
                                case NOMATCH:
                                    break;
                            }
                        }
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
               * Single-Column Long specific save key.
               */
                        saveKey = currentKey;
                        if (useMinMax && (currentKey < min || currentKey > max)) {
                            // Key out of range for whole hash table.
                            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        } else {
                            saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount], matchTracker);
                        }
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
                                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                                allMatchs[allMatchCount++] = batchIndex;
                                break;
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                                spillCount++;
                                break;
                            case NOMATCH:
                                atLeastOneNonMatch = true;
                                break;
                        }
                    } else {
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
                                allMatchs[allMatchCount++] = batchIndex;
                                break;
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                                spillCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        hashMapResultCount++;
                        equalKeySeriesCount++;
                        break;
                    case SPILL:
                        hashMapResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
            }
            // We will generate results for all matching and non-matching rows.
            finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
        }
        if (batch.size > 0) {
            // Forward any rows in the Big Table batch that had results added (they will be selected).
            // NOTE: Other result rows may have been generated in the overflowBatch.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) IOException(java.io.IOException) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)140 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)57 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)44 ArrayList (java.util.ArrayList)43 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)38 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)32 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)30 Test (org.junit.Test)29 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)27 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)27 IOException (java.io.IOException)25 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)25 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)23 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)23 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)23 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)19 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)19 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)19 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)15