Search in sources :

Example 6 with VectorMapJoinHashMapResult

use of org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult in project hive by apache.

The example below is the method finishOuter of the class VectorMapJoinOuterGenerateResultOperator.

/**
   * Produces the outer join output for a single vectorized row batch.
   * <p>
   * Spilled rows are handed to spillHashMapBatch first. The remaining rows are then split
   * into matches and non-matches, match results are generated per equal key series, and
   * finally the batch's selected array, size and selectedInUse flag are rewritten to describe
   * the match and non-match rows that stay in the big table batch.
   *
   * @param batch
   *          The big table batch with any matching and any non matching rows both as
   *          selected in use.
   * @param allMatchCount
   *          Number of matches in allMatchs.
   * @param equalKeySeriesCount
   *          Number of equal key series to process; each series is either single value or
   *          multi-value as recorded in equalKeySeriesIsSingleValue.
   * @param atLeastOneNonMatch
   *          Whether at least one row was a non-match.
   * @param inputSelectedInUse
   *          A copy of the batch's selectedInUse flag on input to the process method.
   * @param inputLogicalSize
   *          The batch's size on input to the process method.
   * @param spillCount
   *          Number of spills in spills.
   * @param hashMapResultCount
   *          Number of entries in hashMapResults (not read by this method).
   */
public void finishOuter(VectorizedRowBatch batch, int allMatchCount, int equalKeySeriesCount, boolean atLeastOneNonMatch, boolean inputSelectedInUse, int inputLogicalSize, int spillCount, int hashMapResultCount) throws IOException, HiveException {
    final boolean hasSpills = spillCount > 0;
    int noMatchCount = 0;
    if (hasSpills) {
        // Spills must be written out while the batch is still unmodified.
        spillHashMapBatch(batch, (VectorMapJoinHashTableResult[]) hashMapResults, spills, spillHashMapResultIndices, spillCount);
        // Input selection minus the spills leaves exactly the match and non-match rows.
        final int remainingCount = subtractFromInputSelected(inputSelectedInUse, inputLogicalSize, spills, spillCount, nonSpills);
        if (isLogDebugEnabled) {
            LOG.debug("finishOuter spillCount > 0" + " nonSpills " + intArrayToRangesString(nonSpills, remainingCount));
        }
        // Value expressions are evaluated for every remaining row, matched or not.
        if (bigTableValueExpressions != null) {
            doValueExpr(batch, nonSpills, remainingCount);
        }
        if (atLeastOneNonMatch) {
            // Non-matches are the remaining rows that are not in allMatchs.
            noMatchCount = subtract(nonSpills, remainingCount, allMatchs, allMatchCount, noMatchs);
            if (isLogDebugEnabled) {
                LOG.debug("finishOuter spillCount > 0" + " noMatchs " + intArrayToRangesString(noMatchs, noMatchCount));
            }
        }
    } else {
        // Nothing spilled: evaluate value expressions over the original input selection.
        doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize);
        if (atLeastOneNonMatch) {
            // Non-matches are the input rows that are not in allMatchs.
            noMatchCount = subtractFromInputSelected(inputSelectedInUse, inputLogicalSize, allMatchs, allMatchCount, noMatchs);
            if (isLogDebugEnabled) {
                LOG.debug("finishOuter spillCount == 0" + " noMatchs " + intArrayToRangesString(noMatchs, noMatchCount));
            }
        }
    }

    if (allMatchCount <= 0) {
        // No matches at all, so no match rows remain selected in the batch.
        batch.size = 0;
    } else {
        // Only single value series contribute to the running selected count; the multi-value
        // call does not feed it.
        // NOTE(review): multi-value results presumably go to the overflow batch - confirm.
        int selectedCount = 0;
        for (int series = 0; series < equalKeySeriesCount; series++) {
            final int resultIndex = equalKeySeriesHashMapResultIndices[series];
            final VectorMapJoinHashMapResult seriesResult = hashMapResults[resultIndex];
            final int firstMatchIndex = equalKeySeriesAllMatchIndices[series];
            final boolean singleValue = equalKeySeriesIsSingleValue[series];
            final int seriesLength = equalKeySeriesDuplicateCounts[series];
            if (!singleValue) {
                generateHashMapResultMultiValue(batch, seriesResult, allMatchs, firstMatchIndex, seriesLength);
                continue;
            }
            selectedCount = generateHashMapResultSingleValue(batch, seriesResult, allMatchs, firstMatchIndex, seriesLength, selectedCount);
        }
        // The number of single value rows that were generated in the big table batch.
        batch.size = selectedCount;
        batch.selectedInUse = true;
        if (isLogDebugEnabled) {
            LOG.debug("finishOuter allMatchCount > 0" + " batch.selected " + intArrayToRangesString(batch.selected, batch.size));
        }
    }

    if (noMatchCount <= 0) {
        // No non-match rows to add.
        return;
    }
    if (batch.size <= 0) {
        // No match rows occupy the batch, so the whole batch can carry the non-matches.
        generateOuterNullsRepeatedAll(batch);
        System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount);
        batch.size = noMatchCount;
        batch.selectedInUse = true;
        if (isLogDebugEnabled) {
            LOG.debug("finishOuter noMatchCount > 0 && batch.size == 0" + " batch.selected " + intArrayToRangesString(batch.selected, batch.size));
        }
        return;
    }
    // Match rows and non-match rows both remain in the batch.
    generateOuterNulls(batch, noMatchs, noMatchCount);
    // Merge noMatchs with the (match) selected rows, then install the merged selection.
    final int mergedCount = sortMerge(noMatchs, noMatchCount, batch.selected, batch.size, merged);
    if (isLogDebugEnabled) {
        LOG.debug("finishOuter noMatchCount > 0 && batch.size > 0" + " merged " + intArrayToRangesString(merged, mergedCount));
    }
    System.arraycopy(merged, 0, batch.selected, 0, mergedCount);
    batch.size = mergedCount;
    batch.selectedInUse = true;
}
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) VectorMapJoinHashMapResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult)

Example 7 with VectorMapJoinHashMapResult

use of org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult in project hive by apache.

The example below is the method finishInner of the class VectorMapJoinInnerGenerateResultOperator.

/**
   * Produces the inner join output for a single vectorized row batch.
   * <p>
   * Big table value expressions are evaluated over the matched rows only, match results are
   * generated per equal key series, any spilled rows are handed to spillHashMapBatch, and the
   * batch's size and selectedInUse flag are then set from the single value results.
   *
   * @param batch
   *          The big table batch with any matching and any non matching rows both as
   *          selected in use.
   * @param allMatchCount
   *          Number of matches in allMatchs.
   * @param equalKeySeriesCount
   *          Number of equal key series to process; each series is either single value or
   *          multi-value as recorded in equalKeySeriesIsSingleValue.
   * @param spillCount
   *          Number of spills in spills.
   * @param hashMapResultCount
   *          Number of entries in hashMapResults (not read by this method).
   */
protected void finishInner(VectorizedRowBatch batch, int allMatchCount, int equalKeySeriesCount, int spillCount, int hashMapResultCount) throws HiveException, IOException {
    // Optimization: value expressions are evaluated for the matched rows only.
    if (allMatchCount > 0) {
        if (bigTableValueExpressions != null) {
            performValueExpressions(batch, allMatchs, allMatchCount);
        }
    }

    // Only single value series contribute to the running selected count.
    int selectedCount = 0;
    int series = 0;
    while (series < equalKeySeriesCount) {
        final int resultIndex = equalKeySeriesHashMapResultIndices[series];
        final VectorMapJoinHashMapResult seriesResult = hashMapResults[resultIndex];
        final int firstMatchIndex = equalKeySeriesAllMatchIndices[series];
        final boolean singleValue = equalKeySeriesIsSingleValue[series];
        final int seriesLength = equalKeySeriesDuplicateCounts[series];
        if (!singleValue) {
            generateHashMapResultMultiValue(batch, seriesResult, allMatchs, firstMatchIndex, seriesLength);
        } else {
            selectedCount = generateHashMapResultSingleValue(batch, seriesResult, allMatchs, firstMatchIndex, seriesLength, selectedCount);
        }
        series++;
    }

    // Spilled rows are written out after match generation and before the batch size is reset.
    if (spillCount > 0) {
        spillHashMapBatch(batch, (VectorMapJoinHashTableResult[]) hashMapResults, spills, spillHashMapResultIndices, spillCount);
    }

    batch.size = selectedCount;
    batch.selectedInUse = true;
}
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) VectorMapJoinHashMapResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult)

Aggregations

VectorMapJoinHashMapResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult): 7; JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil): 5; Random (java.util.Random): 4; Test (org.junit.Test): 4; VerifyFastBytesHashMap (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap): 2; VerifyFastLongHashMap (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap): 2; VectorMapJoinFastLongHashMap (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap): 2; VectorMapJoinHashTableResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult): 2