Search in sources :

Example 86 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinOuterMultiKeyOperator method process.

// ---------------------------------------------------------------------------
// Process Multi-Key Outer Join on a vectorized row batch.
// 
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            /*
         * Initialize Multi-Key members for this specialized class.
         */
            keyVectorSerializeWrite = new VectorSerializeRow(new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
            keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
            currentKeyOutput = new Output();
            saveKeyOutput = new Output();
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            /*
         * Get our Multi-Key hash map information for this specialized class.
         */
            hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable;
            needHashTableSetup = false;
        }
        batchCounter++;
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Do the per-batch setup for an outer join.
        outerPerBatchSetup(batch);
        // For outer join, remember our input rows before ON expression filtering or before
        // hash table matching so we can generate results for all rows (matching and non matching)
        // later.
        boolean inputSelectedInUse = batch.selectedInUse;
        if (inputSelectedInUse) {
            // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
            // throw new HiveException("batch.selected is not in sort order and unique");
            // }
            System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
        }
        // Filtering for outer join just removes rows available for hash table matching.
        boolean someRowsFilteredOut = false;
        if (bigTableFilterExpressions.length > 0) {
            // Since the input
            for (VectorExpression ve : bigTableFilterExpressions) {
                ve.evaluate(batch);
            }
            someRowsFilteredOut = (batch.size != inputLogicalSize);
            if (LOG.isDebugEnabled()) {
                if (batch.selectedInUse) {
                    if (inputSelectedInUse) {
                        LOG.debug(CLASS_NAME + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
                    } else {
                        LOG.debug(CLASS_NAME + " inputLogicalSize " + inputLogicalSize + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
                    }
                }
            }
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
       * Multi-Key specific declarations.
       */
        // None.
        /*
       * Multi-Key Long check for repeating.
       */
        // If all BigTable input columns to key expressions are isRepeating, then
        // calculate key once; lookup once.
        // Also determine if any nulls are present since for a join that means no match.
        boolean allKeyInputColumnsRepeating;
        // Only valid if allKeyInputColumnsRepeating is true.
        boolean someKeyInputColumnIsNull = false;
        if (bigTableKeyColumnMap.length == 0) {
            allKeyInputColumnsRepeating = false;
        } else {
            allKeyInputColumnsRepeating = true;
            for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
                ColumnVector colVector = batch.cols[bigTableKeyColumnMap[i]];
                if (!colVector.isRepeating) {
                    allKeyInputColumnsRepeating = false;
                    break;
                }
                if (!colVector.noNulls && colVector.isNull[0]) {
                    someKeyInputColumnIsNull = true;
                }
            }
        }
        if (allKeyInputColumnsRepeating) {
            /*
         * Repeating.
         */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
         * Multi-Key specific repeated lookup.
         */
            JoinUtil.JoinResult joinResult;
            if (batch.size == 0) {
                // Whole repeated key batch was filtered out.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else if (someKeyInputColumnIsNull) {
                // Any (repeated) null key column is no match for whole batch.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                // All key input columns are repeating.  Generate key once.  Lookup once.
                keyVectorSerializeWrite.setOutput(currentKeyOutput);
                keyVectorSerializeWrite.serializeWrite(batch, 0);
                byte[] keyBytes = currentKeyOutput.getData();
                int keyLength = currentKeyOutput.getLength();
                joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMapResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
            boolean atLeastOneNonMatch = someRowsFilteredOut;
            /*
         * Multi-Key specific variables.
         */
            Output temp;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < batch.size; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
                /*
           * Multi-Key outer null detection.
           */
                // Generate binary sortable key for current row in vectorized row batch.
                keyVectorSerializeWrite.setOutput(currentKeyOutput);
                keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
                if (keyVectorSerializeWrite.getHasAnyNulls()) {
                    // Have that the NULL does not interfere with the current equal key series, if there
                    // is one. We do not set saveJoinResult.
                    // 
                    // Let a current MATCH equal key series keep going, or
                    // Let a current SPILL equal key series keep going, or
                    // Let a current NOMATCH keep not matching.
                    atLeastOneNonMatch = true;
                // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
                } else {
                    if (!haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {
                        if (haveSaveKey) {
                            // Move on with our counts.
                            switch(saveJoinResult) {
                                case MATCH:
                                    hashMapResultCount++;
                                    equalKeySeriesCount++;
                                    break;
                                case SPILL:
                                    hashMapResultCount++;
                                    break;
                                case NOMATCH:
                                    break;
                            }
                        }
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
               * Multi-Key specific save key.
               */
                        temp = saveKeyOutput;
                        saveKeyOutput = currentKeyOutput;
                        currentKeyOutput = temp;
                        /*
               * Multi-Key specific lookup key.
               */
                        byte[] keyBytes = saveKeyOutput.getData();
                        int keyLength = saveKeyOutput.getLength();
                        saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
                                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                                break;
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                                spillCount++;
                                break;
                            case NOMATCH:
                                atLeastOneNonMatch = true;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                                break;
                        }
                    } else {
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                                break;
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                                spillCount++;
                                break;
                            case NOMATCH:
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                                break;
                        }
                    }
                // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
                // throw new HiveException("allMatchs is not in sort order and unique");
                // }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        hashMapResultCount++;
                        equalKeySeriesCount++;
                        break;
                    case SPILL:
                        hashMapResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
            }
            // We will generate results for all matching and non-matching rows.
            finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) VectorSerializeRow(org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 87 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinOuterStringOperator method process.

// ---------------------------------------------------------------------------
// Process Single-Column String Outer Join on a vectorized row batch.
// 
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            /*
         * Initialize Single-Column String members for this specialized class.
         */
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            /*
         * Get our Single-Column String hash map information for this specialized class.
         */
            hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable;
            needHashTableSetup = false;
        }
        batchCounter++;
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Do the per-batch setup for an outer join.
        outerPerBatchSetup(batch);
        // For outer join, remember our input rows before ON expression filtering or before
        // hash table matching so we can generate results for all rows (matching and non matching)
        // later.
        boolean inputSelectedInUse = batch.selectedInUse;
        if (inputSelectedInUse) {
            // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
            // throw new HiveException("batch.selected is not in sort order and unique");
            // }
            System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
        }
        // Filtering for outer join just removes rows available for hash table matching.
        boolean someRowsFilteredOut = false;
        if (bigTableFilterExpressions.length > 0) {
            // Since the input
            for (VectorExpression ve : bigTableFilterExpressions) {
                ve.evaluate(batch);
            }
            someRowsFilteredOut = (batch.size != inputLogicalSize);
            if (LOG.isDebugEnabled()) {
                if (batch.selectedInUse) {
                    if (inputSelectedInUse) {
                        LOG.debug(CLASS_NAME + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
                    } else {
                        LOG.debug(CLASS_NAME + " inputLogicalSize " + inputLogicalSize + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
                    }
                }
            }
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
       * Single-Column String specific declarations.
       */
        // The one join column for this specialized class.
        BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn];
        byte[][] vector = joinColVector.vector;
        int[] start = joinColVector.start;
        int[] length = joinColVector.length;
        /*
       * Single-Column String check for repeating.
       */
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
            /*
         * Repeating.
         */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
         * Single-Column String specific repeated lookup.
         */
            JoinUtil.JoinResult joinResult;
            if (batch.size == 0) {
                // Whole repeated key batch was filtered out.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                // Any (repeated) null key column is no match for whole batch.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                // Handle *repeated* join key, if found.
                byte[] keyBytes = vector[0];
                int keyStart = start[0];
                int keyLength = length[0];
                joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMapResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
            boolean atLeastOneNonMatch = someRowsFilteredOut;
            /*
         * Single-Column String specific variables.
         */
            int saveKeyBatchIndex = -1;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < batch.size; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
                /*
           * Single-Column String outer null detection.
           */
                boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
                if (isNull) {
                    // Have that the NULL does not interfere with the current equal key series, if there
                    // is one. We do not set saveJoinResult.
                    // 
                    // Let a current MATCH equal key series keep going, or
                    // Let a current SPILL equal key series keep going, or
                    // Let a current NOMATCH keep not matching.
                    atLeastOneNonMatch = true;
                // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
                } else {
                    if (!haveSaveKey || StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
                        if (haveSaveKey) {
                            // Move on with our counts.
                            switch(saveJoinResult) {
                                case MATCH:
                                    hashMapResultCount++;
                                    equalKeySeriesCount++;
                                    break;
                                case SPILL:
                                    hashMapResultCount++;
                                    break;
                                case NOMATCH:
                                    break;
                            }
                        }
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
               * Single-Column String specific save key.
               */
                        saveKeyBatchIndex = batchIndex;
                        /*
               * Single-Column Long specific lookup key.
               */
                        byte[] keyBytes = vector[batchIndex];
                        int keyStart = start[batchIndex];
                        int keyLength = length[batchIndex];
                        saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]);
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
                                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                                break;
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                                spillCount++;
                                break;
                            case NOMATCH:
                                atLeastOneNonMatch = true;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                                break;
                        }
                    } else {
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                                break;
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                                spillCount++;
                                break;
                            case NOMATCH:
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                                break;
                        }
                    }
                // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
                // throw new HiveException("allMatchs is not in sort order and unique");
                // }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        hashMapResultCount++;
                        equalKeySeriesCount++;
                        break;
                    case SPILL:
                        hashMapResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
            }
            // We will generate results for all matching and non-matching rows.
            finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 88 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class MapJoinTestConfig method createMapJoin.

public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin) throws SerDeException, IOException, HiveException {
    final Byte bigTablePos = 0;
    MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
    MapJoinObjectSerDeContext valCtx = mapJoinTableContainerSerDe.getValueContext();
    MapJoinTableContainer mapJoinTableContainer = (isOriginalMapJoin ? new HashMapWrapper(testDesc.hiveConf, -1) : new MapJoinBytesTableContainer(testDesc.hiveConf, valCtx, testData.smallTableKeyHashMap.size(), 0));
    mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
    loadTableContainerData(testDesc, testData, mapJoinTableContainer);
    MapJoinOperator operator;
    if (!isVectorMapJoin) {
        operator = new MapJoinOperator(new CompilationOpContext());
        operator.setConf(mapJoinDesc);
    } else {
        VectorizationContext vContext = new VectorizationContext("test", testDesc.bigTableColumnNamesList);
        // Create scratch columns to hold small table results.
        for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
            vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]);
        }
        // This is what the Vectorizer class does.
        VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
        byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
        VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
        vectorMapJoinDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions);
        Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
        VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
        vectorMapJoinDesc.setAllBigTableValueExpressions(allBigTableValueExpressions);
        List<ExprNodeDesc> bigTableFilters = mapJoinDesc.getFilters().get(bigTablePos);
        boolean isOuterAndFiltered = (!mapJoinDesc.isNoOuterJoin() && bigTableFilters.size() > 0);
        if (!isOuterAndFiltered) {
            operator = new VectorMapJoinOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorMapJoinDesc);
        } else {
            operator = new VectorMapJoinOuterFilteredOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorMapJoinDesc);
        }
    }
    MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
    operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe);
    return operator;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) HashMapWrapper(org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) List(java.util.List) ArrayList(java.util.ArrayList) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 89 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class MapJoinTestConfig method createNativeVectorMapJoin.

public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) throws SerDeException, IOException, HiveException {
    VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    // UNDONE
    mapJoinDesc.setVectorDesc(vectorDesc);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
    MapJoinTableContainer mapJoinTableContainer;
    switch(vectorDesc.getHashTableImplementationType()) {
        case OPTIMIZED:
            mapJoinTableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
            MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
            mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
            break;
        case FAST:
            mapJoinTableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size());
            break;
        default:
            throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType());
    }
    loadTableContainerData(testDesc, testData, mapJoinTableContainer);
    VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
    byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
    VectorExpression[] slimmedBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
    vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
    Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
    VectorExpression[] slimmedBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
    vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
    VectorMapJoinCommonOperator operator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vContext);
    MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
    operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null);
    return operator;
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) VectorMapJoinFastTableContainer(org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) List(java.util.List) ArrayList(java.util.ArrayList) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)

Example 90 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method createDecimal64ToDecimalConversion.

private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeInfo resultTypeInfo) throws HiveException {
    Object[] conversionArgs = new Object[1];
    conversionArgs[0] = colIndex;
    VectorExpression vectorExpression = instantiateExpression(ConvertDecimal64ToDecimal.class, resultTypeInfo, DataTypePhysicalVariation.NONE, conversionArgs);
    if (vectorExpression == null) {
        handleCouldNotInstantiateVectorExpression(ConvertDecimal64ToDecimal.class, resultTypeInfo, DataTypePhysicalVariation.NONE, conversionArgs);
    }
    Objects.requireNonNull(vectorExpression).setInputTypeInfos(resultTypeInfo);
    vectorExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.DECIMAL_64);
    return vectorExpression;
}
Also used : FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)140 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)57 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)44 ArrayList (java.util.ArrayList)43 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)38 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)32 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)30 Test (org.junit.Test)29 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)27 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)27 IOException (java.io.IOException)25 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)25 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)23 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)23 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)23 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)19 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)19 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)19 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)15