Search in sources :

Example 6 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinLeftSemiMultiKeyOperator method process.

/**
 * Processes one vectorized row batch for the Multi-Key Left-Semi map join.
 *
 * <p>The incoming {@code row} is cast to a {@link VectorizedRowBatch}. After optional
 * one-time setup, the big-table filter and key expressions are evaluated on the batch,
 * each row's key columns are serialized into a binary-sortable key, and that key is
 * looked up in the bytes hash set. Matching rows are recorded in {@code allMatchs},
 * spilled rows in {@code spills}, and the results are handed to
 * {@code finishLeftSemiRepeated} / {@code finishLeftSemi}. Any rows still in the batch
 * afterwards are forwarded with {@code forwardBigTableBatch}.
 *
 * @param row the batch to process; must be a {@code VectorizedRowBatch}
 * @param tag stored (narrowed to {@code byte}) into {@code alias}
 * @throws HiveException wrapping any {@code IOException} or other {@code Exception}
 *         raised while processing the batch
 */
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            // Initialize Multi-Key members for this specialized class: a binary-sortable
            // key serializer with one field per key column, plus two output buffers
            // (the key being built and the previously looked-up key).
            keyVectorSerializeWrite = new VectorSerializeRow(new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
            keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
            currentKeyOutput = new Output();
            saveKeyOutput = new Output();
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            // Get our Multi-Key hash set information for this specialized class.
            hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;
            needHashTableSetup = false;
        }
        batchCounter++;
        // For left semi joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
            ve.evaluate(batch);
        }
        // Read the size only after the filters ran; an empty batch needs no further work.
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        // Multi-Key specific declarations: none.

        // Multi-Key check for repeating.
        // If all BigTable input columns to key expressions are isRepeating, then
        // calculate key once; lookup once.  With zero key columns the repeating
        // path is never taken.
        boolean allKeyInputColumnsRepeating;
        if (bigTableKeyColumnMap.length == 0) {
            allKeyInputColumnsRepeating = false;
        } else {
            allKeyInputColumnsRepeating = true;
            for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
                if (!batch.cols[bigTableKeyColumnMap[i]].isRepeating) {
                    allKeyInputColumnsRepeating = false;
                    break;
                }
            }
        }
        if (allKeyInputColumnsRepeating) {
            // Repeating.
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.

            // Multi-Key specific repeated lookup.
            keyVectorSerializeWrite.setOutput(currentKeyOutput);
            keyVectorSerializeWrite.serializeWrite(batch, 0);
            JoinUtil.JoinResult joinResult;
            if (keyVectorSerializeWrite.getHasAnyNulls()) {
                // A key containing any NULL is treated as no match; the hash set is not consulted.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                byte[] keyBytes = currentKeyOutput.getData();
                int keyLength = currentKeyOutput.getLength();
                // LOG.debug(CLASS_NAME + " processOp all " + displayBytes(keyBytes, 0, keyLength));
                joinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[0]);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            // We remember any matching rows in allMatchs / allMatchCount and any spilled
            // rows in spills / spillCount.
            // NOTE(review): the original comment here described selected / batch.size in
            // terms of an outer join; it looks carried over from the outer-join operators.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashSetResultCount = 0;
            int allMatchCount = 0;
            int spillCount = 0;
            // Multi-Key specific variables.
            Output temp;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                // Multi-Key get key.
                // Generate binary sortable key for current row in vectorized row batch.
                keyVectorSerializeWrite.setOutput(currentKeyOutput);
                keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
                boolean isAnyNull = keyVectorSerializeWrite.getHasAnyNulls();
                // A new key series starts when the key has a NULL, when there is no saved
                // key yet, or when the serialized bytes differ from the saved key.
                if (isAnyNull || !haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {
                    if (haveSaveKey) {
                        // Move on with our counts: close out the previous key series.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the existence from the hash set result, so we don't keep it.
                                break;
                            case SPILL:
                                // We keep the hash set result for its spill information.
                                hashSetResultCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                    if (isAnyNull) {
                        // NULL keys are never looked up and never become the saved key.
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        // Multi-Key specific save key and lookup: swap the two buffers so the
                        // key just serialized becomes the saved key without copying bytes.
                        temp = saveKeyOutput;
                        saveKeyOutput = currentKeyOutput;
                        currentKeyOutput = temp;
                        // Multi-Key specific lookup key.
                        byte[] keyBytes = saveKeyOutput.getData();
                        int keyLength = saveKeyOutput.getLength();
                        saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]);
                    }
                    // Record the first row of the (possibly new) series under its result.
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                            break;
                    }
                } else {
                    // Same key as the saved one: reuse the saved lookup result.
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                            break;
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the existence from the hash set result, so we don't keep it.
                        break;
                    case SPILL:
                        // We keep the hash set result for its spill information.
                        hashSetResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
            }
            finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        // Same handling as IOException above: everything is surfaced as a HiveException.
        throw new HiveException(e);
    }
}
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) VectorSerializeRow(org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 7 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinLeftSemiStringOperator method process.

/**
 * Processes one vectorized row batch for the Single-Column String Left-Semi map join.
 *
 * <p>The incoming {@code row} is cast to a {@link VectorizedRowBatch}. After optional
 * one-time setup, the big-table filter and key expressions are evaluated on the batch
 * and the single join column is read as a {@code BytesColumnVector}. Each row's bytes
 * are looked up in the bytes hash set; consecutive rows with equal bytes reuse the
 * previous lookup result. Matching rows are recorded in {@code allMatchs}, spilled rows
 * in {@code spills}, and the results are handed to {@code finishLeftSemiRepeated} /
 * {@code finishLeftSemi}. Any rows still in the batch afterwards are forwarded with
 * {@code forwardBigTableBatch}.
 *
 * @param row the batch to process; must be a {@code VectorizedRowBatch}
 * @param tag stored (narrowed to {@code byte}) into {@code alias}
 * @throws HiveException wrapping any {@code IOException} or other {@code Exception}
 *         raised while processing the batch
 */
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            // Initialize Single-Column String members for this specialized class:
            // the join column is the first (only) entry of the key column map.
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            // Get our Single-Column String hash set information for this specialized class.
            hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;
            needHashTableSetup = false;
        }
        batchCounter++;
        // For left semi joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
            ve.evaluate(batch);
        }
        // Read the size only after the filters ran; an empty batch needs no further work.
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        // Single-Column String specific declarations.
        // The one join column for this specialized class, plus its per-row
        // byte array / start offset / length arrays.
        BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn];
        byte[][] vector = joinColVector.vector;
        int[] start = joinColVector.start;
        int[] length = joinColVector.length;
        // Single-Column String check for repeating.
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
            // Repeating.
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.

            // Single-Column String specific repeated lookup.
            JoinUtil.JoinResult joinResult;
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                // A NULL key is treated as no match; the hash set is not consulted.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                byte[] keyBytes = vector[0];
                int keyStart = start[0];
                int keyLength = length[0];
                joinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[0]);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            // We remember any matching rows in allMatchs / allMatchCount and any spilled
            // rows in spills / spillCount.
            // NOTE(review): the original comment here described selected / batch.size in
            // terms of an outer join; it looks carried over from the outer-join operators.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashSetResultCount = 0;
            int allMatchCount = 0;
            int spillCount = 0;
            // Single-Column String specific variables.
            // Batch index of the row whose key was last looked up; -1 until a key is saved.
            int saveKeyBatchIndex = -1;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                // Single-Column String get key.
                // Implicit -- use batchIndex.
                boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
                // A new key series starts when the key is NULL, when there is no saved key
                // yet, or when the bytes differ from the saved key.  The short-circuit on
                // !haveSaveKey keeps saveKeyBatchIndex == -1 from ever being used as an index.
                if (isNull || !haveSaveKey || StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
                    if (haveSaveKey) {
                        // Move on with our counts: close out the previous key series.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the existence from the hash set result, so we don't keep it.
                                break;
                            case SPILL:
                                // We keep the hash set result for its spill information.
                                hashSetResultCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                    if (isNull) {
                        // NULL keys are never looked up and never become the saved key.
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        // Single-Column String specific save key: remember the row, not a copy of the bytes.
                        saveKeyBatchIndex = batchIndex;
                        // Single-Column String specific lookup key.
                        byte[] keyBytes = vector[batchIndex];
                        int keyStart = start[batchIndex];
                        int keyLength = length[batchIndex];
                        saveJoinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[hashSetResultCount]);
                    }
                    // Record the first row of the (possibly new) series under its result.
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                            break;
                    }
                } else {
                    // Same key as the saved one: reuse the saved lookup result.
                    switch(saveJoinResult) {
                        case MATCH:
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                            break;
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the existence from the hash set result, so we don't keep it.
                        break;
                    case SPILL:
                        // We keep the hash set result for its spill information.
                        hashSetResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
            }
            finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        // Same handling as IOException above: everything is surfaced as a HiveException.
        throw new HiveException(e);
    }
}
Also used : VectorMapJoinHashTableResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult) JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 8 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getBetweenExpression.

/**
 * Gets a [NOT] BETWEEN filter or projection expression.
 *
 * <p>This is treated as a special case because the NOT is actually specified in the
 * expression tree as the first argument, and we don't want any runtime cost for that.
 * So creating the VectorExpression needs to be done differently than the standard way
 * where all arguments are passed to the VectorExpression constructor.
 *
 * <p>Expected layout of {@code childExpr}: index 0 is a Boolean constant (NOT present?),
 * index 1 is the tested expression, and indexes 2 and 3 are the range ends.
 *
 * @param childExpr the four BETWEEN children as described above
 * @param mode whether a FILTER or a PROJECTION expression is wanted
 * @param returnType the result type passed on when the expression is created
 * @return the vector expression, or {@code null} when the BETWEEN cannot be vectorized:
 *         the range ends are neither both constants nor both dynamic values, dynamic
 *         values are used in PROJECTION mode, no common comparison type exists, a child's
 *         type category differs from the common type's, or the common type belongs to
 *         none of the families handled here
 * @throws HiveException propagated from the helper methods that build the expression
 */
private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    boolean hasDynamicValues = false;
    // We don't currently support the BETWEEN ends being columns.  They must be scalars.
    if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) && (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) {
        hasDynamicValues = true;
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            // Projection mode is not applicable.
            return null;
        }
    } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
        return null;
    }
    boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
    ExprNodeDesc colExpr = childExpr.get(1);
    // The children after not, might need a cast. Get common types for the two comparisons.
    // Casting for 'between' is handled here as a special case, because the first child is for NOT and doesn't need
    // cast
    TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(childExpr.get(1).getTypeInfo(), childExpr.get(2).getTypeInfo());
    if (commonType == null) {
        // Can't vectorize
        return null;
    }
    commonType = FunctionRegistry.getCommonClassForComparison(commonType, childExpr.get(3).getTypeInfo());
    if (commonType == null) {
        // Can't vectorize
        return null;
    }
    // Build the three post-NOT children, wrapping each in a cast to the common type
    // where its own type differs.
    List<ExprNodeDesc> castChildren = new ArrayList<>();
    boolean wereCastUdfs = false;
    Category commonTypeCategory = commonType.getCategory();
    for (ExprNodeDesc desc : childExpr.subList(1, 4)) {
        TypeInfo childTypeInfo = desc.getTypeInfo();
        Category childCategory = childTypeInfo.getCategory();
        if (childCategory != commonTypeCategory) {
            return null;
        }
        final boolean isNeedsCast;
        if (commonTypeCategory == Category.PRIMITIVE) {
            // Do not do strict TypeInfo comparisons for DECIMAL -- just compare the category.
            // Otherwise, we generate unnecessary casts.
            isNeedsCast = ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() != ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory();
        } else {
            isNeedsCast = !commonType.equals(desc.getTypeInfo());
        }
        if (!isNeedsCast) {
            castChildren.add(desc);
        } else {
            GenericUDF castUdf = getGenericUDFForCast(commonType);
            ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf, Arrays.asList(new ExprNodeDesc[] { desc }));
            castChildren.add(engfd);
            wereCastUdfs = true;
        }
    }
    String colType = commonType.getTypeName();
    // prepare arguments for createVectorExpression
    List<ExprNodeDesc> childrenAfterNot = evaluateCastOnConstants(castChildren);
    // Determine the implementation class from the common type's family, the NOT flag,
    // the requested mode and whether the range ends are dynamic values.
    Class<?> cl = null;
    if (isIntFamily(colType) && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = LongColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterLongColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
        }
    } else if (isIntFamily(colType) && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = LongColumnNotBetween.class;
        } else {
            cl = FilterLongColumnNotBetween.class;
        }
    } else if (isFloatFamily(colType) && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = DoubleColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterDoubleColumnBetweenDynamicValue.class : FilterDoubleColumnBetween.class);
        }
    } else if (isFloatFamily(colType) && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = DoubleColumnNotBetween.class;
        } else {
            cl = FilterDoubleColumnNotBetween.class;
        }
    } else if (colType.equals("string") && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = StringColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterStringColumnBetweenDynamicValue.class : FilterStringColumnBetween.class);
        }
    } else if (colType.equals("string") && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = StringColumnNotBetween.class;
        } else {
            cl = FilterStringColumnNotBetween.class;
        }
    } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = VarCharColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterVarCharColumnBetweenDynamicValue.class : FilterVarCharColumnBetween.class);
        }
    } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = VarCharColumnNotBetween.class;
        } else {
            cl = FilterVarCharColumnNotBetween.class;
        }
    } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = CharColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterCharColumnBetweenDynamicValue.class : FilterCharColumnBetween.class);
        }
    } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = CharColumnNotBetween.class;
        } else {
            cl = FilterCharColumnNotBetween.class;
        }
    } else if (colType.equals("timestamp") && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = TimestampColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterTimestampColumnBetweenDynamicValue.class : FilterTimestampColumnBetween.class);
        }
    } else if (colType.equals("timestamp") && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = TimestampColumnNotBetween.class;
        } else {
            cl = FilterTimestampColumnNotBetween.class;
        }
    } else if (isDecimalFamily(colType) && !notKeywordPresent) {
        // Try the DECIMAL_64 variant first; it is only attempted when no casts were
        // inserted and the range ends are not dynamic values.
        final boolean tryDecimal64 = checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
        if (tryDecimal64) {
            VectorExpression decimal64VecExpr = tryDecimal64Between(mode, /* isNot */ false, colExpr, childrenAfterNot, returnType);
            if (decimal64VecExpr != null) {
                return decimal64VecExpr;
            }
        }
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = DecimalColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterDecimalColumnBetweenDynamicValue.class : FilterDecimalColumnBetween.class);
        }
    } else if (isDecimalFamily(colType) && notKeywordPresent) {
        final boolean tryDecimal64 = checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
        if (tryDecimal64) {
            VectorExpression decimal64VecExpr = tryDecimal64Between(mode, /* isNot */ true, colExpr, childrenAfterNot, returnType);
            if (decimal64VecExpr != null) {
                return decimal64VecExpr;
            }
        }
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = DecimalColumnNotBetween.class;
        } else {
            cl = FilterDecimalColumnNotBetween.class;
        }
    } else if (isDateFamily(colType) && !notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = LongColumnBetween.class;
        } else {
            cl = (hasDynamicValues ? FilterDateColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
        }
    } else if (isDateFamily(colType) && notKeywordPresent) {
        if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
            cl = LongColumnNotBetween.class;
        } else {
            cl = FilterLongColumnNotBetween.class;
        }
    }
    if (cl == null) {
        // No branch above matched the common type, so there is no BETWEEN implementation
        // to instantiate.  Report "can't vectorize" like the other unsupported cases
        // instead of handing a null class to createVectorExpression.
        return null;
    }
    return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
}
Also used : FilterDoubleColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) ArrayList(java.util.ArrayList) FilterVarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween) VarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharColumnNotBetween) StringColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColumnNotBetween) FilterStringColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) FilterVarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween) VarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharColumnNotBetween) CharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CharColumnNotBetween) FilterCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnNotBetween) 
PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) FilterCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnNotBetween) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) FilterDoubleColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween) DoubleColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColumnNotBetween) CastStringToBoolean(org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) LongColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColumnNotBetween) FilterLongColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween) FilterDecimalColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColumnNotBetween) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) FilterVarCharColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween) TimestampColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.TimestampColumnNotBetween) FilterTimestampColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween) FilterTimestampColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween) FilterStringColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween) 
FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) DecimalColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColumnNotBetween) FilterDecimalColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColumnNotBetween) FilterLongColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween)

Example 9 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getIfExpression.

/**
 * Builds the vectorized expression for an IF call.
 *
 * <p>The expression is always obtained from {@code doGetIfExpression}. When
 * {@code mode} is FILTER, the result is additionally wrapped in a
 * {@link SelectColumnIsTrue} that takes the IF expression as its child, so
 * the IF output column is used as the filter input.
 *
 * @param genericUDFIf the IF UDF being vectorized
 * @param childExpr    the child expressions of the IF call
 * @param mode         FILTER to request a filter expression; any other mode
 *                     returns the expression from {@code doGetIfExpression} as is
 * @param returnType   the declared result type of the IF call
 * @return the vectorized expression, or {@code null} when FILTER mode is
 *         requested for a non-BOOLEAN result type or when
 *         {@code doGetIfExpression} returns {@code null}
 * @throws HiveException declared by this method; the visible body has no throw
 *         statement of its own
 */
private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    final boolean filterRequested = (mode == VectorExpressionDescriptor.Mode.FILTER);
    if (filterRequested) {
        // A filter can only be layered on top of a BOOLEAN result.
        final boolean returnsBoolean =
            returnType.getCategory() == Category.PRIMITIVE
                && ((PrimitiveTypeInfo) returnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;
        if (!returnsBoolean) {
            return null;
        }
    }

    // Obtain the IF expression itself; filtering, if requested, is layered on below.
    final VectorExpression projection = doGetIfExpression(genericUDFIf, childExpr, returnType);
    if (projection == null || !filterRequested) {
        // Either nothing could be built, or the caller wanted the unwrapped expression.
        return projection;
    }

    // FILTER mode: select the rows whose IF output column is true.
    final SelectColumnIsTrue filterWrapper = new SelectColumnIsTrue(projection.getOutputColumnNum());
    filterWrapper.setChildExpressions(new VectorExpression[] { projection });
    filterWrapper.setInputTypeInfos(projection.getOutputTypeInfo());
    filterWrapper.setInputDataTypePhysicalVariations(projection.getOutputDataTypePhysicalVariation());
    return filterWrapper;
}
Also used : FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)

Example 10 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorizationContext method getFilterOnBooleanColumnExpression.

/**
 * Creates a filter expression over a column that is used as a boolean.
 *
 * <p>A column whose type is already BOOLEAN is filtered directly with
 * {@link SelectColumnIsTrue}. Any other column type is handed to
 * {@code getCastToBooleanExpression} in FILTER mode.
 *
 * @param exprDesc  descriptor of the column being filtered on
 * @param columnNum the column number passed to {@link SelectColumnIsTrue}
 * @return the filter expression; never {@code null}
 * @throws HiveException if no cast-to-boolean expression could be produced
 *         for a non-BOOLEAN column
 */
private VectorExpression getFilterOnBooleanColumnExpression(ExprNodeColumnDesc exprDesc, int columnNum) throws HiveException {
    final TypeInfo columnType = exprDesc.getTypeInfo();
    final boolean alreadyBoolean =
        columnType.getCategory() == Category.PRIMITIVE
            && ((PrimitiveTypeInfo) columnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;

    if (alreadyBoolean) {
        // No conversion needed: filter on the column directly.
        final VectorExpression selectTrue = new SelectColumnIsTrue(columnNum);
        selectTrue.setInputTypeInfos(columnType);
        selectTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
        return selectTrue;
    }

    // Non-boolean column: request a cast-to-boolean expression in FILTER mode.
    final List<ExprNodeDesc> castArguments = Collections.singletonList(exprDesc);
    final VectorExpression castFilter = getCastToBooleanExpression(castArguments, VectorExpressionDescriptor.Mode.FILTER);
    if (castFilter == null) {
        throw new HiveException("Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean");
    }
    return castFilter;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SelectColumnIsTrue(org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue)

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)140 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)57 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)44 ArrayList (java.util.ArrayList)43 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)38 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)32 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)30 Test (org.junit.Test)29 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)27 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)27 IOException (java.io.IOException)25 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)25 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)23 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)23 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)23 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)19 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)19 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)19 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)15