Search in sources :

Example 1 with VectorPTFEvaluatorDecimalCountDistinct

Use of org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalCountDistinct in the Apache Hive project.

In class VectorPTFDesc, method getEvaluator.

/**
 * Builds the vectorized PTF evaluator matching a window function and the column vector type
 * of its first argument.
 *
 * <p>This method is public to help EXPLAIN VECTORIZATION show the evaluator classes.
 *
 * @param functionType the window function to build an evaluator for
 * @param isDistinct consulted only for COUNT with a non-null first argument; selects the
 *     type-specific count-distinct evaluators
 * @param windowFrameDef the window frame; handed to every evaluator and inspected to decide
 *     whether a streaming evaluator may be used
 * @param columnVectorTypes argument column vector types; only element 0 is read here
 * @param inputVectorExpressions argument expressions; element 0 is the main input, element 1
 *     (when present) is cast to a constant and used as the LAG/LEAD offset, element 2 (when
 *     present) is the LAG/LEAD default value expression
 * @param outputColumnNum output column number handed to the evaluator
 * @return a newly constructed evaluator
 * @throws RuntimeException if the function type, or the column vector type for that function,
 *     is not handled
 */
public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType functionType, boolean isDistinct, WindowFrameDef windowFrameDef, Type[] columnVectorTypes, VectorExpression[] inputVectorExpressions, int outputColumnNum) {
    final boolean endsAtCurrentRow =
        (windowFrameDef.getWindowType() == WindowType.ROWS && windowFrameDef.getEnd().isCurrentRow());
    /*
     * Streaming is only possible when the window start is unbounded and the window ends at the
     * current row: a streaming evaluator calculates from the very first row and produces the
     * result for the current row on the fly, so a boundary cannot be forced on it.
     */
    final boolean streaming = windowFrameDef.getStart().isUnbounded() && endsAtCurrentRow;
    // Most evaluators look at the first argument only.
    final VectorExpression firstInput = inputVectorExpressions[0];
    final Type firstType = columnVectorTypes[0];

    switch (functionType) {
        case ROW_NUMBER:
            return new VectorPTFEvaluatorRowNumber(windowFrameDef, firstInput, outputColumnNum);
        case RANK:
            return new VectorPTFEvaluatorRank(windowFrameDef, outputColumnNum);
        case DENSE_RANK:
            return new VectorPTFEvaluatorDenseRank(windowFrameDef, outputColumnNum);
        case MIN:
            switch (firstType) {
                case LONG:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingLongMin(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorLongMin(windowFrameDef, firstInput, outputColumnNum);
                case DOUBLE:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingDoubleMin(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorDoubleMin(windowFrameDef, firstInput, outputColumnNum);
                case DECIMAL:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingDecimalMin(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorDecimalMin(windowFrameDef, firstInput, outputColumnNum);
                default:
                    throw new RuntimeException("Unexpected column vector type " + firstType + " for " + functionType);
            }
        case MAX:
            switch (firstType) {
                case LONG:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingLongMax(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorLongMax(windowFrameDef, firstInput, outputColumnNum);
                case DOUBLE:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingDoubleMax(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorDoubleMax(windowFrameDef, firstInput, outputColumnNum);
                case DECIMAL:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingDecimalMax(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorDecimalMax(windowFrameDef, firstInput, outputColumnNum);
                default:
                    throw new RuntimeException("Unexpected column vector type " + firstType + " for " + functionType);
            }
        case SUM:
            switch (firstType) {
                case LONG:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingLongSum(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorLongSum(windowFrameDef, firstInput, outputColumnNum);
                case DOUBLE:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingDoubleSum(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorDoubleSum(windowFrameDef, firstInput, outputColumnNum);
                case DECIMAL:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingDecimalSum(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorDecimalSum(windowFrameDef, firstInput, outputColumnNum);
                default:
                    throw new RuntimeException("Unexpected column vector type " + firstType + " for " + functionType);
            }
        case AVG:
            switch (firstType) {
                case LONG:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingLongAvg(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorLongAvg(windowFrameDef, firstInput, outputColumnNum);
                case DOUBLE:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingDoubleAvg(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorDoubleAvg(windowFrameDef, firstInput, outputColumnNum);
                case DECIMAL:
                    return streaming
                        ? new VectorPTFEvaluatorStreamingDecimalAvg(windowFrameDef, firstInput, outputColumnNum)
                        : new VectorPTFEvaluatorDecimalAvg(windowFrameDef, firstInput, outputColumnNum);
                default:
                    throw new RuntimeException("Unexpected column vector type " + firstType + " for " + functionType);
            }
        case FIRST_VALUE:
            switch (firstType) {
                case LONG:
                    return new VectorPTFEvaluatorLongFirstValue(windowFrameDef, firstInput, outputColumnNum);
                case DOUBLE:
                    return new VectorPTFEvaluatorDoubleFirstValue(windowFrameDef, firstInput, outputColumnNum);
                case DECIMAL:
                    return new VectorPTFEvaluatorDecimalFirstValue(windowFrameDef, firstInput, outputColumnNum);
                default:
                    throw new RuntimeException("Unexpected column vector type " + firstType + " for " + functionType);
            }
        case LAST_VALUE:
            switch (firstType) {
                case LONG:
                    return new VectorPTFEvaluatorLongLastValue(windowFrameDef, firstInput, outputColumnNum);
                case DOUBLE:
                    return new VectorPTFEvaluatorDoubleLastValue(windowFrameDef, firstInput, outputColumnNum);
                case DECIMAL:
                    return new VectorPTFEvaluatorDecimalLastValue(windowFrameDef, firstInput, outputColumnNum);
                default:
                    throw new RuntimeException("Unexpected column vector type " + firstType + " for " + functionType);
            }
        case COUNT:
            // A null first argument selects the count-star evaluator.
            if (firstInput == null) {
                return new VectorPTFEvaluatorCountStar(windowFrameDef, firstInput, outputColumnNum);
            }
            if (!isDistinct) {
                return new VectorPTFEvaluatorCount(windowFrameDef, firstInput, outputColumnNum);
            }
            switch (firstType) {
                case BYTES:
                    return new VectorPTFEvaluatorBytesCountDistinct(windowFrameDef, firstInput, outputColumnNum);
                // Decimal64ColumnVector is a LongColumnVector
                case DECIMAL_64:
                case LONG:
                    return new VectorPTFEvaluatorLongCountDistinct(windowFrameDef, firstInput, outputColumnNum);
                case DOUBLE:
                    return new VectorPTFEvaluatorDoubleCountDistinct(windowFrameDef, firstInput, outputColumnNum);
                case DECIMAL:
                    return new VectorPTFEvaluatorDecimalCountDistinct(windowFrameDef, firstInput, outputColumnNum);
                case TIMESTAMP:
                    return new VectorPTFEvaluatorTimestampCountDistinct(windowFrameDef, firstInput, outputColumnNum);
                default:
                    throw new RuntimeException("Unexpected column type for ptf count distinct: " + firstType);
            }
        case LAG: {
            // lag(column, constant, ...): the offset defaults to 1 when no second argument is given
            final int lagOffset = inputVectorExpressions.length > 1
                ? (int) ((ConstantVectorExpression) inputVectorExpressions[1]).getLongValue()
                : 1;
            // lag(column, constant, constant/column): optional default value expression
            final VectorExpression lagDefault = inputVectorExpressions.length > 2 ? inputVectorExpressions[2] : null;
            switch (firstType) {
                case LONG:
                case DOUBLE:
                case DECIMAL:
                    return new VectorPTFEvaluatorLag(windowFrameDef, firstInput, outputColumnNum, firstType, lagOffset, lagDefault);
                default:
                    throw new RuntimeException("Unexpected column vector type " + firstType + " for " + functionType);
            }
        }
        case LEAD: {
            // lead(column, constant, ...): the offset defaults to 1 when no second argument is given
            final int leadOffset = inputVectorExpressions.length > 1
                ? (int) ((ConstantVectorExpression) inputVectorExpressions[1]).getLongValue()
                : 1;
            // lead(column, constant, constant/column): optional default value expression
            final VectorExpression leadDefault = inputVectorExpressions.length > 2 ? inputVectorExpressions[2] : null;
            switch (firstType) {
                case LONG:
                case DOUBLE:
                case DECIMAL:
                    return new VectorPTFEvaluatorLead(windowFrameDef, firstInput, outputColumnNum, firstType, leadOffset, leadDefault);
                default:
                    throw new RuntimeException("Unexpected column vector type " + firstType + " for " + functionType);
            }
        }
        default:
            throw new RuntimeException("Unexpected function type " + functionType);
    }
}
Also used : VectorPTFEvaluatorStreamingLongSum(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongSum) VectorPTFEvaluatorDecimalSum(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalSum) VectorPTFEvaluatorDoubleSum(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleSum) VectorPTFEvaluatorDoubleCountDistinct(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleCountDistinct) VectorPTFEvaluatorLongCountDistinct(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongCountDistinct) VectorPTFEvaluatorCount(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorCount) VectorPTFEvaluatorDoubleMin(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMin) VectorPTFEvaluatorDoubleAvg(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleAvg) VectorPTFEvaluatorStreamingDoubleSum(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleSum) VectorPTFEvaluatorBytesCountDistinct(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBytesCountDistinct) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorPTFEvaluatorStreamingDoubleMin(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleMin) VectorPTFEvaluatorStreamingDoubleAvg(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleAvg) VectorPTFEvaluatorDecimalMin(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalMin) VectorPTFEvaluatorDecimalAvg(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalAvg) VectorPTFEvaluatorBase(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase) VectorPTFEvaluatorDoubleFirstValue(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleFirstValue) VectorPTFEvaluatorLead(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLead) 
VectorPTFEvaluatorDecimalMax(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalMax) VectorPTFEvaluatorStreamingDoubleMax(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleMax) VectorPTFEvaluatorLongFirstValue(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongFirstValue) VectorPTFEvaluatorLongLastValue(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongLastValue) VectorPTFEvaluatorDecimalLastValue(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalLastValue) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) VectorPTFEvaluatorRowNumber(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRowNumber) VectorPTFEvaluatorStreamingDecimalMax(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalMax) VectorPTFEvaluatorStreamingDecimalMin(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalMin) VectorPTFEvaluatorStreamingDecimalAvg(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalAvg) VectorPTFEvaluatorTimestampCountDistinct(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorTimestampCountDistinct) VectorPTFEvaluatorLongMin(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongMin) VectorPTFEvaluatorLongAvg(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongAvg) VectorPTFEvaluatorCountStar(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorCountStar) VectorPTFEvaluatorLongMax(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongMax) VectorPTFEvaluatorDecimalFirstValue(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalFirstValue) VectorPTFEvaluatorStreamingDecimalSum(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalSum) 
VectorPTFEvaluatorDecimalCountDistinct(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalCountDistinct) VectorPTFEvaluatorDoubleLastValue(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleLastValue) VectorPTFEvaluatorDoubleMax(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMax) VectorPTFEvaluatorStreamingLongMax(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongMax) VectorPTFEvaluatorLag(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLag) WindowType(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) VectorPTFEvaluatorRank(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRank) VectorPTFEvaluatorLongSum(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongSum) VectorPTFEvaluatorStreamingLongMin(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongMin) VectorPTFEvaluatorStreamingLongAvg(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongAvg) VectorPTFEvaluatorDenseRank(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDenseRank)

Aggregations

Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)1 ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)1 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)1 VectorPTFEvaluatorBase (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase)1 VectorPTFEvaluatorBytesCountDistinct (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBytesCountDistinct)1 VectorPTFEvaluatorCount (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorCount)1 VectorPTFEvaluatorCountStar (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorCountStar)1 VectorPTFEvaluatorDecimalAvg (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalAvg)1 VectorPTFEvaluatorDecimalCountDistinct (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalCountDistinct)1 VectorPTFEvaluatorDecimalFirstValue (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalFirstValue)1 VectorPTFEvaluatorDecimalLastValue (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalLastValue)1 VectorPTFEvaluatorDecimalMax (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalMax)1 VectorPTFEvaluatorDecimalMin (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalMin)1 VectorPTFEvaluatorDecimalSum (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalSum)1 VectorPTFEvaluatorDenseRank (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDenseRank)1 VectorPTFEvaluatorDoubleAvg (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleAvg)1 VectorPTFEvaluatorDoubleCountDistinct (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleCountDistinct)1 VectorPTFEvaluatorDoubleFirstValue (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleFirstValue)1 VectorPTFEvaluatorDoubleLastValue (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleLastValue)1 
VectorPTFEvaluatorDoubleMax (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMax)1