Examples with WindowFrameDef - org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef

Example 1 with WindowFrameDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.

In class VectorPTFDesc, method getEvaluators.

/**
 * Creates one evaluator per function name held by the descriptor.
 *
 * <p>For each index, the function name is looked up in
 * {@code VectorPTFDesc.supportedFunctionsMap} and the evaluator is obtained from
 * {@code VectorPTFDesc.getEvaluator(...)} using the per-index distinct flag, window frame,
 * input column vector types, input expressions and output column number.
 *
 * @param vectorPTFDesc source of the function names, distinct flags and window frames
 * @param vectorPTFInfo source of the input expressions, input column vector types and
 *        output column map
 * @return an array with one evaluator per function name, in the same order
 */
public static VectorPTFEvaluatorBase[] getEvaluators(VectorPTFDesc vectorPTFDesc, VectorPTFInfo vectorPTFInfo) {
    final String[] functionNames = vectorPTFDesc.getEvaluatorFunctionNames();
    final boolean[] distinctFlags = vectorPTFDesc.getEvaluatorsAreDistinct();
    final int count = functionNames.length;
    final WindowFrameDef[] frameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
    final VectorExpression[][] inputExpressions = vectorPTFInfo.getEvaluatorInputExpressions();
    final Type[][] inputColumnVectorTypes = vectorPTFInfo.getEvaluatorInputColumnVectorTypes();
    final int[] outputColumnMap = vectorPTFInfo.getOutputColumnMap();

    final VectorPTFEvaluatorBase[] result = new VectorPTFEvaluatorBase[count];
    int idx = 0;
    while (idx < count) {
        final String name = functionNames[idx];
        final boolean distinct = distinctFlags[idx];
        final WindowFrameDef frameDef = frameDefs[idx];
        final SupportedFunctionType type = VectorPTFDesc.supportedFunctionsMap.get(name);
        final VectorExpression[] expressions = inputExpressions[idx];
        final Type[] vectorTypes = inputColumnVectorTypes[idx];
        // The output column map is indexed from 0 in evaluator order.
        final int outputColumn = outputColumnMap[idx];
        result[idx] = VectorPTFDesc.getEvaluator(type, distinct, frameDef, vectorTypes, expressions, outputColumn);
        idx++;
    }
    return result;
}

Also used : VectorPTFEvaluatorBase(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase) WindowType(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 2 with WindowFrameDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.

In class WindowingTableFunction, method streamingPossible.

/**
 * Decides whether the given window function can be evaluated in streaming mode.
 *
 * <p>Functions that do not support windowing are always reported as streamable. Otherwise
 * the frame is rejected when it is RANGE based or its end boundary is unbounded, accepted
 * when its start boundary is unbounded, and in the remaining case accepted only when the
 * frame size (start amount + end amount + 1) does not exceed the value configured under
 * {@code ConfVars.HIVEJOINCACHESIZE}.
 *
 * @param cfg configuration from which the size limit is read
 * @param wFnDef the window function definition to inspect
 * @return {@code true} if streaming is considered possible, {@code false} otherwise
 * @throws HiveException declared by the method; may originate from the helper lookup
 */
private boolean streamingPossible(Configuration cfg, WindowFunctionDef wFnDef) throws HiveException {
    final WindowFrameDef frame = wFnDef.getWindowFrame();
    final WindowingFunctionInfoHelper fnInfo = getWindowingFunctionInfoHelper(wFnDef.getName());
    if (!fnInfo.isSupportsWindow()) {
        // No window semantics at all, so the frame is not consulted.
        return true;
    }

    final BoundaryDef frameStart = frame.getStart();
    final BoundaryDef frameEnd = frame.getEnd();

    // Dynamically sized windows implied by RANGE based frames are not handled currently.
    if (frame.getWindowType() == WindowType.RANGE) {
        return false;
    }

    // Frames that are unbounded following do not benefit from streaming.
    if (frameEnd.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
        return false;
    }

    // Unbounded preceding: let the function decide whether it can handle this special case.
    if (frameStart.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
        return true;
    }

    // Bounded on both sides: streamable only if the whole frame fits within the limit.
    final int limit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
    return !(limit < (frameStart.getAmt() + frameEnd.getAmt() + 1));
}

Also used : BoundaryDef(org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)

Example 3 with WindowFrameDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.

In class PTFDeserializer, method initializeWindowing.

/**
 * Initializes a windowing table function definition.
 *
 * <p>Steps, in order: construct and initialize the resolver; initialize every window
 * function's argument expressions and window frame against the input shape and set up its
 * evaluator; build the output struct object inspector from the function aliases and
 * object inspectors followed by the input columns; hand that inspector to the resolver
 * and the output shape; finally let the resolver initialize its output object inspector.
 *
 * @param def the windowing table function definition to initialize
 * @throws HiveException declared by the method; may originate from any initialization step
 */
public void initializeWindowing(WindowTableFunctionDef def) throws HiveException {
    final ShapeDetails inputShape = def.getInput().getOutputShape();

    // Step 1: construct the resolver and connect it to the definition and its evaluator.
    final TableFunctionEvaluator tableEvaluator = def.getTFunction();
    final WindowingTableFunctionResolver resolver =
        (WindowingTableFunctionResolver) constructResolver(def.getResolverClassName());
    resolver.initialize(hConf, ptfDesc, def, tableEvaluator);

    // Step 2: initialize each window function (arguments, frame, evaluator).
    for (WindowFunctionDef fn : def.getWindowFunctions()) {
        if (fn.getArgs() != null) {
            for (PTFExpressionDef argument : fn.getArgs()) {
                initialize(argument, inputShape);
            }
        }
        final WindowFrameDef frame = fn.getWindowFrame();
        if (frame != null) {
            initialize(frame, inputShape);
        }
        setupWdwFnEvaluator(fn);
    }

    // Collect the output columns: one per window function, then the input columns.
    final ArrayList<String> columnAliases = new ArrayList<>();
    final ArrayList<ObjectInspector> columnOIs = new ArrayList<>();
    for (WindowFunctionDef fn : def.getWindowFunctions()) {
        columnAliases.add(fn.getAlias());
        // Pivoted results are lists; the output column carries the element inspector.
        final ObjectInspector columnOI = fn.isPivotResult()
            ? ((ListObjectInspector) fn.getOI()).getListElementObjectInspector()
            : fn.getOI();
        columnOIs.add(columnOI);
    }
    PTFDeserializer.addInputColumnsToList(inputShape, columnAliases, columnOIs);

    final StructObjectInspector outputOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(columnAliases, columnOIs);
    resolver.setWdwProcessingOutputOI(outputOI);
    initialize(def.getOutputShape(), outputOI);
    resolver.initializeOutputOI();
}

Also used : WindowingTableFunctionResolver(org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) TableFunctionEvaluator(org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) ArrayList(java.util.ArrayList) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) ShapeDetails(org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 4 with WindowFrameDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.

In class Vectorizer, method createVectorPTFDesc.

/*
 * Fills in the VectorPTFDesc with the data that is needed during validation and that does
 * not depend on VectorizationContext for looking up column names, etc.
 *
 * The output columns are laid out with the evaluator results first (one per window
 * function), followed by the key and non-key input columns. The descriptor also records
 * whether the PARTITION BY and ORDER BY expressions differ.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc, int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
    final PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    final List<WindowFunctionDef> windowFunctions = ((WindowTableFunctionDef) funcDef).getWindowFunctions();
    final int functionCount = windowFunctions.size();
    final List<ColumnInfo> signature = ptfOp.getSchema().getSignature();
    final int outputSize = signature.size();

    /*
     * Output columns.
     */
    final TypeInfo[] reducerBatchTypeInfos = vContext.getAllTypeInfos();
    final DataTypePhysicalVariation[] reducerBatchVariations = vContext.getAllDataTypePhysicalVariations();

    final String[] outputColumnNames = new String[outputSize];
    final TypeInfo[] outputTypeInfos = new TypeInfo[outputSize];
    final DataTypePhysicalVariation[] outputVariations = new DataTypePhysicalVariation[outputSize];

    // Evaluator results come first; they always use the NONE physical variation.
    int col = 0;
    while (col < functionCount) {
        final ColumnInfo info = signature.get(col);
        final TypeInfo evaluatorTypeInfo = info.getType();
        outputColumnNames[col] = info.getInternalName();
        outputTypeInfos[col] = evaluatorTypeInfo;
        outputVariations[col] = DataTypePhysicalVariation.NONE;
        col++;
    }
    // Then the key and non-key input columns (some may be missing); their physical
    // variation is taken from the reducer batch at the position offset by functionCount.
    for (int out = functionCount; out < outputSize; out++) {
        final ColumnInfo info = signature.get(out);
        outputColumnNames[out] = info.getInternalName();
        outputTypeInfos[out] = info.getType();
        outputVariations[out] = reducerBatchVariations[out - functionCount];
    }

    final List<PTFExpressionDef> partitionExpressions = funcDef.getPartition().getExpressions();
    final int partitionKeyCount = partitionExpressions.size();
    final ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions);
    final List<OrderExpressionDef> orderExpressions = funcDef.getOrder().getExpressions();
    final int orderKeyCount = orderExpressions.size();
    final ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions);

    // With both PARTITION BY and ORDER BY clauses the partition expressions differ from the
    // order expressions; otherwise there are only order by expressions. A different key
    // count settles it immediately; equal counts require a pairwise comparison that stops
    // at the first mismatch.
    boolean isPartitionOrderBy = partitionKeyCount != orderKeyCount;
    for (int key = 0; !isPartitionOrderBy && key < partitionKeyCount; key++) {
        final ExprNodeDescEqualityWrapper partitionWrapper =
            new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[key]);
        final ExprNodeDescEqualityWrapper orderWrapper =
            new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[key]);
        isPartitionOrderBy = !partitionWrapper.equals(orderWrapper);
    }

    // Per-evaluator arrays, populated by fillInPTFEvaluators.
    final String[] evaluatorFunctionNames = new String[functionCount];
    final boolean[] evaluatorsAreDistinct = new boolean[functionCount];
    final WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount];
    final List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = (List<ExprNodeDesc>[]) new List<?>[functionCount];
    fillInPTFEvaluators(windowFunctions, evaluatorFunctionNames, evaluatorsAreDistinct, evaluatorWindowFrameDefs, evaluatorInputExprNodeDescLists);

    // Publish everything on the descriptor.
    vectorPTFDesc.setReducerBatchTypeInfos(reducerBatchTypeInfos, reducerBatchVariations);
    vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
    vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
    vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
    vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
    vectorPTFDesc.setEvaluatorsAreDistinct(evaluatorsAreDistinct);
    vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
    vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
    vectorPTFDesc.setOutputColumnNames(outputColumnNames);
    vectorPTFDesc.setOutputTypeInfos(outputTypeInfos, outputVariations);
    vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}

Also used : PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) OrderExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeDescEqualityWrapper(org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)

Example 5 with WindowFrameDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.

In class PTFTranslator, method translate.

/**
 * Translates a window frame specification into a {@code WindowFrameDef}.
 *
 * @param inpShape input shape, passed to {@code buildOrderExpressions} for RANGE frames
 * @param spec the frame specification; may be {@code null}
 * @param orderExpressions order expressions, used only when the frame type is RANGE
 * @return the translated frame definition, or {@code null} when {@code spec} is {@code null}
 * @throws SemanticException if the start boundary compares greater than the end boundary
 */
private WindowFrameDef translate(ShapeDetails inpShape, WindowFrameSpec spec, List<OrderExpression> orderExpressions) throws SemanticException {
    if (spec == null) {
        return null;
    }

    final BoundarySpec startSpec = spec.getStart();
    final BoundarySpec endSpec = spec.getEnd();
    if (startSpec.compareTo(endSpec) > 0) {
        throw new SemanticException(String.format("Window range invalid, start boundary is greater than end boundary: %s", spec));
    }

    final WindowType frameType = spec.getWindowType();
    final BoundaryDef startDef = new BoundaryDef(startSpec.direction, startSpec.getAmt());
    final BoundaryDef endDef = new BoundaryDef(endSpec.direction, endSpec.getAmt());
    final WindowFrameDef frameDef = new WindowFrameDef(frameType, startDef, endDef);

    // Only RANGE frames get an order definition attached.
    if (frameDef.getWindowType() == WindowType.RANGE) {
        frameDef.setOrderDef(buildOrderExpressions(inpShape, orderExpressions));
    }
    return frameDef;
}

Also used : BoundaryDef(org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) BoundarySpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec)

Aggregations

WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) 10, WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) 5, BoundaryDef (org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef) 4, WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) 4, ArrayList (java.util.ArrayList) 3, TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 3, List (java.util.List) 2, Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) 2, HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 2, BoundarySpec (org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec) 2, ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 2, PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) 2, GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) 2, DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) 1, WindowFunctionInfo (org.apache.hadoop.hive.ql.exec.WindowFunctionInfo) 1, ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) 1, VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) 1, VectorMapJoinOuterFilteredOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) 1, VectorizationOperator (org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) 1, ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) 1