Search in sources:

Example 6 with WindowTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.

the class WindowingTableFunction method startPartition.

/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#startPartition()
 *
 * Resets the streaming state against this evaluator's table definition.
 */
@Override
public void startPartition() throws HiveException {
    // reset() is handed the table definition viewed as a WindowTableFunctionDef.
    streamingState.reset((WindowTableFunctionDef) getTableDef());
}
Also used : WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef)

Example 7 with WindowTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.

the class Vectorizer method createVectorPTFDesc.

/*
 * Populates the given VectorPTFDesc from the PTF operator and its PTFDesc: output column
 * names and types, partition and order expressions, the per-evaluator function names, window
 * frames and input expressions, the reducer batch type infos obtained from the
 * VectorizationContext, and the buffering batch-count limit.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc, int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    List<WindowFunctionDef> windowsFunctions = ((WindowTableFunctionDef) funcDef).getWindowFunctions();
    final int functionCount = windowsFunctions.size();
    List<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
    final int outputSize = outputSignature.size();

    /*
     * Output columns, copied from the operator's output signature in two phases that share one
     * running index.
     */
    String[] outputColumnNames = new String[outputSize];
    TypeInfo[] outputTypeInfos = new TypeInfo[outputSize];
    int column = 0;
    // Phase 1: evaluator results come first, one per window function.
    while (column < functionCount) {
        ColumnInfo evaluatorColumn = outputSignature.get(column);
        outputColumnNames[column] = evaluatorColumn.getInternalName();
        outputTypeInfos[column] = evaluatorColumn.getType();
        column++;
    }
    // Phase 2: the key and non-key input columns that follow (some may be missing).
    while (column < outputSize) {
        ColumnInfo inputColumn = outputSignature.get(column);
        outputColumnNames[column] = inputColumn.getInternalName();
        outputTypeInfos[column] = inputColumn.getType();
        column++;
    }

    List<PTFExpressionDef> partitionExpressions = funcDef.getPartition().getExpressions();
    final int partitionKeyCount = partitionExpressions.size();
    ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions);
    List<OrderExpressionDef> orderExpressions = funcDef.getOrder().getExpressions();
    final int orderKeyCount = orderExpressions.size();
    ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions);

    // The partition and order expressions are considered different when their counts differ
    // or when any pair at the same position is unequal; otherwise only order by expressions
    // are in play.
    boolean isPartitionOrderBy = partitionKeyCount != orderKeyCount;
    for (int key = 0; !isPartitionOrderBy && key < partitionKeyCount; key++) {
        final ExprNodeDescEqualityWrapper partitionKey = new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[key]);
        final ExprNodeDescEqualityWrapper orderKey = new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[key]);
        // The first mismatch sets the flag, which also ends the loop.
        isPartitionOrderBy = !partitionKey.equals(orderKey);
    }

    // Parallel arrays, one slot per window function, filled in by fillInPTFEvaluators.
    String[] evaluatorFunctionNames = new String[functionCount];
    WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount];
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = (List<ExprNodeDesc>[]) new List<?>[functionCount];
    fillInPTFEvaluators(windowsFunctions, evaluatorFunctionNames, evaluatorWindowFrameDefs, evaluatorInputExprNodeDescLists);

    // Publish everything gathered above on the descriptor.
    vectorPTFDesc.setReducerBatchTypeInfos(vContext.getAllTypeInfos());
    vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
    vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
    vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
    vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
    vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
    vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
    vectorPTFDesc.setOutputColumnNames(outputColumnNames);
    vectorPTFDesc.setOutputTypeInfos(outputTypeInfos);
    vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
Also used : WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeDescEqualityWrapper(org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) OrderExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef)

Example 8 with WindowTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.

the class PTFTranslator method translate.

/*
 * Translates a WindowingSpec into a PTFDesc whose function definition is a
 * WindowTableFunctionDef.
 *
 * The steps run in a fixed order: initialise the translator, create the descriptor and its
 * windowing input, resolve and initialise the windowing table function, translate the
 * partition and order specs, translate each window function, and finally build the output
 * object inspector and shape.
 *
 * Throws SemanticException when the windowing table function resolver cannot be found or when
 * the spec carries no partitioning.
 */
public PTFDesc translate(WindowingSpec wdwSpec, SemanticAnalyzer semAly, HiveConf hCfg, RowResolver inputRR, UnparseTranslator unparseT) throws SemanticException {
    init(semAly, hCfg, inputRR, unparseT);
    windowingSpec = wdwSpec;

    ptfDesc = new PTFDesc();
    ptfDesc.setCfg(hCfg);
    ptfDesc.setLlInfo(llInfo);

    WindowTableFunctionDef windowTableDef = new WindowTableFunctionDef();
    ptfDesc.setFuncDef(windowTableDef);

    // The input of the windowing function is a query input of type WINDOWING.
    PTFQueryInputSpec queryInput = new PTFQueryInputSpec();
    queryInput.setType(PTFQueryInputType.WINDOWING);
    windowTableDef.setInput(translate(queryInput, 0));
    ShapeDetails inputShape = windowTableDef.getInput().getOutputShape();

    WindowingTableFunctionResolver resolver = (WindowingTableFunctionResolver) FunctionRegistry.getTableFunctionResolver(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
    if (resolver == null) {
        throw new SemanticException(String.format("Internal Error: Unknown Table Function %s", FunctionRegistry.WINDOWING_TABLE_FUNCTION));
    }

    windowTableDef.setName(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
    windowTableDef.setResolverClassName(resolver.getClass().getName());
    windowTableDef.setAlias("ptf_" + 1);
    windowTableDef.setExpressionTreeString(null);
    windowTableDef.setTransformsRawInput(false);

    // The evaluator is only fetched after the resolver has been initialised.
    resolver.initialize(hCfg, ptfDesc, windowTableDef);
    windowTableDef.setTFunction(resolver.getEvaluator());
    windowTableDef.setCarryForwardNames(resolver.carryForwardNames());
    windowTableDef.setRawInputShape(inputShape);

    if (wdwSpec.getQueryPartitioningSpec() == null) {
        throw new SemanticException("Invalid use of Windowing: there is no Partitioning associated with Windowing");
    }
    PartitionDef partitionDef = translate(inputShape, wdwSpec.getQueryPartitionSpec());
    OrderDef orderDef = translate(inputShape, wdwSpec.getQueryOrderSpec(), partitionDef);
    windowTableDef.setPartition(partitionDef);
    windowTableDef.setOrder(orderDef);

    /*
     * Window functions: only WindowFunctionSpec expressions are translated. The list is set on
     * the definition only when the spec has a window-expression list at all.
     */
    ArrayList<WindowFunctionDef> windowFunctions = new ArrayList<WindowFunctionDef>();
    if (wdwSpec.getWindowExpressions() != null) {
        for (WindowExpressionSpec expression : wdwSpec.getWindowExpressions()) {
            if (!(expression instanceof WindowFunctionSpec)) {
                continue;
            }
            windowFunctions.add(translate(windowTableDef, (WindowFunctionSpec) expression));
        }
        windowTableDef.setWindowFunctions(windowFunctions);
    }

    /*
     * Output of window function processing: one field per window function, then the input
     * columns appended by addInputColumnsToList.
     */
    ArrayList<String> aliases = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    for (WindowFunctionDef function : windowFunctions) {
        aliases.add(function.getAlias());
        // A pivot-result function contributes the element inspector of its list inspector.
        ObjectInspector fieldOI = function.isPivotResult()
            ? ((ListObjectInspector) function.getOI()).getListElementObjectInspector()
            : function.getOI();
        fieldOIs.add(fieldOI);
    }
    PTFTranslator.addInputColumnsToList(inputShape, aliases, fieldOIs);

    StructObjectInspector windowOutputOI = ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
    resolver.setWdwProcessingOutputOI(windowOutputOI);
    RowResolver windowOutputRR = buildRowResolverForWindowing(windowTableDef);
    windowTableDef.setOutputShape(setupShape(windowOutputOI, null, windowOutputRR));

    resolver.setupOutputOI();
    PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
    return ptfDesc;
}
Also used : WindowingTableFunctionResolver(org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) ArrayList(java.util.ArrayList) WindowFunctionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec) PTFQueryInputSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec) ShapeDetails(org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails) PartitioningSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec) TableFunctionEvaluator(org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) PartitionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionDef) WindowExpressionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec) OrderDef(org.apache.hadoop.hive.ql.plan.ptf.OrderDef) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 9 with WindowTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.

the class WindowingTableFunction method initializeStreaming.

/*
 * Sets this evaluator up for streaming when setCanAcceptInputAsStream reports that streaming
 * is possible; otherwise returns without touching anything else.
 *
 * For each window function that has a windowing evaluator for its frame, that evaluator
 * replaces the function's current one, and a pivot-result function has its object inspector
 * narrowed to the list element inspector. A RankLimit is created when the table definition's
 * rank limit is not -1. Finally the StreamingState is built from the span returned by
 * setCanAcceptInputAsStream.
 */
@Override
public void initializeStreaming(Configuration cfg, StructObjectInspector inputOI, boolean isMapSide) throws HiveException {
    final int[] span = setCanAcceptInputAsStream(cfg);
    if (!canAcceptInputAsStream) {
        return;
    }

    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
    for (WindowFunctionDef function : tabDef.getWindowFunctions()) {
        WindowFrameDef frame = function.getWindowFrame();
        GenericUDAFEvaluator streamingEval = function.getWFnEval().getWindowingEvaluator(frame);
        if (streamingEval == null) {
            // No windowing evaluator for this frame: leave the function as it is.
            continue;
        }
        function.setWFnEval(streamingEval);
        if (function.isPivotResult()) {
            ListObjectInspector listOI = (ListObjectInspector) function.getOI();
            function.setOI(listOI.getListElementObjectInspector());
        }
    }

    final int rankLimit = tabDef.getRankLimit();
    if (rankLimit != -1) {
        rnkLimitDef = new RankLimit(rankLimit, tabDef.getRankLimitFunction(), tabDef.getWindowFunctions());
    }

    streamingState = new StreamingState(cfg, inputOI, isMapSide, tabDef, span[0], span[1]);
}
Also used : WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Example 10 with WindowTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.

the class WindowingTableFunction method setCanAcceptInputAsStream.

/*
 * (non-Javadoc)
 *
 * @see
 * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#canAcceptInputAsStream
 * ()
 *
 * Decides whether this table function can accept its input as a stream, records the decision
 * in canAcceptInputAsStream, and returns the row span that must be kept available.
 *
 * Streaming is rejected (null is returned, flag left false) when:
 *   - lead/lag expressions are present; or
 *   - some window function neither yields a windowing evaluator implementing
 *     ISupportStreamingModeForWindowing nor has a ROWS frame bounded at both ends; or
 *   - the span required by the bounded ROWS frames exceeds the configured
 *     ConfVars.HIVEJOINCACHESIZE value.
 *
 * Otherwise the flag is set to true and { startPos, endPos } is returned, where startPos is
 * the smallest start offset and endPos the largest end offset over the bounded ROWS frames.
 * When every function is handled by a streaming evaluator, no frame contributes and the two
 * values remain Integer.MAX_VALUE and Integer.MIN_VALUE.
 */
@SuppressWarnings("resource")
private int[] setCanAcceptInputAsStream(Configuration cfg) throws HiveException {
    canAcceptInputAsStream = false;
    if (ptfDesc.getLlInfo().getLeadLagExprs() != null) {
        return null;
    }
    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
    int startPos = Integer.MAX_VALUE;
    int endPos = Integer.MIN_VALUE;
    for (WindowFunctionDef wFnDef : tabDef.getWindowFunctions()) {
        WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
        GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
        boolean streamingPossible = streamingPossible(cfg, wFnDef);
        GenericUDAFEvaluator streamingEval = streamingPossible ? fnEval.getWindowingEvaluator(wdwFrame) : null;
        // instanceof is already false for null, so no separate null check is needed.
        if (streamingEval instanceof ISupportStreamingModeForWindowing) {
            continue;
        }
        BoundaryDef start = wdwFrame.getStart();
        BoundaryDef end = wdwFrame.getEnd();
        if (wdwFrame.getWindowType() == WindowType.ROWS && !end.isUnbounded() && !start.isUnbounded()) {
            // Bounded ROWS frame: widen the overall span to cover it.
            startPos = Math.min(startPos, start.getRelativeOffset());
            endPos = Math.max(endPos, end.getRelativeOffset());
            continue;
        }
        // Neither streamable nor a bounded ROWS frame.
        return null;
    }
    int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
    // Compute the span in long arithmetic. When no frame contributed, startPos/endPos still
    // hold their MAX_VALUE/MIN_VALUE sentinels and the int expression
    // (endPos - startPos + 1) wraps around to 2, which would wrongly reject streaming for a
    // limit below 2 even though no span is required.
    long requiredSpan = (long) endPos - startPos + 1;
    if (windowLimit < requiredSpan) {
        return null;
    }
    canAcceptInputAsStream = true;
    return new int[] { startPos, endPos };
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) BoundaryDef(org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Aggregations

WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef): 12; WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef): 9; ArrayList (java.util.ArrayList): 7; GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator): 5; List (java.util.List): 4; WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef): 4; ISupportStreamingModeForWindowing (org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing): 4; ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 3; AbstractList (java.util.AbstractList): 2; PTFPartition (org.apache.hadoop.hive.ql.exec.PTFPartition): 2; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2; ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 2; PTFDesc (org.apache.hadoop.hive.ql.plan.PTFDesc): 2; PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef): 2; PartitionedTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef): 2; ArrayDeque (java.util.ArrayDeque): 1; WindowFunctionInfo (org.apache.hadoop.hive.ql.exec.WindowFunctionInfo): 1; ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 1; Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type): 1; VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator): 1