Search in sources :

Example 1 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class PTFOperator method initializeOp.

/**
 * Prepares this operator for processing rows.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Run the superclass initialization and remember the job configuration.</li>
 *   <li>Record whether the operator descriptor says this is the map side, and clear the
 *       current keys.</li>
 *   <li>Call {@code reconstructQueryDef} with the stored configuration.</li>
 *   <li>Choose the output object inspector: on the map side it is the raw-input shape's
 *       inspector of the start-of-chain function; otherwise it is the output shape's
 *       inspector of the descriptor's function definition.</li>
 *   <li>Set up the keys wrapper from the first input object inspector, build the invocation
 *       chain, and initialize streaming on it.</li>
 * </ol>
 *
 * @param jobConf the job configuration passed on to the superclass and to streaming setup
 * @throws HiveException declared by this method; propagated from the calls it makes
 */
@Override
protected void initializeOp(Configuration jobConf) throws HiveException {
    super.initializeOp(jobConf);
    hiveConf = jobConf;
    isMapOperator = conf.isMapSide();
    currentKeys = null;
    reconstructQueryDef(hiveConf);

    // Map side exposes the raw input shape of the first function in the chain;
    // otherwise the output shape of the descriptor's function definition is used.
    outputObjInspector = isMapOperator
        ? conf.getStartOfChain().getRawInputShape().getOI()
        : conf.getFuncDef().getOutputShape().getOI();

    setupKeysWrapper(inputObjInspectors[0]);
    ptfInvocation = setupChain();
    ptfInvocation.initializeStreaming(jobConf, isMapOperator);
    firstMapRow = true;
}
Also used : PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)

Example 2 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class PTFOperator method setupChain.

/**
 * Builds the chain of {@code PTFInvocation} objects for this operator.
 *
 * <p>Starting at {@code conf.getFuncDef()}, the input links are followed for as long as the
 * current definition is a {@code PartitionedTableFunctionDef}; each one is pushed on a stack.
 * The stack is then unwound, so the definition reached last is handled first. Every new
 * invocation receives the previously created invocation (or {@code null} for the very first
 * one) as its first constructor argument.
 *
 * @return the first invocation that was created, or {@code null} when no
 *         {@code PartitionedTableFunctionDef} was found
 */
private PTFInvocation setupChain() {
    Stack<PartitionedTableFunctionDef> pending = new Stack<PartitionedTableFunctionDef>();
    for (PTFInputDef def = conf.getFuncDef();
            def instanceof PartitionedTableFunctionDef;
            def = ((PartitionedTableFunctionDef) def).getInput()) {
        pending.push((PartitionedTableFunctionDef) def);
    }

    PTFInvocation head = null;
    PTFInvocation previous = null;
    while (!pending.isEmpty()) {
        // 'previous' is read before pop() runs, so each invocation is handed its predecessor.
        previous = new PTFInvocation(previous, pending.pop().getTFunction());
        if (head == null) {
            head = previous;
        }
    }
    return head;
}
Also used : PTFInputDef(org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) Stack(java.util.Stack)

Example 3 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class Vectorizer method createVectorPTFDesc.

/**
 * Fills in {@code vectorPTFDesc} with the data that is used during validation and that does
 * not rely on the VectorizationContext to look up column names, etc.
 *
 * <p>The output arrays are sized from the operator's schema signature. The first
 * {@code windowFunctions.size()} output columns are treated as evaluator results and get
 * {@code DataTypePhysicalVariation.NONE}; the remaining columns take their physical variation
 * from the reducer batch variations, indexed relative to the first non-evaluator column.
 *
 * @param ptfOp the operator whose schema signature defines the output columns
 * @param ptfDesc the PTF descriptor; its function definition is cast to
 *        {@code WindowTableFunctionDef}
 * @param vContext source of the reducer batch type infos and physical variations
 * @param vectorPTFDesc the descriptor that receives all computed values
 * @param vectorizedPTFMaxMemoryBufferingBatchCount value stored unchanged on
 *        {@code vectorPTFDesc}
 * @throws HiveException declared by this method; propagated from the helpers it calls
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc, int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    List<WindowFunctionDef> windowFns = ((WindowTableFunctionDef) funcDef).getWindowFunctions();
    final int evaluatorCount = windowFns.size();
    List<ColumnInfo> signature = ptfOp.getSchema().getSignature();
    final int columnCount = signature.size();

    /*
     * Output columns.
     */
    TypeInfo[] batchTypeInfos = vContext.getAllTypeInfos();
    DataTypePhysicalVariation[] batchVariations = vContext.getAllDataTypePhysicalVariations();

    String[] columnNames = new String[columnCount];
    TypeInfo[] columnTypes = new TypeInfo[columnCount];
    DataTypePhysicalVariation[] columnVariations = new DataTypePhysicalVariation[columnCount];

    int col = 0;
    // Evaluator results come first.
    for (; col < evaluatorCount; col++) {
        ColumnInfo info = signature.get(col);
        columnTypes[col] = info.getType();
        columnNames[col] = info.getInternalName();
        columnVariations[col] = DataTypePhysicalVariation.NONE;
    }
    // Then the key and non-key input columns (some may be missing).
    for (; col < columnCount; col++) {
        ColumnInfo info = signature.get(col);
        columnNames[col] = info.getInternalName();
        columnTypes[col] = info.getType();
        columnVariations[col] = batchVariations[col - evaluatorCount];
    }

    List<PTFExpressionDef> partitionExprs = funcDef.getPartition().getExpressions();
    final int partitionKeyCount = partitionExprs.size();
    ExprNodeDesc[] partitionDescs = getPartitionExprNodeDescs(partitionExprs);

    List<OrderExpressionDef> orderExprs = funcDef.getOrder().getExpressions();
    final int orderKeyCount = orderExprs.size();
    ExprNodeDesc[] orderDescs = getOrderExprNodeDescs(orderExprs);

    // The flag is set when the partition expressions differ from the order expressions:
    // either the counts differ, or some pair at the same position is not equal.
    boolean isPartitionOrderBy = partitionKeyCount != orderKeyCount;
    for (int k = 0; !isPartitionOrderBy && k < partitionKeyCount; k++) {
        final ExprNodeDescEqualityWrapper partitionSide = new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionDescs[k]);
        final ExprNodeDescEqualityWrapper orderSide = new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderDescs[k]);
        isPartitionOrderBy = !partitionSide.equals(orderSide);
    }

    // Per-evaluator arrays, populated by fillInPTFEvaluators.
    String[] fnNames = new String[evaluatorCount];
    boolean[] fnDistinctFlags = new boolean[evaluatorCount];
    WindowFrameDef[] fnWindowFrames = new WindowFrameDef[evaluatorCount];
    List<ExprNodeDesc>[] fnInputExprLists = (List<ExprNodeDesc>[]) new List<?>[evaluatorCount];
    fillInPTFEvaluators(windowFns, fnNames, fnDistinctFlags, fnWindowFrames, fnInputExprLists);

    vectorPTFDesc.setReducerBatchTypeInfos(batchTypeInfos, batchVariations);
    vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
    vectorPTFDesc.setOrderExprNodeDescs(orderDescs);
    vectorPTFDesc.setPartitionExprNodeDescs(partitionDescs);
    vectorPTFDesc.setEvaluatorFunctionNames(fnNames);
    vectorPTFDesc.setEvaluatorsAreDistinct(fnDistinctFlags);
    vectorPTFDesc.setEvaluatorWindowFrameDefs(fnWindowFrames);
    vectorPTFDesc.setEvaluatorInputExprNodeDescLists(fnInputExprLists);
    vectorPTFDesc.setOutputColumnNames(columnNames);
    vectorPTFDesc.setOutputTypeInfos(columnTypes, columnVariations);
    vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
Also used : PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) OrderExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeDescEqualityWrapper(org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)

Example 4 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class PTFDeserializer method initializePTFChain.

/**
 * Initializes every definition in the PTF chain that ends at {@code tblFnDef}.
 *
 * <p>The chain is collected by following {@code getInput()} from {@code tblFnDef} until
 * {@code null} is reached. Definitions are then initialized in the reverse of the order they
 * were collected, so the definition reached last is initialized first. Finally the output
 * object inspector is adjusted for streaming via {@link #alterOutputOIForStreaming}.
 *
 * @param tblFnDef the definition to start collecting from
 * @throws HiveException declared by this method; propagated from the initialization calls
 */
public void initializePTFChain(PartitionedTableFunctionDef tblFnDef) throws HiveException {
    Deque<PTFInputDef> chain = new ArrayDeque<PTFInputDef>();
    for (PTFInputDef def = tblFnDef; def != null; def = def.getInput()) {
        chain.push(def);
    }

    // Deque iteration runs head to tail, i.e. most recently pushed first -- the same
    // order in which successive pop() calls would return the elements.
    for (PTFInputDef def : chain) {
        if (def instanceof PTFQueryInputDef) {
            initialize((PTFQueryInputDef) def, inputOI);
            continue;
        }
        if (def instanceof WindowTableFunctionDef) {
            initializeWindowing((WindowTableFunctionDef) def);
            continue;
        }
        initialize((PartitionedTableFunctionDef) def);
    }

    PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
}
Also used : PTFInputDef(org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) ArrayDeque(java.util.ArrayDeque) PTFQueryInputDef(org.apache.hadoop.hive.ql.plan.ptf.PTFQueryInputDef)

Example 5 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class PTFDeserializer method alterOutputOIForStreaming.

/**
 * If the final PTF in a PTF chain can stream its output, sets the object inspector of its
 * output shape to the one returned by its TableFunctionEvaluator. Otherwise nothing is
 * changed.
 *
 * @param ptfDesc descriptor whose function definition is inspected and possibly updated
 */
public static void alterOutputOIForStreaming(PTFDesc ptfDesc) {
    PartitionedTableFunctionDef finalFn = ptfDesc.getFuncDef();
    TableFunctionEvaluator evaluator = finalFn.getTFunction();
    if (!evaluator.canIterateOutput()) {
        return;
    }
    finalFn.getOutputShape().setOI(evaluator.getOutputOI());
}
Also used : TableFunctionEvaluator(org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)

Aggregations

PartitionedTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef): 8; ArrayList (java.util.ArrayList): 3; List (java.util.List): 3; ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 3; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2; PTFDesc (org.apache.hadoop.hive.ql.plan.PTFDesc): 2; PTFInputDef (org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef): 2; WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef): 2; TableFunctionEvaluator (org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator): 2; ArrayDeque (java.util.ArrayDeque): 1; LinkedList (java.util.LinkedList): 1; Stack (java.util.Stack): 1; ValidTxnList (org.apache.hadoop.hive.common.ValidTxnList): 1; ValidTxnWriteIdList (org.apache.hadoop.hive.common.ValidTxnWriteIdList): 1; DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 1; RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 1; ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 1; Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type): 1; VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator): 1; VectorMapJoinOuterFilteredOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator): 1