
Example 1 with ExprNodeDescEqualityWrapper

Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper in project hive by apache.

From the class Vectorizer, the method createVectorPTFDesc:

/*
 * Update the VectorPTFDesc with data that is used during validation and that doesn't rely on
 * VectorizationContext to look up column names, etc.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc,
        VectorizationContext vContext, VectorPTFDesc vectorPTFDesc,
        int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    WindowTableFunctionDef windowTableFunctionDef = (WindowTableFunctionDef) funcDef;
    List<WindowFunctionDef> windowsFunctions = windowTableFunctionDef.getWindowFunctions();
    final int functionCount = windowsFunctions.size();
    List<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
    final int outputSize = outputSignature.size();
    /*
     * Output columns.
     */
    TypeInfo[] reducerBatchTypeInfos = vContext.getAllTypeInfos();
    DataTypePhysicalVariation[] reducerBatchDataTypePhysicalVariations = vContext.getAllDataTypePhysicalVariations();
    // Evaluator results are first.
    String[] outputColumnNames = new String[outputSize];
    TypeInfo[] outputTypeInfos = new TypeInfo[outputSize];
    DataTypePhysicalVariation[] outputDataTypePhysicalVariations = new DataTypePhysicalVariation[outputSize];
    for (int i = 0; i < functionCount; i++) {
        ColumnInfo colInfo = outputSignature.get(i);
        TypeInfo typeInfo = colInfo.getType();
        outputColumnNames[i] = colInfo.getInternalName();
        outputTypeInfos[i] = typeInfo;
        outputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
    }
    // Followed by key and non-key input columns (some may be missing).
    for (int i = functionCount; i < outputSize; i++) {
        ColumnInfo colInfo = outputSignature.get(i);
        outputColumnNames[i] = colInfo.getInternalName();
        outputTypeInfos[i] = colInfo.getType();
        outputDataTypePhysicalVariations[i] = reducerBatchDataTypePhysicalVariations[i - functionCount];
    }
    List<PTFExpressionDef> partitionExpressions = funcDef.getPartition().getExpressions();
    final int partitionKeyCount = partitionExpressions.size();
    ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions);
    List<OrderExpressionDef> orderExpressions = funcDef.getOrder().getExpressions();
    final int orderKeyCount = orderExpressions.size();
    ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions);
    // When both PARTITION BY and ORDER BY clauses are present, the partition expressions
    // may differ from the order expressions. Otherwise, there are only ORDER BY expressions.
    boolean isPartitionOrderBy = false;
    if (partitionKeyCount != orderKeyCount) {
        // Obviously different expressions.
        isPartitionOrderBy = true;
    } else {
        // Check each ExprNodeDesc.
        for (int i = 0; i < partitionKeyCount; i++) {
            final ExprNodeDescEqualityWrapper partitionExprEqualityWrapper = new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[i]);
            final ExprNodeDescEqualityWrapper orderExprEqualityWrapper = new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[i]);
            if (!partitionExprEqualityWrapper.equals(orderExprEqualityWrapper)) {
                isPartitionOrderBy = true;
                break;
            }
        }
    }
    String[] evaluatorFunctionNames = new String[functionCount];
    boolean[] evaluatorsAreDistinct = new boolean[functionCount];
    WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount];
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = (List<ExprNodeDesc>[]) new List<?>[functionCount];
    fillInPTFEvaluators(windowsFunctions, evaluatorFunctionNames, evaluatorsAreDistinct, evaluatorWindowFrameDefs, evaluatorInputExprNodeDescLists);
    vectorPTFDesc.setReducerBatchTypeInfos(reducerBatchTypeInfos, reducerBatchDataTypePhysicalVariations);
    vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
    vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
    vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
    vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
    vectorPTFDesc.setEvaluatorsAreDistinct(evaluatorsAreDistinct);
    vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
    vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
    vectorPTFDesc.setOutputColumnNames(outputColumnNames);
    vectorPTFDesc.setOutputTypeInfos(outputTypeInfos, outputDataTypePhysicalVariations);
    vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
Also used:
- PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef)
- DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)
- ArrayList (java.util.ArrayList)
- List (java.util.List)
- ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
- WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)
- OrderExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef)
- WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef)
- PartitionedTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)
- MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)
- ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)
- TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
- StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)
- DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)
- PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
- ExprNodeDescEqualityWrapper (org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper)
- WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)
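The element-by-element check in this example is the wrapper's main job: ExprNodeDescEqualityWrapper delegates equals() to ExprNodeDesc.isSame(), so wrapped expressions compare by semantic equality rather than by reference. A minimal sketch of that comparison factored into a reusable helper; the class name ExprArrayComparisonSketch and the method exprArraysMatch are illustrative, not part of Hive:

import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;

public final class ExprArrayComparisonSketch {

    // Hypothetical helper: true when both arrays hold semantically equal
    // expressions in the same positions. Wrapping each ExprNodeDesc makes
    // equals() compare the expressions themselves, not object identity.
    static boolean exprArraysMatch(ExprNodeDesc[] left, ExprNodeDesc[] right) {
        if (left.length != right.length) {
            // Different key counts can never match; this is the same shortcut
            // the Vectorizer takes before comparing element by element.
            return false;
        }
        for (int i = 0; i < left.length; i++) {
            ExprNodeDescEqualityWrapper leftWrapper = new ExprNodeDescEqualityWrapper(left[i]);
            ExprNodeDescEqualityWrapper rightWrapper = new ExprNodeDescEqualityWrapper(right[i]);
            if (!leftWrapper.equals(rightWrapper)) {
                return false;
            }
        }
        return true;
    }
}

With such a helper, the isPartitionOrderBy computation above collapses to !exprArraysMatch(partitionExprNodeDescs, orderExprNodeDescs).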

Example 2 with ExprNodeDescEqualityWrapper

Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper in project hive by apache.

From the class SetReducerParallelism, the method process:

@SuppressWarnings("unchecked")
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
        Object... nodeOutputs) throws SemanticException {
    OptimizeTezProcContext context = (OptimizeTezProcContext) procContext;
    ReduceSinkOperator sink = (ReduceSinkOperator) nd;
    ReduceSinkDesc desc = sink.getConf();
    long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
    int maxReducers = context.conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
    int constantReducers = context.conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
    if (context.visitedReduceSinks.contains(sink)) {
        // skip walking the children
        LOG.debug("Already processed reduce sink: " + sink.getName());
        return true;
    }
    context.visitedReduceSinks.add(sink);
    if (desc.getNumReducers() <= 0) {
        if (constantReducers > 0) {
            LOG.info("Parallelism for reduce sink " + sink + " set by user to " + constantReducers);
            desc.setNumReducers(constantReducers);
        } else {
            long numberOfBytes = 0;
            // we need to add up all the estimates from the siblings of this reduce sink
            for (Operator<? extends OperatorDesc> sibling : sink.getChildOperators().get(0).getParentOperators()) {
                if (sibling.getStatistics() != null) {
                    numberOfBytes = StatsUtils.safeAdd(numberOfBytes, sibling.getStatistics().getDataSize());
                } else {
                    LOG.warn("No stats available from: " + sibling);
                }
            }
            int numReducers = Utilities.estimateReducers(numberOfBytes, bytesPerReducer, maxReducers, false);
            LOG.info("Set parallelism for reduce sink " + sink + " to: " + numReducers);
            desc.setNumReducers(numReducers);
            final Collection<ExprNodeDescEqualityWrapper> keyCols = ExprNodeDescEqualityWrapper.transform(desc.getKeyCols());
            final Collection<ExprNodeDescEqualityWrapper> partCols = ExprNodeDescEqualityWrapper.transform(desc.getPartitionCols());
            if (keyCols != null && keyCols.equals(partCols)) {
                desc.setReducerTraits(EnumSet.of(UNIFORM, AUTOPARALLEL));
            } else {
                desc.setReducerTraits(EnumSet.of(AUTOPARALLEL));
            }
        }
    } else {
        LOG.info("Number of reducers determined to be: " + desc.getNumReducers());
        // usually controlled by bucketing
        desc.setReducerTraits(EnumSet.of(FIXED));
    }
    return false;
}
Also used:
- ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)
- ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
- OptimizeTezProcContext (org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext)
- ExprNodeDescEqualityWrapper (org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper)
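Two details of this example are worth a brief note. First, the reducer count comes from Utilities.estimateReducers, which in essence divides the summed sibling data size by the bytes-per-reducer target and clamps the result (the final boolean argument, false above, controls power-of-two rounding). A simplified sketch of that arithmetic, assuming bytesPerReducer is positive; this is an illustration, not the actual Hive implementation:

// Simplified sketch of the reducer estimate: divide total data size by the
// per-reducer target and clamp to [1, maxReducers]. The real
// Utilities.estimateReducers can additionally round up to a power of two.
static int estimateReducersSketch(long totalBytes, long bytesPerReducer, int maxReducers) {
    // assumes bytesPerReducer > 0 (hive.exec.reducers.bytes.per.reducer)
    int reducers = (int) Math.ceil(totalBytes / (double) bytesPerReducer);
    reducers = Math.max(1, reducers);          // never fewer than one reducer
    return Math.min(reducers, maxReducers);    // honor the configured ceiling
}

Second, the ExprNodeDescEqualityWrapper.transform calls wrap the key and partition column lists so that the collection equals() compares expressions semantically; when the two lists match, rows with equal keys are guaranteed to land in the same partition, which is why the sink can be marked UNIFORM in addition to AUTOPARALLEL.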

Aggregations

- ExprNodeDescEqualityWrapper (org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper): 2 uses
- ArrayList (java.util.ArrayList): 1 use
- List (java.util.List): 1 use
- DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 1 use
- ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 1 use
- OptimizeTezProcContext (org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext): 1 use
- ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 1 use
- ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 1 use
- OrderExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef): 1 use
- PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef): 1 use
- PartitionedTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef): 1 use
- WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef): 1 use
- WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef): 1 use
- WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef): 1 use
- DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 1 use
- ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 1 use
- MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 1 use
- PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 1 use
- StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 1 use
- TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 1 use