use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class SetSparkReducerParallelism method needSetParallelism.
// tests whether the RS needs its parallelism set automatically
private boolean needSetParallelism(ReduceSinkOperator reduceSink, HiveConf hiveConf) {
  ReduceSinkDesc desc = reduceSink.getConf();
  if (desc.getNumReducers() <= 0) {
    return true;
  }
  if (desc.getNumReducers() == 1 && desc.hasOrderBy() &&
      hiveConf.getBoolVar(HiveConf.ConfVars.HIVESAMPLINGFORORDERBY) && !desc.isDeduplicated()) {
    Stack<Operator<? extends OperatorDesc>> descendants = new Stack<Operator<? extends OperatorDesc>>();
    List<Operator<? extends OperatorDesc>> children = reduceSink.getChildOperators();
    if (children != null) {
      for (Operator<? extends OperatorDesc> child : children) {
        descendants.push(child);
      }
    }
    while (!descendants.isEmpty()) {
      Operator<? extends OperatorDesc> descendant = descendants.pop();
      // If the descendants contain a LimitOperator, return false
      if (descendant instanceof LimitOperator) {
        return false;
      }
      boolean reachTerminalOperator = (descendant instanceof TerminalOperator);
      if (!reachTerminalOperator) {
        List<Operator<? extends OperatorDesc>> childrenOfDescendant = descendant.getChildOperators();
        if (childrenOfDescendant != null) {
          for (Operator<? extends OperatorDesc> childOfDescendant : childrenOfDescendant) {
            descendants.push(childOfDescendant);
          }
        }
      }
    }
    return true;
  }
  return false;
}
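The descendant scan is an iterative depth-first walk over the operator DAG: push the RS's children, pop until a LimitOperator short-circuits the answer or a TerminalOperator ends a branch. A minimal self-contained sketch of the same pattern (PlanNode and hasLimitBelow are illustrative names, not Hive API):

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;

final class PlanNode {
  final boolean isLimit;
  final List<PlanNode> children;

  PlanNode(boolean isLimit, List<PlanNode> children) {
    this.isLimit = isLimit;
    this.children = children;
  }

  // Iterative DFS mirroring the Stack-based scan in needSetParallelism:
  // any LIMIT reachable from the root short-circuits to true.
  static boolean hasLimitBelow(PlanNode root) {
    Deque<PlanNode> stack = new ArrayDeque<>();
    for (PlanNode c : root.children) {
      stack.push(c);
    }
    while (!stack.isEmpty()) {
      PlanNode n = stack.pop();
      if (n.isLimit) {
        return true;
      }
      for (PlanNode c : n.children) {
        stack.push(c);
      }
    }
    return false;
  }
}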
use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class TopNKeyProcessor method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
  // Get ReduceSinkOperator
  ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) nd;
  ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf();
  // Check whether the reduce sink operator contains top n
  if (reduceSinkDesc.getTopN() < 0 || !reduceSinkDesc.isOrdering()) {
    return null;
  }
  if (reduceSinkDesc.getTopN() > maxTopNAllowed) {
    return null;
  }
  // Check whether there already is a top n key operator
  Operator<? extends OperatorDesc> parentOperator = reduceSinkOperator.getParentOperators().get(0);
  if (parentOperator instanceof TopNKeyOperator) {
    return null;
  }
  List<ExprNodeDesc> partitionCols = Collections.emptyList();
  if (reduceSinkDesc.isPTFReduceSink()) {
    // Note: partition cols are a prefix of the key cols
    if (reduceSinkDesc.getPartitionCols().size() >= reduceSinkDesc.getKeyCols().size()) {
      return null;
    }
    partitionCols = reduceSinkDesc.getPartitionCols();
  }
  TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(),
      reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols(), partitionCols,
      efficiencyThreshold, checkEfficiencyNumBatches, maxNumberOfPartitions);
  copyDown(reduceSinkOperator, topNKeyDesc);
  reduceSinkDesc.setTopN(-1);
  return null;
}
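For context, a processor like this is normally attached to the plan through Hive's rule-based graph walker. A hedged sketch of that wiring (class names follow the pre-4.0 org.apache.hadoop.hive.ql.lib API, and the TopNKeyProcessor constructor arguments are assumed to mirror the thresholds used above; both vary across Hive versions):

// Match any ReduceSinkOperator node and hand it to the processor.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
opRules.put(new RuleRegExp("Top n key pushdown", ReduceSinkOperator.getOperatorName() + "%"),
    new TopNKeyProcessor(maxTopNAllowed, efficiencyThreshold, checkEfficiencyNumBatches, maxNumberOfPartitions));
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
GraphWalker walker = new DefaultGraphWalker(disp);
// Start from the root table scans of the parse context.
walker.startWalking(new ArrayList<Node>(pctx.getTopOps().values()), null);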
use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class TestGenTezWork method setUp.
/**
 * @throws java.lang.Exception
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  // Init conf
  final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
  SessionState.start(conf);
  // Init parse context
  final ParseContext pctx = new ParseContext();
  pctx.setContext(new Context(conf));
  ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST, new ArrayList<Task<?>>(),
      Collections.EMPTY_SET, Collections.EMPTY_SET);
  proc = new GenTezWork(new GenTezUtils() {
    @Override
    protected void setupMapWork(MapWork mapWork, GenTezProcContext context, PrunedPartitionList partitions,
        TableScanOperator root, String alias) throws SemanticException {
      LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      map.put("foo", root);
      mapWork.setAliasToWork(map);
    }
  });
  CompilationOpContext cCtx = new CompilationOpContext();
  fs = new FileSinkOperator(cCtx);
  fs.setConf(new FileSinkDesc());
  rs = new ReduceSinkOperator(cCtx);
  rs.setConf(new ReduceSinkDesc());
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  rs.getConf().setKeySerializeInfo(tableDesc);
  ts = new TableScanOperator(cCtx);
  ts.setConf(new TableScanDesc(null));
  // Wire up a simple operator pipeline: TS -> RS -> FS
  ts.getChildOperators().add(rs);
  rs.getParentOperators().add(ts);
  rs.getChildOperators().add(fs);
  fs.getParentOperators().add(rs);
  ctx.preceedingWork = null;
  ctx.currentRootOperator = ts;
}
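With this fixture, a test can drive GenTezWork directly over the TS -> RS -> FS chain. A hypothetical example of such a test (the assertions are illustrative, not copied from TestGenTezWork):

@Test
public void testCreatesWorkForSimpleChain() throws SemanticException {
  // Processing the RS should have GenTezWork open a MapWork for the
  // root TableScanOperator via the stubbed setupMapWork above.
  proc.process(rs, null, ctx, (Object[]) null);
  assertNotNull(ctx.currentTask);
  assertEquals(1, ctx.currentTask.getWork().getAllWork().size());
}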
use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class SharedWorkOptimizer method compareOperator.
private static boolean compareOperator(ParseContext pctx, Operator<?> op1, Operator<?> op2) throws SemanticException {
  if (!op1.getClass().getName().equals(op2.getClass().getName())) {
    return false;
  }
  // TODO: move this to logicalEquals
  if (op1 instanceof ReduceSinkOperator) {
    ReduceSinkDesc op1Conf = ((ReduceSinkOperator) op1).getConf();
    ReduceSinkDesc op2Conf = ((ReduceSinkOperator) op2).getConf();
    return StringUtils.equals(op1Conf.getKeyColString(), op2Conf.getKeyColString())
        && StringUtils.equals(op1Conf.getValueColsString(), op2Conf.getValueColsString())
        && StringUtils.equals(op1Conf.getParitionColsString(), op2Conf.getParitionColsString())
        && op1Conf.getTag() == op2Conf.getTag()
        && StringUtils.equals(op1Conf.getOrder(), op2Conf.getOrder())
        && StringUtils.equals(op1Conf.getNullOrder(), op2Conf.getNullOrder())
        && op1Conf.getTopN() == op2Conf.getTopN()
        && canDeduplicateReduceTraits(op1Conf, op2Conf);
  }
  // TODO: move this to logicalEquals
  if (op1 instanceof TableScanOperator) {
    TableScanOperator tsOp1 = (TableScanOperator) op1;
    TableScanOperator tsOp2 = (TableScanOperator) op2;
    TableScanDesc op1Conf = tsOp1.getConf();
    TableScanDesc op2Conf = tsOp2.getConf();
    Table tableMeta1 = op1Conf.getTableMetadata();
    Table tableMeta2 = op2Conf.getTableMetadata();
    return StringUtils.equals(tableMeta1.getFullyQualifiedName(), tableMeta2.getFullyQualifiedName())
        && op1Conf.getNeededColumns().equals(op2Conf.getNeededColumns())
        && StringUtils.equals(op1Conf.getFilterExprString(), op2Conf.getFilterExprString())
        && pctx.getPrunedPartitions(tsOp1).getPartitions().equals(pctx.getPrunedPartitions(tsOp2).getPartitions())
        && op1Conf.getRowLimit() == op2Conf.getRowLimit()
        && Objects.equals(op1Conf.getIncludedBuckets(), op2Conf.getIncludedBuckets())
        && Objects.equals(op1Conf.getOpProps(), op2Conf.getOpProps());
  }
  return op1.logicalEquals(op2);
}
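SharedWorkOptimizer uses this check to decide whether two plan branches are mergeable. A simplified schematic of a call site (variable names are illustrative, not the optimizer's actual control flow):

// Candidate pair of equivalent table scans found while scanning the plan.
if (compareOperator(pctx, retainableTsOp, discardableTsOp)) {
  // Descriptors match field-by-field: redirect discardableTsOp's children
  // onto retainableTsOp and drop the duplicate scan from the plan.
}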
use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class ParallelEdgeFixer method fixParallelEdge.
/**
 * Fixes a parallel edge going into a mapjoin by introducing a concentrator RS.
 */
private void fixParallelEdge(Operator<? extends OperatorDesc> p, Operator<?> o) throws SemanticException {
  LOG.info("Fixing parallel edge by adding a concentrator RS between {} -> {}", p, o);
  ReduceSinkDesc conf = (ReduceSinkDesc) p.getConf();
  ReduceSinkDesc newConf = (ReduceSinkDesc) conf.clone();
  Operator<SelectDesc> newSEL = buildSEL(p, conf);
  Operator<ReduceSinkDesc> newRS = OperatorFactory.getAndMakeChild(p.getCompilationOpContext(), newConf, new ArrayList<>());
  conf.setOutputName("forward_to_" + newRS);
  conf.setTag(0);
  newConf.setKeyCols(new ArrayList<>(conf.getKeyCols()));
  newRS.setSchema(new RowSchema(p.getSchema()));
  // Rewire the edge: p -> newSEL -> newRS -> o
  p.replaceChild(o, newSEL);
  newSEL.setParentOperators(Lists.<Operator<?>>newArrayList(p));
  newSEL.setChildOperators(Lists.<Operator<?>>newArrayList(newRS));
  newRS.setParentOperators(Lists.<Operator<?>>newArrayList(newSEL));
  newRS.setChildOperators(Lists.<Operator<?>>newArrayList(o));
  o.replaceParent(p, newRS);
}
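The net effect of the rewiring is easiest to see as a before/after picture of the edge (schematic only; buildSEL, which is not shown here, produces the intermediate SELECT):

// before:  p (RS) ---------------------> o (mapjoin)
// after:   p (RS) -> newSEL -> newRS --> o (mapjoin)
//
// newRS carries a clone of p's ReduceSinkDesc with the same key columns, so o
// now receives p's rows through a single concentrator edge instead of a
// parallel one.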