
Example 46 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

From the class QueryPlan, method populateQueryPlan:

/**
 * Populate api.QueryPlan from exec structures. This includes constructing the
 * dependency graphs of stages and operators.
 *
 * @throws IOException
 */
private void populateQueryPlan() throws IOException {
    query.setStageGraph(new org.apache.hadoop.hive.ql.plan.api.Graph());
    query.getStageGraph().setNodeType(NodeType.STAGE);
    Queue<Task<? extends Serializable>> tasksToVisit = new LinkedList<Task<? extends Serializable>>();
    Set<Task<? extends Serializable>> tasksVisited = new HashSet<Task<? extends Serializable>>();
    tasksToVisit.addAll(rootTasks);
    while (tasksToVisit.size() != 0) {
        Task<? extends Serializable> task = tasksToVisit.remove();
        tasksVisited.add(task);
        // populate stage
        org.apache.hadoop.hive.ql.plan.api.Stage stage = new org.apache.hadoop.hive.ql.plan.api.Stage();
        stage.setStageId(task.getId());
        stage.setStageType(task.getType());
        query.addToStageList(stage);
        if (task instanceof ExecDriver) {
            // populate map task
            ExecDriver mrTask = (ExecDriver) task;
            org.apache.hadoop.hive.ql.plan.api.Task mapTask = new org.apache.hadoop.hive.ql.plan.api.Task();
            mapTask.setTaskId(stage.getStageId() + "_MAP");
            mapTask.setTaskType(TaskType.MAP);
            stage.addToTaskList(mapTask);
            populateOperatorGraph(mapTask, mrTask.getWork().getMapWork().getAliasToWork().values());
            // populate reduce task
            if (mrTask.hasReduce()) {
                org.apache.hadoop.hive.ql.plan.api.Task reduceTask = new org.apache.hadoop.hive.ql.plan.api.Task();
                reduceTask.setTaskId(stage.getStageId() + "_REDUCE");
                reduceTask.setTaskType(TaskType.REDUCE);
                stage.addToTaskList(reduceTask);
                Collection<Operator<? extends OperatorDesc>> reducerTopOps = new ArrayList<Operator<? extends OperatorDesc>>();
                reducerTopOps.add(mrTask.getWork().getReduceWork().getReducer());
                populateOperatorGraph(reduceTask, reducerTopOps);
            }
        } else {
            org.apache.hadoop.hive.ql.plan.api.Task otherTask = new org.apache.hadoop.hive.ql.plan.api.Task();
            otherTask.setTaskId(stage.getStageId() + "_OTHER");
            otherTask.setTaskType(TaskType.OTHER);
            stage.addToTaskList(otherTask);
        }
        if (task instanceof ConditionalTask) {
            org.apache.hadoop.hive.ql.plan.api.Adjacency listEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
            listEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
            listEntry.setNode(task.getId());
            ConditionalTask t = (ConditionalTask) task;
            for (Task<? extends Serializable> listTask : t.getListTasks()) {
                if (t.getChildTasks() != null) {
                    org.apache.hadoop.hive.ql.plan.api.Adjacency childEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
                    childEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
                    childEntry.setNode(listTask.getId());
                    // done processing the task
                    for (Task<? extends Serializable> childTask : t.getChildTasks()) {
                        childEntry.addToChildren(childTask.getId());
                        if (!tasksVisited.contains(childTask)) {
                            tasksToVisit.add(childTask);
                        }
                    }
                    query.getStageGraph().addToAdjacencyList(childEntry);
                }
                listEntry.addToChildren(listTask.getId());
                if (!tasksVisited.contains(listTask)) {
                    tasksToVisit.add(listTask);
                }
            }
            query.getStageGraph().addToAdjacencyList(listEntry);
        } else if (task.getChildTasks() != null) {
            org.apache.hadoop.hive.ql.plan.api.Adjacency entry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
            entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE);
            entry.setNode(task.getId());
            // done processing the task
            for (Task<? extends Serializable> childTask : task.getChildTasks()) {
                entry.addToChildren(childTask.getId());
                if (!tasksVisited.contains(childTask)) {
                    tasksToVisit.add(childTask);
                }
            }
            query.getStageGraph().addToAdjacencyList(entry);
        }
    }
}
Also used : Operator(org.apache.hadoop.hive.ql.exec.Operator) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) ExplainTask(org.apache.hadoop.hive.ql.exec.ExplainTask) Serializable(java.io.Serializable) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedList(java.util.LinkedList) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
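A minimal sketch (not from the Hive sources) of the traversal pattern populateQueryPlan uses: a queue of tasks still to visit, a set of tasks already visited, and one stage emitted per dequeued task. PlanNode is a hypothetical stand-in for Hive's Task, and the Thrift Stage/Adjacency construction is omitted; this sketch additionally skips a node that two parents managed to enqueue before it was first visited.

import java.util.ArrayDeque;
import java.util.HashSet;
import java.util.List;
import java.util.Queue;
import java.util.Set;

final class PlanNode {
    final String id;
    final List<PlanNode> children;

    PlanNode(String id, List<PlanNode> children) {
        this.id = id;
        this.children = children;
    }
}

final class PlanWalker {
    // Breadth-first walk that processes every reachable node exactly once.
    static void walk(List<PlanNode> roots) {
        Queue<PlanNode> toVisit = new ArrayDeque<>(roots);
        Set<PlanNode> visited = new HashSet<>();
        while (!toVisit.isEmpty()) {
            PlanNode node = toVisit.remove();
            if (!visited.add(node)) {
                // already processed: a node can be enqueued by several parents
                continue;
            }
            // the real code builds a Thrift Stage for the task here
            System.out.println("stage: " + node.id);
            for (PlanNode child : node.children) {
                if (!visited.contains(child)) {
                    toVisit.add(child);
                }
            }
        }
    }
}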

Example 47 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

From the class TestGenTezWork, method setUp:

/**
 * @throws java.lang.Exception
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
    // Init conf
    final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
    SessionState.start(conf);
    // Init parse context
    final ParseContext pctx = new ParseContext();
    pctx.setContext(new Context(conf));
    ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST, new ArrayList<Task<? extends Serializable>>(), Collections.EMPTY_SET, Collections.EMPTY_SET);
    proc = new GenTezWork(new GenTezUtils() {

        @Override
        protected void setupMapWork(MapWork mapWork, GenTezProcContext context, PrunedPartitionList partitions, TableScanOperator root, String alias) throws SemanticException {
            LinkedHashMap<String, Operator<? extends OperatorDesc>> map = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
            map.put("foo", root);
            mapWork.setAliasToWork(map);
            return;
        }
    });
    CompilationOpContext cCtx = new CompilationOpContext();
    fs = new FileSinkOperator(cCtx);
    fs.setConf(new FileSinkDesc());
    rs = new ReduceSinkOperator(cCtx);
    rs.setConf(new ReduceSinkDesc());
    TableDesc tableDesc = new TableDesc();
    tableDesc.setProperties(new Properties());
    rs.getConf().setKeySerializeInfo(tableDesc);
    ts = new TableScanOperator(cCtx);
    ts.setConf(new TableScanDesc(null));
    ts.getChildOperators().add(rs);
    rs.getParentOperators().add(ts);
    rs.getChildOperators().add(fs);
    fs.getParentOperators().add(rs);
    ctx.preceedingWork = null;
    ctx.currentRootOperator = ts;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Before(org.junit.Before)
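The fixture wires the chain TableScanOperator -> ReduceSinkOperator -> FileSinkOperator by updating both sides of every edge: the parent's child list and the child's parent list. A hypothetical helper (not part of Hive) that captures the invariant the four add() calls above maintain by hand:

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

final class OperatorWiring {
    // Hive operators record both directions of an edge, so the two lists
    // must always be updated together.
    static void connect(Operator<? extends OperatorDesc> parent,
                        Operator<? extends OperatorDesc> child) {
        parent.getChildOperators().add(child);
        child.getParentOperators().add(parent);
    }
}

With it, the wiring in setUp() would read connect(ts, rs); connect(rs, fs);.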

Example 48 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

From the class SplitSparkWorkResolver, method setStatistics:

// we lost statistics & opTraits through cloning, try to get them back
private void setStatistics(BaseWork origin, BaseWork clone) {
    if (origin instanceof MapWork && clone instanceof MapWork) {
        MapWork originMW = (MapWork) origin;
        MapWork cloneMW = (MapWork) clone;
        for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : originMW.getAliasToWork().entrySet()) {
            String alias = entry.getKey();
            Operator<? extends OperatorDesc> cloneOP = cloneMW.getAliasToWork().get(alias);
            if (cloneOP != null) {
                setStatistics(entry.getValue(), cloneOP);
            }
        }
    } else if (origin instanceof ReduceWork && clone instanceof ReduceWork) {
        setStatistics(((ReduceWork) origin).getReducer(), ((ReduceWork) clone).getReducer());
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) Map(java.util.Map) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
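The two-argument setStatistics(Operator, Operator) overload that the method delegates to is not shown in this excerpt. A sketch of what such an overload could look like, assuming the statistics and traits accessors on OperatorDesc and two structurally identical operator trees; treat it as an illustration, not Hive's actual implementation:

import java.util.List;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

final class StatisticsCopier {
    // Copy statistics (and opTraits, which the comment above says are also
    // lost in cloning) pairwise down two parallel operator trees.
    static void copyStatistics(Operator<? extends OperatorDesc> origin,
                               Operator<? extends OperatorDesc> clone) {
        clone.getConf().setStatistics(origin.getConf().getStatistics());
        clone.getConf().setTraits(origin.getConf().getTraits());
        List<Operator<? extends OperatorDesc>> originChildren = origin.getChildOperators();
        List<Operator<? extends OperatorDesc>> cloneChildren = clone.getChildOperators();
        if (originChildren.size() == cloneChildren.size()) {
            for (int i = 0; i < originChildren.size(); i++) {
                copyStatistics(originChildren.get(i), cloneChildren.get(i));
            }
        }
    }
}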

Example 49 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

From the class AbstractSMBJoinProc, method canConvertJoinToBucketMapJoin:

// Can the join operator be converted to a bucket map-merge join operator ?
@SuppressWarnings("unchecked")
protected boolean canConvertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx context) throws SemanticException {
    // This has already been inspected and rejected
    if (context.getRejectedJoinOps().contains(joinOp)) {
        return false;
    }
    if (!this.pGraphContext.getJoinOps().contains(joinOp)) {
        return false;
    }
    Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
    try {
        String selector = HiveConf.getVar(pGraphContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
        bigTableMatcherClass = JavaUtils.loadClass(selector);
    } catch (ClassNotFoundException e) {
        throw new SemanticException(e.getMessage());
    }
    BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
        // There is no big table candidate, so this cannot become a map-join
        // of any type. So return false.
        return false;
    }
    int bigTablePosition = bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates);
    if (bigTablePosition < 0) {
        // The join contains aliases from a sub-query, so no big table
        // position could be chosen.
        return false;
    }
    context.setBigTablePosition(bigTablePosition);
    String joinAlias = bigTablePosition == 0 ? joinOp.getConf().getLeftAlias() : joinOp.getConf().getRightAliases()[bigTablePosition - 1];
    joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias);
    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
    // get the join keys from parent ReduceSink operators
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
        ReduceSinkDesc rsconf = ((ReduceSinkOperator) parentOp).getConf();
        Byte tag = (byte) rsconf.getTag();
        List<ExprNodeDesc> keys = rsconf.getKeyCols();
        keyExprMap.put(tag, keys);
    }
    context.setKeyExprMap(keyExprMap);
    // Make a deep copy of the aliases so that they are not changed in the context
    String[] joinSrcs = joinOp.getConf().getBaseSrc();
    String[] srcs = new String[joinSrcs.length];
    for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
        joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]);
        srcs[srcPos] = new String(joinSrcs[srcPos]);
    }
    // Check whether the join can be converted, given the big table chosen
    // by the pluggable big table matcher.
    return checkConvertBucketMapJoin(context, joinOp.getConf().getAliasToOpInfo(), keyExprMap, joinAlias, Arrays.asList(srcs));
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) HashMap(java.util.HashMap) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
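The big table selector is pluggable: its class name is read from HiveConf, loaded reflectively, and instantiated through a no-argument constructor via ReflectionUtils. A minimal, Hive-independent sketch of that pattern, where Selector is a hypothetical interface standing in for BigTableSelectorForAutoSMJ:

import java.util.Set;

interface Selector {
    // simplified stand-in for getBigTablePosition(pGraphContext, joinOp, candidates)
    int getBigTablePosition(Set<Integer> candidates);
}

final class SelectorLoader {
    // Load a Selector implementation named in configuration and require a
    // public no-argument constructor, mirroring the loadClass/newInstance
    // calls in the method above.
    static Selector load(String className) {
        try {
            Class<? extends Selector> cls =
                    Class.forName(className).asSubclass(Selector.class);
            return cls.getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException e) {
            throw new IllegalArgumentException("cannot load selector " + className, e);
        }
    }
}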

Example 50 with OperatorDesc

Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.

From the class TestPlan, method testPlan:

public void testPlan() throws Exception {
    final String F1 = "#affiliations";
    final String F2 = "friends[0].friendid";
    try {
        // initialize a complete map reduce configuration
        ExprNodeDesc expr1 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F1, "", false);
        ExprNodeDesc expr2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F2, "", false);
        ExprNodeDesc filterExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==", expr1, expr2);
        FilterDesc filterCtx = new FilterDesc(filterExpr, false);
        Operator<FilterDesc> op = OperatorFactory.get(new CompilationOpContext(), FilterDesc.class);
        op.setConf(filterCtx);
        ArrayList<String> aliasList = new ArrayList<String>();
        aliasList.add("a");
        LinkedHashMap<Path, ArrayList<String>> pa = new LinkedHashMap<>();
        pa.put(new Path("/tmp/testfolder"), aliasList);
        TableDesc tblDesc = Utilities.defaultTd;
        PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
        LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
        pt.put(new Path("/tmp/testfolder"), partDesc);
        LinkedHashMap<String, Operator<? extends OperatorDesc>> ao = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
        ao.put("a", op);
        MapredWork mrwork = new MapredWork();
        mrwork.getMapWork().setPathToAliases(pa);
        mrwork.getMapWork().setPathToPartitionInfo(pt);
        mrwork.getMapWork().setAliasToWork(ao);
        JobConf job = new JobConf(TestPlan.class);
        // serialize the configuration once ..
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        SerializationUtilities.serializePlan(mrwork, baos);
        baos.close();
        String v1 = baos.toString();
        // store into configuration
        job.set("fs.default.name", "file:///");
        Utilities.setMapRedWork(job, mrwork, new Path(System.getProperty("java.io.tmpdir") + File.separator + System.getProperty("user.name") + File.separator + "hive"));
        MapredWork mrwork2 = Utilities.getMapRedWork(job);
        Utilities.clearWork(job);
        // over here we should have some checks of the deserialized object against
        // the original object
        // System.out.println(v1);
        // serialize again
        baos.reset();
        SerializationUtilities.serializePlan(mrwork2, baos);
        baos.close();
        // verify that the two are equal
        assertEquals(v1, baos.toString());
    } catch (Exception excp) {
        excp.printStackTrace();
        throw excp;
    }
    System.out.println("Serialization/Deserialization of plan successful");
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) LinkedHashMap(java.util.LinkedHashMap) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JobConf(org.apache.hadoop.mapred.JobConf)
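The core check is a stability round trip: serialize the plan, deserialize it, serialize the copy, and require the two serialized forms to be equal. A generic sketch of that pattern, using plain Java serialization as a stand-in for Hive's SerializationUtilities:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Arrays;

final class RoundTrip {
    static byte[] serialize(Serializable obj) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
            oos.writeObject(obj);
        }
        return baos.toByteArray();
    }

    static Object deserialize(byte[] bytes) throws IOException, ClassNotFoundException {
        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            return ois.readObject();
        }
    }

    // True when a serialize -> deserialize -> serialize cycle is stable,
    // which is what testPlan asserts for MapredWork.
    static boolean stableRoundTrip(Serializable obj) throws IOException, ClassNotFoundException {
        byte[] first = serialize(obj);
        Serializable copy = (Serializable) deserialize(first);
        byte[] second = serialize(copy);
        return Arrays.equals(first, second);
    }
}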

Aggregations

OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 87 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 70 uses
ArrayList (java.util.ArrayList): 50 uses
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 44 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 41 uses
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 36 uses
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 31 uses
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 30 uses
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 27 uses
Path (org.apache.hadoop.fs.Path): 21 uses
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 21 uses
LinkedHashMap (java.util.LinkedHashMap): 18 uses
Serializable (java.io.Serializable): 17 uses
Task (org.apache.hadoop.hive.ql.exec.Task): 17 uses
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 17 uses
HashMap (java.util.HashMap): 16 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 16 uses
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 16 uses
List (java.util.List): 15 uses
Map (java.util.Map): 14 uses