Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class QueryPlan, method populateQueryPlan().
/**
* Populate api.QueryPlan from exec structures. This includes constructing the
* dependency graphs of stages and operators.
*
* @throws IOException
*/
private void populateQueryPlan() throws IOException {
  query.setStageGraph(new org.apache.hadoop.hive.ql.plan.api.Graph());
  query.getStageGraph().setNodeType(NodeType.STAGE);
  Queue<Task<? extends Serializable>> tasksToVisit = new LinkedList<Task<? extends Serializable>>();
  Set<Task<? extends Serializable>> tasksVisited = new HashSet<Task<? extends Serializable>>();
  tasksToVisit.addAll(rootTasks);
  while (tasksToVisit.size() != 0) {
    Task<? extends Serializable> task = tasksToVisit.remove();
    tasksVisited.add(task);
    // populate stage
    org.apache.hadoop.hive.ql.plan.api.Stage stage = new org.apache.hadoop.hive.ql.plan.api.Stage();
    stage.setStageId(task.getId());
    stage.setStageType(task.getType());
    query.addToStageList(stage);
    if (task instanceof ExecDriver) {
      // populate map task
      ExecDriver mrTask = (ExecDriver) task;
      org.apache.hadoop.hive.ql.plan.api.Task mapTask = new org.apache.hadoop.hive.ql.plan.api.Task();
      mapTask.setTaskId(stage.getStageId() + "_MAP");
      mapTask.setTaskType(TaskType.MAP);
      stage.addToTaskList(mapTask);
      populateOperatorGraph(mapTask, mrTask.getWork().getMapWork().getAliasToWork().values());
      // populate reduce task
      if (mrTask.hasReduce()) {
        org.apache.hadoop.hive.ql.plan.api.Task reduceTask = new org.apache.hadoop.hive.ql.plan.api.Task();
        reduceTask.setTaskId(stage.getStageId() + "_REDUCE");
        reduceTask.setTaskType(TaskType.REDUCE);
        stage.addToTaskList(reduceTask);
        Collection<Operator<? extends OperatorDesc>> reducerTopOps = new ArrayList<Operator<? extends OperatorDesc>>();
        reducerTopOps.add(mrTask.getWork().getReduceWork().getReducer());
        populateOperatorGraph(reduceTask, reducerTopOps);
      }
    } else {
      org.apache.hadoop.hive.ql.plan.api.Task otherTask = new org.apache.hadoop.hive.ql.plan.api.Task();
      otherTask.setTaskId(stage.getStageId() + "_OTHER");
      otherTask.setTaskType(TaskType.OTHER);
      stage.addToTaskList(otherTask);
    }
    if (task instanceof ConditionalTask) {
      org.apache.hadoop.hive.ql.plan.api.Adjacency listEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
      listEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
      listEntry.setNode(task.getId());
      ConditionalTask t = (ConditionalTask) task;
      for (Task<? extends Serializable> listTask : t.getListTasks()) {
        if (t.getChildTasks() != null) {
          org.apache.hadoop.hive.ql.plan.api.Adjacency childEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
          childEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
          childEntry.setNode(listTask.getId());
          // done processing the task
          for (Task<? extends Serializable> childTask : t.getChildTasks()) {
            childEntry.addToChildren(childTask.getId());
            if (!tasksVisited.contains(childTask)) {
              tasksToVisit.add(childTask);
            }
          }
          query.getStageGraph().addToAdjacencyList(childEntry);
        }
        listEntry.addToChildren(listTask.getId());
        if (!tasksVisited.contains(listTask)) {
          tasksToVisit.add(listTask);
        }
      }
      query.getStageGraph().addToAdjacencyList(listEntry);
    } else if (task.getChildTasks() != null) {
      org.apache.hadoop.hive.ql.plan.api.Adjacency entry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
      entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE);
      entry.setNode(task.getId());
      // done processing the task
      for (Task<? extends Serializable> childTask : task.getChildTasks()) {
        entry.addToChildren(childTask.getId());
        if (!tasksVisited.contains(childTask)) {
          tasksToVisit.add(childTask);
        }
      }
      query.getStageGraph().addToAdjacencyList(entry);
    }
  }
}
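Once populateQueryPlan() has run, the resulting Thrift objects can be inspected directly. Below is a minimal sketch of such a traversal; printStageGraph is a hypothetical helper name, and it relies only on the Thrift-generated getters of the api.Query, Stage and Adjacency structs (getStageList(), getStageGraph(), getAdjacencyList(), and so on).

private static void printStageGraph(org.apache.hadoop.hive.ql.plan.api.Query query) {
  // list every stage that was added via addToStageList()
  if (query.getStageList() != null) {
    for (org.apache.hadoop.hive.ql.plan.api.Stage stage : query.getStageList()) {
      System.out.println("stage " + stage.getStageId() + " [" + stage.getStageType() + "]");
    }
  }
  // print each parent -> children edge recorded in the stage graph
  if (query.getStageGraph() != null && query.getStageGraph().getAdjacencyList() != null) {
    for (org.apache.hadoop.hive.ql.plan.api.Adjacency adj : query.getStageGraph().getAdjacencyList()) {
      System.out.println(adj.getNode() + " -> " + adj.getChildren() + " (" + adj.getAdjacencyType() + ")");
    }
  }
}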
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class TestGenTezWork, method setUp().
/**
* @throws java.lang.Exception
*/
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  // Init conf
  final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
  SessionState.start(conf);
  // Init parse context
  final ParseContext pctx = new ParseContext();
  pctx.setContext(new Context(conf));
  ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST,
      new ArrayList<Task<? extends Serializable>>(), Collections.EMPTY_SET, Collections.EMPTY_SET);
  proc = new GenTezWork(new GenTezUtils() {

    @Override
    protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
        PrunedPartitionList partitions, TableScanOperator root, String alias) throws SemanticException {
      LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      map.put("foo", root);
      mapWork.setAliasToWork(map);
      return;
    }
  });
  CompilationOpContext cCtx = new CompilationOpContext();
  fs = new FileSinkOperator(cCtx);
  fs.setConf(new FileSinkDesc());
  rs = new ReduceSinkOperator(cCtx);
  rs.setConf(new ReduceSinkDesc());
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  rs.getConf().setKeySerializeInfo(tableDesc);
  ts = new TableScanOperator(cCtx);
  ts.setConf(new TableScanDesc(null));
  ts.getChildOperators().add(rs);
  rs.getParentOperators().add(ts);
  rs.getChildOperators().add(fs);
  fs.getParentOperators().add(rs);
  ctx.preceedingWork = null;
  ctx.currentRootOperator = ts;
}
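As a point of reference, a hedged companion check could re-assert the operator wiring established above. The test name is hypothetical, it assumes the usual JUnit 4 imports (org.junit.Test, org.junit.Assert.assertSame), and it does not exercise GenTezWork.process itself.

// Hypothetical sanity test: the pipeline built in setUp() is TS -> RS -> FS,
// with each parent/child link registered in both directions.
@Test
public void testOperatorPipelineWiring() {
  assertSame(rs, ts.getChildOperators().get(0));
  assertSame(ts, rs.getParentOperators().get(0));
  assertSame(fs, rs.getChildOperators().get(0));
  assertSame(rs, fs.getParentOperators().get(0));
}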
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class SplitSparkWorkResolver, method setStatistics().
// we lost statistics & opTraits through cloning, try to get them back
private void setStatistics(BaseWork origin, BaseWork clone) {
  if (origin instanceof MapWork && clone instanceof MapWork) {
    MapWork originMW = (MapWork) origin;
    MapWork cloneMW = (MapWork) clone;
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : originMW.getAliasToWork().entrySet()) {
      String alias = entry.getKey();
      Operator<? extends OperatorDesc> cloneOP = cloneMW.getAliasToWork().get(alias);
      if (cloneOP != null) {
        setStatistics(entry.getValue(), cloneOP);
      }
    }
  } else if (origin instanceof ReduceWork && clone instanceof ReduceWork) {
    setStatistics(((ReduceWork) origin).getReducer(), ((ReduceWork) clone).getReducer());
  }
}
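The operator-level overload invoked in both branches is not shown on this page. The sketch below illustrates what it plausibly does: copy per-operator statistics and traits from the original onto the clone and recurse pairwise into the children. It assumes OperatorDesc exposes getStatistics()/setStatistics() and getTraits()/setTraits(), and should be read as an illustration rather than the verbatim Hive implementation.

// Illustrative sketch of the per-operator overload (assumed signatures noted above).
private void setStatistics(Operator<? extends OperatorDesc> origin,
    Operator<? extends OperatorDesc> clone) {
  // copy the plan-level metadata that was lost when the work was cloned
  clone.getConf().setStatistics(origin.getConf().getStatistics());
  clone.getConf().setTraits(origin.getConf().getTraits());
  // the clone preserves the shape of the original operator tree, so recurse pairwise
  if (origin.getChildOperators() != null && clone.getChildOperators() != null
      && origin.getChildOperators().size() == clone.getChildOperators().size()) {
    for (int i = 0; i < origin.getChildOperators().size(); i++) {
      setStatistics(origin.getChildOperators().get(i), clone.getChildOperators().get(i));
    }
  }
}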
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class AbstractSMBJoinProc, method canConvertJoinToBucketMapJoin().
// Can the join operator be converted to a bucket map-merge join operator?
@SuppressWarnings("unchecked")
protected boolean canConvertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx context) throws SemanticException {
  // This has already been inspected and rejected
  if (context.getRejectedJoinOps().contains(joinOp)) {
    return false;
  }
  if (!this.pGraphContext.getJoinOps().contains(joinOp)) {
    return false;
  }
  Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
  try {
    String selector = HiveConf.getVar(pGraphContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
    bigTableMatcherClass = JavaUtils.loadClass(selector);
  } catch (ClassNotFoundException e) {
    throw new SemanticException(e.getMessage());
  }
  BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
  JoinDesc joinDesc = joinOp.getConf();
  JoinCondDesc[] joinCondns = joinDesc.getConds();
  Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
  if (joinCandidates.isEmpty()) {
    // no side of the join is a valid big-table candidate, so this can never be a map-join
    // of any type. So return false.
    return false;
  }
  int bigTablePosition = bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates);
  if (bigTablePosition < 0) {
    // contains aliases from sub-query
    return false;
  }
  context.setBigTablePosition(bigTablePosition);
  String joinAlias = bigTablePosition == 0 ? joinOp.getConf().getLeftAlias() : joinOp.getConf().getRightAliases()[bigTablePosition - 1];
  joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias);
  Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
  List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
  // get the join keys from parent ReduceSink operators
  for (Operator<? extends OperatorDesc> parentOp : parentOps) {
    ReduceSinkDesc rsconf = ((ReduceSinkOperator) parentOp).getConf();
    Byte tag = (byte) rsconf.getTag();
    List<ExprNodeDesc> keys = rsconf.getKeyCols();
    keyExprMap.put(tag, keys);
  }
  context.setKeyExprMap(keyExprMap);
  // Make a deep copy of the aliases so that they are not changed in the context
  String[] joinSrcs = joinOp.getConf().getBaseSrc();
  String[] srcs = new String[joinSrcs.length];
  for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
    joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]);
    srcs[srcPos] = new String(joinSrcs[srcPos]);
  }
  // Given the candidate picked by the pluggable sort merge join big
  // table matcher, check whether the join can actually be converted to a bucket map join.
  return checkConvertBucketMapJoin(context, joinOp.getConf().getAliasToOpInfo(), keyExprMap, joinAlias, Arrays.asList(srcs));
}
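The big-table selector loaded reflectively at the top of the method is pluggable through configuration. A minimal sketch of pointing it at the size-based selector shipped with Hive (the class name is quoted from memory, so check it against your Hive version):

// The reflection block above loads whatever class name this variable holds.
HiveConf conf = new HiveConf();
conf.setVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR,
    "org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ");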
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class TestPlan, method testPlan().
public void testPlan() throws Exception {
  final String F1 = "#affiliations";
  final String F2 = "friends[0].friendid";
  try {
    // initialize a complete map reduce configuration
    ExprNodeDesc expr1 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F1, "", false);
    ExprNodeDesc expr2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F2, "", false);
    ExprNodeDesc filterExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==", expr1, expr2);
    FilterDesc filterCtx = new FilterDesc(filterExpr, false);
    Operator<FilterDesc> op = OperatorFactory.get(new CompilationOpContext(), FilterDesc.class);
    op.setConf(filterCtx);
    ArrayList<String> aliasList = new ArrayList<String>();
    aliasList.add("a");
    LinkedHashMap<Path, ArrayList<String>> pa = new LinkedHashMap<>();
    pa.put(new Path("/tmp/testfolder"), aliasList);
    TableDesc tblDesc = Utilities.defaultTd;
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
    LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
    pt.put(new Path("/tmp/testfolder"), partDesc);
    LinkedHashMap<String, Operator<? extends OperatorDesc>> ao =
        new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    ao.put("a", op);
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToAliases(pa);
    mrwork.getMapWork().setPathToPartitionInfo(pt);
    mrwork.getMapWork().setAliasToWork(ao);
    JobConf job = new JobConf(TestPlan.class);
    // serialize the configuration once ..
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    SerializationUtilities.serializePlan(mrwork, baos);
    baos.close();
    String v1 = baos.toString();
    // store into configuration
    job.set("fs.default.name", "file:///");
    Utilities.setMapRedWork(job, mrwork, new Path(System.getProperty("java.io.tmpdir") + File.separator
        + System.getProperty("user.name") + File.separator + "hive"));
    MapredWork mrwork2 = Utilities.getMapRedWork(job);
    Utilities.clearWork(job);
    // over here we should have some checks of the deserialized object against
    // the original object
    // System.out.println(v1);
    // serialize again
    baos.reset();
    SerializationUtilities.serializePlan(mrwork2, baos);
    baos.close();
    // verify that the two are equal
    assertEquals(v1, baos.toString());
  } catch (Exception excp) {
    excp.printStackTrace();
    throw excp;
  }
  System.out.println("Serialization/Deserialization of plan successful");
}