Use of org.apache.hadoop.hive.ql.optimizer.graph.OperatorGraph in project hive by apache.
The method validPreConditions of the class SharedWorkOptimizer.
/**
 * Checks whether the first retainable operator and the first discardable
 * operator of {@code sr} may be merged without violating the constraints
 * enforced below (data size limit, unsupported operators in the involved
 * works, DAG cycles and parallel edges).
 *
 * <p>Assumes {@code sr.retainableOps} and {@code sr.discardableOps} are
 * non-empty, since element 0 of each is read unconditionally.
 *
 * @param pctx parse context; supplies the configuration and is used to build
 *          the {@link OperatorGraph}
 * @param optimizerCache cache handed to the work-operator lookup helpers
 * @param sr candidate merge: retainable/discardable operators, discardable
 *          input operators and the accumulated/maximum data sizes
 * @return {@code true} if every precondition holds, {@code false} as soon as
 *         one of them fails
 */
private static boolean validPreConditions(ParseContext pctx, SharedWorkOptimizerCache optimizerCache,
    SharedResult sr) {
  // TODO: Currently ignores GBY and PTF which may also buffer data in memory.
  if (sr.dataSize > sr.maxDataSize) {
    // Size surpasses limit, we cannot convert
    LOG.debug("accumulated data size: {} / max size: {}", sr.dataSize, sr.maxDataSize);
    return false;
  }

  Operator<?> op1 = sr.retainableOps.get(0);
  Operator<?> op2 = sr.discardableOps.get(0);

  // 1) The set of operators in the works that we are merging need to meet
  // some requirements. In particular:
  // 1.1. None of the works that we are merging can contain a Union
  // operator. This is not supported yet as we might end up with cycles in
  // the Tez DAG.
  // 1.2. There cannot be any DummyStore operator in the works being merged.
  // This is due to an assumption in MergeJoinProc that needs to be further explored.
  // This is also due to some assumption in task generation
  // If any of these conditions are not met, we cannot merge.
  // TODO: Extend rule so it can be applied for these cases.
  final Set<Operator<?>> workOps1 = findWorkOperators(optimizerCache, op1);
  final Set<Operator<?>> workOps2 = findWorkOperators(optimizerCache, op2);
  if (containsUnionOrDummyStore(workOps1) || containsUnionOrDummyStore(workOps2)) {
    // We cannot merge (1.1 / 1.2)
    return false;
  }

  // 2) We check whether one of the operators is part of a work that is an input for
  // the work of the other operator.
  //
  //   Work1            (merge TS in W1 & W3)        Work1
  //     |                        ->                   |        X
  //   Work2                                         Work2
  //     |                                             |
  //   Work3                                         Work1
  //
  // If we do, we cannot merge, as we would end up with a cycle in the DAG.
  final Set<Operator<?>> descendantWorksOps1 =
      findDescendantWorkOperators(pctx, optimizerCache, op1, sr.discardableInputOps);
  final Set<Operator<?>> descendantWorksOps2 =
      findDescendantWorkOperators(pctx, optimizerCache, op2, sr.discardableInputOps);
  if (!Collections.disjoint(descendantWorksOps1, workOps2)
      || !Collections.disjoint(workOps1, descendantWorksOps2)) {
    return false;
  }

  // 3) We check whether output works when we merge the operators will collide.
  //
  //   Work1   Work2    (merge TS in W1 & W2)        Work1
  //       \   /                  ->                  | |       X
  //       Work3                                     Work3
  //
  // If we do, we cannot merge. The reason is that Tez currently does
  // not support parallel edges, i.e., multiple edges from same work x
  // into same work y.
  RelaxedVertexEdgePredicate edgePredicate;
  if (pctx.getConf().getBoolVar(ConfVars.HIVE_SHARED_WORK_PARALLEL_EDGE_SUPPORT)) {
    edgePredicate = new RelaxedVertexEdgePredicate(
        EnumSet.<EdgeType>of(EdgeType.DPP, EdgeType.SEMIJOIN, EdgeType.BROADCAST));
  } else {
    edgePredicate = new RelaxedVertexEdgePredicate(EnumSet.<EdgeType>of(EdgeType.DPP));
  }

  OperatorGraph og = new OperatorGraph(pctx);
  Set<OperatorGraph.Cluster> cc1 = og.clusterOf(op1).childClusters(edgePredicate);
  Set<OperatorGraph.Cluster> cc2 = og.clusterOf(op2).childClusters(edgePredicate);
  if (!Collections.disjoint(cc1, cc2)) {
    // The message previously had no placeholders, so op1/op2 were never logged.
    LOG.debug("merging {} and {} would create an unsupported parallel edge(CHILDS)", op1, op2);
    return false;
  }

  if (!og.mayMerge(op1, op2)) {
    LOG.debug("merging {} and {} would violate dag properties", op1, op2);
    return false;
  }

  // 4) We check whether we will end up with same operators inputing on same work.
  //
  //       Work1        (merge TS in W2 & W3)        Work1
  //       /   \                  ->                  | |       X
  //   Work2   Work3                                 Work2
  //
  // If we do, we cannot merge. The reason is the same as above, currently
  // Tez does not support parallel edges.
  //
  // In the check, we exclude the inputs to the root operator that we are trying
  // to merge (only useful for extended merging as TS do not have inputs).
  Set<OperatorGraph.Cluster> pc1 = og.clusterOf(op1).parentClusters(edgePredicate);
  Set<OperatorGraph.Cluster> pc2 = og.clusterOf(op2).parentClusters(edgePredicate);
  Set<Cluster> pc = new HashSet<>(Sets.intersection(pc1, pc2));
  // op2 is sr.discardableOps.get(0); its direct parents are allowed to be shared.
  for (Operator<?> o : op2.getParentOperators()) {
    pc.remove(og.clusterOf(o));
  }
  for (Operator<?> o : sr.discardableInputOps) {
    pc.remove(og.clusterOf(o));
  }
  if (!pc.isEmpty()) {
    // The message previously had no placeholders, so op1/op2 were never logged.
    LOG.debug("merging {} and {} would create an unsupported parallel edge(PARENTS)", op1, op2);
    return false;
  }

  return true;
}

/**
 * Tells whether any operator in {@code workOps} is a {@link UnionOperator}
 * or a {@link DummyStoreOperator}; works containing either are rejected by
 * {@code validPreConditions} (conditions 1.1 and 1.2).
 */
private static boolean containsUnionOrDummyStore(Set<Operator<?>> workOps) {
  for (Operator<?> op : workOps) {
    if (op instanceof UnionOperator || op instanceof DummyStoreOperator) {
      return true;
    }
  }
  return false;
}
Aggregations