Use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.
The class GenMapRedUtils, method initPlan.
/**
* Initialize the current plan by adding it to root tasks.
*
* @param op
* the reduce sink operator encountered
* @param opProcCtx
* processing context
*/
public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException {
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
  GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
  Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
  MapredWork plan = (MapredWork) currTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
      opProcCtx.getOpTaskMap();
  TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
  opTaskMap.put(reducer, currTask);
  plan.setReduceWork(new ReduceWork());
  plan.getReduceWork().setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
  if (needsTagging(plan.getReduceWork())) {
    plan.getReduceWork().setNeedsTagging(true);
  }
  assert currTopOp != null;
  String currAliasId = opProcCtx.getCurrAliasId();
  if (!opProcCtx.isSeenOp(currTask, currTopOp)) {
    setTaskPlan(currAliasId, currTopOp, currTask, false, opProcCtx);
  }
  currTopOp = null;
  currAliasId = null;
  opProcCtx.setCurrTask(currTask);
  opProcCtx.setCurrTopOp(currTopOp);
  opProcCtx.setCurrAliasId(currAliasId);
}
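The key wiring in initPlan is compact: the ReduceSinkOperator's only child becomes the reducer of a new ReduceWork, and the reduce-side parallelism is copied from the ReduceSinkDesc. A minimal sketch of just that step, not the Hive implementation itself (hypothetical helper name attachReduceWork; assumes the usual org.apache.hadoop.hive.ql.exec and org.apache.hadoop.hive.ql.plan imports):

static void attachReduceWork(MapredWork plan, ReduceSinkOperator rs) {
  // the operator subtree rooted at the RS child is executed on the reduce side
  Operator<? extends OperatorDesc> reducer = rs.getChildOperators().get(0);
  ReduceWork reduceWork = new ReduceWork();
  reduceWork.setReducer(reducer);
  // number of reducers requested by the ReduceSinkDesc
  reduceWork.setNumReduceTasks(rs.getConf().getNumReducers());
  plan.setReduceWork(reduceWork);
}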
Use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.
The class GenMapRedUtils, method splitTasks.
/**
 * Split two tasks by creating a temporary file between them.
 *
 * @param op reduce sink operator being processed
 * @param parentTask the parent task
 * @param childTask the child task
 * @param opProcCtx context
 */
@SuppressWarnings("nls")
private static void splitTasks(ReduceSinkOperator op, Task<? extends Serializable> parentTask,
    Task<? extends Serializable> childTask, GenMRProcContext opProcCtx) throws SemanticException {
  if (op.getNumParent() != 1) {
    throw new IllegalStateException("Expecting operator " + op + " to have one parent. "
        + "But found multiple parents : " + op.getParentOperators());
  }
  ParseContext parseCtx = opProcCtx.getParseCtx();
  parentTask.addDependentTask(childTask);
  // Root Task cannot depend on any other task, therefore childTask cannot be
  // a root Task
  List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
  if (rootTasks.contains(childTask)) {
    rootTasks.remove(childTask);
  }
  // Generate the temporary file name
  Context baseCtx = parseCtx.getContext();
  Path taskTmpDir = baseCtx.getMRTmpPath();
  Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
  TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(
      PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
  // Create the temporary file, its corresponding FileSinkOperator, and
  // its corresponding TableScanOperator.
  TableScanOperator tableScanOp = createTemporaryFile(parent, op, taskTmpDir, tt_desc, parseCtx);
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
  mapCurrCtx.put(tableScanOp, new GenMapRedCtx(childTask, null));
  String streamDesc = taskTmpDir.toUri().toString();
  MapredWork cplan = (MapredWork) childTask.getWork();
  if (needsTagging(cplan.getReduceWork())) {
    Operator<? extends OperatorDesc> reducerOp = cplan.getReduceWork().getReducer();
    String id = null;
    if (reducerOp instanceof JoinOperator) {
      if (parseCtx.getJoinOps().contains(reducerOp)) {
        id = ((JoinOperator) reducerOp).getConf().getId();
      }
    } else if (reducerOp instanceof MapJoinOperator) {
      if (parseCtx.getMapJoinOps().contains(reducerOp)) {
        id = ((MapJoinOperator) reducerOp).getConf().getId();
      }
    } else if (reducerOp instanceof SMBMapJoinOperator) {
      if (parseCtx.getSmbMapJoinOps().contains(reducerOp)) {
        id = ((SMBMapJoinOperator) reducerOp).getConf().getId();
      }
    }
    if (id != null) {
      streamDesc = id + ":$INTNAME";
    } else {
      streamDesc = "$INTNAME";
    }
    String origStreamDesc = streamDesc;
    int pos = 0;
    while (cplan.getMapWork().getAliasToWork().get(streamDesc) != null) {
      streamDesc = origStreamDesc.concat(String.valueOf(++pos));
    }
    // TODO: Allocate work to remove the temporary files and make that
    // dependent on the redTask
    cplan.getReduceWork().setNeedsTagging(true);
  }
  // Add the path to alias mapping
  setTaskPlan(taskTmpDir, streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
  opProcCtx.setCurrTopOp(null);
  opProcCtx.setCurrAliasId(null);
  opProcCtx.setCurrTask(childTask);
  opProcCtx.addRootIfPossible(parentTask);
}
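A detail worth calling out in splitTasks is how the stream alias for the intermediate file is kept unique inside the child plan: a numeric suffix is appended to the candidate alias until it no longer collides with an entry in the map work's aliasToWork. A small stand-alone sketch of that loop (hypothetical helper name uniqueStreamAlias; the real code tests get(...) != null on a HashMap, which is equivalent here):

static String uniqueStreamAlias(String candidate, Map<String, ?> aliasToWork) {
  // Append 1, 2, 3, ... to the candidate alias ("$INTNAME", "$INTNAME1", ...)
  // until the resulting name is not yet used as an alias in the map work.
  String alias = candidate;
  int pos = 0;
  while (aliasToWork.containsKey(alias)) {
    alias = candidate + (++pos);
  }
  return alias;
}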
Use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.
The class GenMapRedUtils, method setKeyAndValueDesc.
/**
* set key and value descriptor.
*
* @param plan
* current plan
* @param topOp
* current top operator in the path
*/
public static void setKeyAndValueDesc(ReduceWork plan, Operator<? extends OperatorDesc> topOp) {
  if (topOp == null) {
    return;
  }
  if (topOp instanceof ReduceSinkOperator) {
    ReduceSinkOperator rs = (ReduceSinkOperator) topOp;
    setKeyAndValueDesc(plan, rs);
  } else {
    List<Operator<? extends OperatorDesc>> children = topOp.getChildOperators();
    if (children != null) {
      for (Operator<? extends OperatorDesc> op : children) {
        setKeyAndValueDesc(plan, op);
      }
    }
  }
}
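setKeyAndValueDesc is a plain depth-first walk: it descends the operator DAG from the given top operator and, on each branch, stops at the first ReduceSinkOperator, which it hands to the RS-specific overload that fills in the key and value table descriptors. The same traversal pattern, generalized to an arbitrary visitor, might look like the following sketch (hypothetical helper; assumes java.util.function.Consumer and the Hive Operator types, and is not the Hive implementation):

static void visitReduceSinks(Operator<? extends OperatorDesc> topOp,
    Consumer<ReduceSinkOperator> visitor) {
  if (topOp == null) {
    return;
  }
  if (topOp instanceof ReduceSinkOperator) {
    // hand the RS to the caller and stop descending on this branch
    visitor.accept((ReduceSinkOperator) topOp);
    return;
  }
  List<Operator<? extends OperatorDesc>> children = topOp.getChildOperators();
  if (children != null) {
    for (Operator<? extends OperatorDesc> child : children) {
      // depth-first over every child branch
      visitReduceSinks(child, visitor);
    }
  }
}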
Use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.
The class SharedWorkOptimizer, method extractSharedOptimizationInfo.
private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache, Operator<?> retainableOpEqualParent,
    Operator<?> discardableOpEqualParent, Operator<?> retainableOp, Operator<?> discardableOp,
    LinkedHashSet<Operator<?>> retainableOps, LinkedHashSet<Operator<?>> discardableOps,
    Set<Operator<?>> discardableInputOps, boolean removeInputBranch) throws SemanticException {
  Operator<?> equalOp1 = retainableOpEqualParent;
  Operator<?> equalOp2 = discardableOpEqualParent;
  Operator<?> currentOp1 = retainableOp;
  Operator<?> currentOp2 = discardableOp;
  long dataSize = 0L;
  long maxDataSize = 0L;
  // Try to merge rest of operators
  while (!(currentOp1 instanceof ReduceSinkOperator)) {
    // Check whether current operators are equal
    if (!compareOperator(pctx, currentOp1, currentOp2)) {
      // If they are not equal, we could zip up till here
      break;
    }
    if (currentOp1.getParentOperators().size() != currentOp2.getParentOperators().size()) {
      // If they are not equal, we could zip up till here
      break;
    }
    if (currentOp1.getParentOperators().size() > 1) {
      List<Operator<?>> discardableOpsForCurrentOp = new ArrayList<>();
      int idx = 0;
      for (; idx < currentOp1.getParentOperators().size(); idx++) {
        Operator<?> parentOp1 = currentOp1.getParentOperators().get(idx);
        Operator<?> parentOp2 = currentOp2.getParentOperators().get(idx);
        if (parentOp1 == equalOp1 && parentOp2 == equalOp2 && !removeInputBranch) {
          continue;
        }
        if ((parentOp1 == equalOp1 && parentOp2 != equalOp2)
            || (parentOp1 != equalOp1 && parentOp2 == equalOp2)) {
          // Input operator is not in the same position
          break;
        }
        // Compare input
        List<Operator<?>> removeOpsForCurrentInput = compareAndGatherOps(pctx, parentOp1, parentOp2);
        if (removeOpsForCurrentInput == null) {
          // Inputs are not the same, bail out
          break;
        }
        // Add inputs to ops to remove
        discardableOpsForCurrentOp.addAll(removeOpsForCurrentInput);
      }
      if (idx != currentOp1.getParentOperators().size()) {
        // If inputs are not equal, we could zip up till here
        break;
      }
      discardableInputOps.addAll(discardableOpsForCurrentOp);
    }
    equalOp1 = currentOp1;
    equalOp2 = currentOp2;
    retainableOps.add(equalOp1);
    discardableOps.add(equalOp2);
    if (equalOp1 instanceof MapJoinOperator) {
      MapJoinOperator mop = (MapJoinOperator) equalOp1;
      dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
      maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
    }
    if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) {
      // TODO: Support checking multiple child operators to merge further.
      break;
    }
    // Update for next iteration
    currentOp1 = currentOp1.getChildOperators().get(0);
    currentOp2 = currentOp2.getChildOperators().get(0);
  }
  // Add the rest to the memory consumption
  Set<Operator<?>> opsWork1 = findWorkOperators(optimizerCache, currentOp1);
  for (Operator<?> op : opsWork1) {
    if (op instanceof MapJoinOperator && !retainableOps.contains(op)) {
      MapJoinOperator mop = (MapJoinOperator) op;
      dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
      maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
    }
  }
  Set<Operator<?>> opsWork2 = findWorkOperators(optimizerCache, currentOp2);
  for (Operator<?> op : opsWork2) {
    if (op instanceof MapJoinOperator && !discardableOps.contains(op)) {
      MapJoinOperator mop = (MapJoinOperator) op;
      dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
      maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
    }
  }
  discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableInputOps));
  discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
  discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
  return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
}
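The core of extractSharedOptimizationInfo is the "zip" loop: starting from two operators already known to be equivalent, it keeps pairing their children as long as the operators compare equal, stopping at a ReduceSinkOperator, a mismatch, or a branch point, while MapJoin in-memory sizes are accumulated along the way via StatsUtils.safeAdd. The pairing logic in isolation could be sketched as follows (hypothetical helper; `same` stands in for compareOperator plus the multi-parent input checks above, and the sketch defensively stops if a chain runs out of children):

static void zipEqualChains(Operator<?> op1, Operator<?> op2,
    BiPredicate<Operator<?>, Operator<?>> same,
    List<Operator<?>> retainable, List<Operator<?>> discardable) {
  // Walk the two chains in lockstep until a ReduceSinkOperator, a mismatch,
  // or a branch point (not exactly one child) is reached.
  while (!(op1 instanceof ReduceSinkOperator) && same.test(op1, op2)) {
    retainable.add(op1);
    discardable.add(op2);
    if (op1.getChildOperators().size() != 1 || op2.getChildOperators().size() != 1) {
      break;
    }
    op1 = op1.getChildOperators().get(0);
    op2 = op2.getChildOperators().get(0);
  }
}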
Use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.
The class SharedWorkOptimizer, method areMergeable.
// FIXME: probably this should also be integrated with isSame() logics
private static boolean areMergeable(ParseContext pctx, SharedWorkOptimizerCache optimizerCache,
    TableScanOperator tsOp1, TableScanOperator tsOp2) throws SemanticException {
  // First we check if the two table scan operators can actually be merged
  // If schemas do not match, we currently do not merge
  List<String> prevTsOpNeededColumns = tsOp1.getNeededColumns();
  List<String> tsOpNeededColumns = tsOp2.getNeededColumns();
  if (prevTsOpNeededColumns.size() != tsOpNeededColumns.size()) {
    return false;
  }
  boolean notEqual = false;
  for (int i = 0; i < prevTsOpNeededColumns.size(); i++) {
    if (!prevTsOpNeededColumns.get(i).equals(tsOpNeededColumns.get(i))) {
      notEqual = true;
      break;
    }
  }
  if (notEqual) {
    return false;
  }
  // If row limit does not match, we currently do not merge
  if (tsOp1.getConf().getRowLimit() != tsOp2.getConf().getRowLimit()) {
    return false;
  }
  // If partitions do not match, we currently do not merge
  PrunedPartitionList prevTsOpPPList = pctx.getPrunedPartitions(tsOp1);
  PrunedPartitionList tsOpPPList = pctx.getPrunedPartitions(tsOp2);
  if (!prevTsOpPPList.getPartitions().equals(tsOpPPList.getPartitions())) {
    return false;
  }
  // If is a DPP, check if actually it refers to same target, column, etc.
  // Further, the DPP value needs to be generated from same subtree
  List<Operator<?>> dppsOp1 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp1));
  List<Operator<?>> dppsOp2 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp2));
  if (dppsOp1.isEmpty() && dppsOp2.isEmpty()) {
    return true;
  }
  for (int i = 0; i < dppsOp1.size(); i++) {
    Operator<?> op = dppsOp1.get(i);
    if (op instanceof ReduceSinkOperator) {
      Set<Operator<?>> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op);
      if (ascendants.contains(tsOp2)) {
        // This should not happen, we cannot merge
        return false;
      }
    }
  }
  for (int i = 0; i < dppsOp2.size(); i++) {
    Operator<?> op = dppsOp2.get(i);
    if (op instanceof ReduceSinkOperator) {
      Set<Operator<?>> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op);
      if (ascendants.contains(tsOp1)) {
        // This should not happen, we cannot merge
        return false;
      }
    }
  }
  if (dppsOp1.size() != dppsOp2.size()) {
    // Only first or second operator contains DPP pruning
    return false;
  }
  // Check if DPP branches are equal
  BitSet bs = new BitSet();
  for (int i = 0; i < dppsOp1.size(); i++) {
    Operator<?> dppOp1 = dppsOp1.get(i);
    for (int j = 0; j < dppsOp2.size(); j++) {
      if (!bs.get(j)) {
        // If not visited yet
        Operator<?> dppOp2 = dppsOp2.get(j);
        if (compareAndGatherOps(pctx, dppOp1, dppOp2) != null) {
          // The DPP operator/branch are equal
          bs.set(j);
          break;
        }
      }
    }
    if (bs.cardinality() < i + 1) {
      return false;
    }
  }
  return true;
}
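The last block of areMergeable performs a greedy one-to-one matching of the DPP (dynamic partition pruning) branches of the two table scans: every branch of the first scan must claim a distinct, equal branch of the second, and the BitSet records which branches on the second side are already taken. The same idea as a generic sketch (hypothetical helper; `branchesEqual` stands in for compareAndGatherOps(pctx, dppOp1, dppOp2) != null, and both lists are assumed to have equal size, as checked above):

static <T> boolean allBranchesMatched(List<T> dpps1, List<T> dpps2,
    BiPredicate<T, T> branchesEqual) {
  BitSet claimed = new BitSet();
  for (int i = 0; i < dpps1.size(); i++) {
    boolean matched = false;
    for (int j = 0; j < dpps2.size(); j++) {
      if (!claimed.get(j) && branchesEqual.test(dpps1.get(i), dpps2.get(j))) {
        // each branch on the second side may be used at most once
        claimed.set(j);
        matched = true;
        break;
      }
    }
    if (!matched) {
      // some branch of the first scan has no equal counterpart
      return false;
    }
  }
  return true;
}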