Use of org.apache.hadoop.hive.ql.exec.mr.MapRedTask in project hive by apache:
the process method of the class GenMRTableScan1.
/**
 * Table scan encountered.
 * @param nd
 *          the table scan operator encountered
 * @param opProcCtx
 *          context
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator op = (TableScanOperator) nd;
  GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
  ctx.reset();
  ParseContext parseCtx = ctx.getParseCtx();
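  // The input format drives the stats strategy below: formats that keep
  // basic stats in file footers (e.g. ORC, Parquet) need no scan job.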
  Table table = op.getConf().getTableMetadata();
  Class<? extends InputFormat> inputFormat = table.getInputFormatClass();
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

  // create a dummy MapReduce task
  MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
  MapRedTask currTask = (MapRedTask) TaskFactory.get(currWork);
  ctx.setCurrTask(currTask);
  ctx.setCurrTopOp(op);
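
  // Find the alias under which this scan was registered as a top operator,
  // so the new task can be tied to the correct alias id.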
  for (String alias : parseCtx.getTopOps().keySet()) {
    Operator<? extends OperatorDesc> currOp = parseCtx.getTopOps().get(alias);
    if (currOp == op) {
      String currAliasId = alias;
      ctx.setCurrAliasId(currAliasId);
      mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));
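
      // Ordinary queries need nothing further here; later operator processors
      // flesh out currTask. ANALYZE TABLE commands get their stats plan now.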
      if (parseCtx.getQueryProperties().isAnalyzeCommand()) {
        boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand();
        if (BasicStatsNoJobTask.canUseBasicStats(table, inputFormat)) {
          // For ORC and Parquet, the following statements are equivalent:
          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // There will not be any MR or Tez job above this task.
          StatsWork statWork = new StatsWork(table, parseCtx.getConf());
          statWork.setFooterScan();
          // If a partition is specified, get the pruned partition list
          Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(op);
          if (confirmedParts.size() > 0) {
            List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
            PrunedPartitionList partList = new PrunedPartitionList(table, confirmedParts, partCols, false);
            statWork.addInputPartitions(partList.getPartitions());
          }
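
          // The metadata-only stats task replaces the dummy MR task as the
          // sole root of the plan; no MR or Tez job is launched at all.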
          Task<StatsWork> snjTask = TaskFactory.get(statWork);
          ctx.setCurrTask(snjTask);
          ctx.setCurrTopOp(null);
          ctx.getRootTasks().clear();
          ctx.getRootTasks().add(snjTask);
        } else {
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator.
          BasicStatsWork statsWork = new BasicStatsWork(table.getTableSpec());
          statsWork.setIsExplicitAnalyze(true);
          statsWork.setNoScanAnalyzeCommand(noScan);
          StatsWork columnStatsWork = new StatsWork(table, statsWork, parseCtx.getConf());
          columnStatsWork.collectStatsFromAggregator(op.getConf());
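          // The stats task aggregates the statistics the scan task gathers,
          // so record the scan task as its source.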
          columnStatsWork.setSourceTask(currTask);
          Task<StatsWork> columnStatsTask = TaskFactory.get(columnStatsWork);
          currTask.addDependentTask(columnStatsTask);
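          // Register the scan task as a root of the task DAG if it is not
          // one already.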
          if (!ctx.getRootTasks().contains(currTask)) {
            ctx.getRootTasks().add(currTask);
          }

          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // The plan consists of a StatsTask only.
          if (noScan) {
            columnStatsTask.setParentTasks(null);
            ctx.getRootTasks().remove(currTask);
            ctx.getRootTasks().add(columnStatsTask);
          }
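
          // Have the scan job publish statistics while it reads the table.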
          currWork.getMapWork().setGatheringStats(true);
          if (currWork.getReduceWork() != null) {
            currWork.getReduceWork().setGatheringStats(true);
          }

          // NOTE: here we should use the new partition predicate pushdown API to get
          // the pruned partition list and pass it to setTaskPlan as the last parameter
          Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(op);
          if (confirmedPartns.size() > 0) {
            List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
            PrunedPartitionList partList = new PrunedPartitionList(table, confirmedPartns, partCols, false);
            GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx, partList);
          } else {
            // non-partitioned table
            GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx);
          }
        }
      }
      return true;
    }
  }
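
  // The scan must always be found among the top operators; falling through
  // the loop indicates an inconsistent parse context.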
  assert false;
  return null;
}
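
For context, GenMRTableScan1 never runs on its own: the MapReduce task compiler registers it as a NodeProcessor, and a rule-based dispatcher fires it whenever the operator-tree walker reaches a TableScanOperator. Below is a minimal sketch of that wiring, modeled on MapReduceCompiler.generateTaskTree; it assumes procCtx is an already-initialized GenMRProcContext and pCtx the current ParseContext.

Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
// "TS%" matches every TableScanOperator, routing each scan to GenMRTableScan1
opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"),
    new GenMRTableScan1());
// GenMROperator is the fallback processor for operators no rule matches
Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
GraphWalker ogw = new GenMapRedWalker(disp);

// walk the plan from the top (table scan) operators downwards
ArrayList<Node> topNodes = new ArrayList<Node>(pCtx.getTopOps().values());
ogw.startWalking(topNodes, null);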