use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMapRedUtils method setUnionPlan.
private static void setUnionPlan(GenMRProcContext opProcCtx, boolean local, Task<? extends Serializable> currTask, GenMRUnionCtx uCtx, boolean mergeTask) throws SemanticException {
  TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
  if (currTopOp != null) {
    String currAliasId = opProcCtx.getCurrAliasId();
    if (mergeTask || !opProcCtx.isSeenOp(currTask, currTopOp)) {
      setTaskPlan(currAliasId, currTopOp, currTask, local, opProcCtx);
    }
    currTopOp = null;
    opProcCtx.setCurrTopOp(currTopOp);
  } else {
    List<String> taskTmpDirLst = uCtx.getTaskTmpDir();
    if ((taskTmpDirLst != null) && !(taskTmpDirLst.isEmpty())) {
      List<TableDesc> tt_descLst = uCtx.getTTDesc();
      assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
      assert taskTmpDirLst.size() == tt_descLst.size();
      int size = taskTmpDirLst.size();
      assert local == false;
      List<TableScanOperator> topOperators = uCtx.getListTopOperators();
      MapredWork plan = (MapredWork) currTask.getWork();
      for (int pos = 0; pos < size; pos++) {
        String taskTmpDir = taskTmpDirLst.get(pos);
        TableDesc tt_desc = tt_descLst.get(pos);
        MapWork mWork = plan.getMapWork();
        if (mWork.getPathToAliases().get(taskTmpDir) == null) {
          taskTmpDir = taskTmpDir.intern();
          Path taskTmpDirPath = StringInternUtils.internUriStringsInPath(new Path(taskTmpDir));
          mWork.removePathToAlias(taskTmpDirPath);
          mWork.addPathToAlias(taskTmpDirPath, taskTmpDir);
          mWork.addPathToPartitionInfo(taskTmpDirPath, new PartitionDesc(tt_desc, null));
          mWork.getAliasToWork().put(taskTmpDir, topOperators.get(pos));
        }
      }
    }
  }
}
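The loop above amounts to registering each union temp directory with the MapWork in three maps: path-to-alias, path-to-partition-info, and alias-to-work. Below is a minimal sketch of that registration in isolation, assuming the caller already has the MapWork, temp directory, TableDesc, and top operator in hand. The wrapper class and helper name are hypothetical, and signatures may differ slightly between Hive versions; only calls that appear in the snippet above are used.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class UnionTmpDirRegistration {
  // Hypothetical helper mirroring the loop body of setUnionPlan for a single temp dir.
  static void register(MapWork mWork, String taskTmpDir, TableDesc ttDesc, TableScanOperator topOp) {
    Path tmpDirPath = new Path(taskTmpDir);
    if (mWork.getPathToAliases().get(tmpDirPath) == null) {                      // register each path only once
      mWork.addPathToAlias(tmpDirPath, taskTmpDir);                              // path -> alias
      mWork.addPathToPartitionInfo(tmpDirPath, new PartitionDesc(ttDesc, null)); // path -> row schema
      mWork.getAliasToWork().put(taskTmpDir, topOp);                             // alias -> operator tree root
    }
  }
}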
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMRTableScan1 method process.
/**
 * Table scan encountered.
 * @param nd
 *          the table scan operator encountered
 * @param opProcCtx
 *          context
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
  TableScanOperator op = (TableScanOperator) nd;
  GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
  ParseContext parseCtx = ctx.getParseCtx();
  Class<? extends InputFormat> inputFormat = op.getConf().getTableMetadata().getInputFormatClass();
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
  // create a dummy MapReduce task
  MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
  MapRedTask currTask = (MapRedTask) TaskFactory.get(currWork, parseCtx.getConf());
  ctx.setCurrTask(currTask);
  ctx.setCurrTopOp(op);
  for (String alias : parseCtx.getTopOps().keySet()) {
    Operator<? extends OperatorDesc> currOp = parseCtx.getTopOps().get(alias);
    if (currOp == op) {
      String currAliasId = alias;
      ctx.setCurrAliasId(currAliasId);
      mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));
      if (parseCtx.getQueryProperties().isAnalyzeCommand()) {
        boolean partialScan = parseCtx.getQueryProperties().isPartialScanAnalyzeCommand();
        boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand();
        if (OrcInputFormat.class.isAssignableFrom(inputFormat) || MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
          // For ORC and Parquet, all of the following statements are equivalent:
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // There will not be any MR or Tez job above this task.
          StatsNoJobWork snjWork = new StatsNoJobWork(op.getConf().getTableMetadata().getTableSpec());
          snjWork.setStatsReliable(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
          // If a partition is specified, get the pruned partition list
          Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(op);
          if (confirmedParts.size() > 0) {
            Table source = op.getConf().getTableMetadata();
            List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
            PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, partCols, false);
            snjWork.setPrunedPartitionList(partList);
          }
          Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseCtx.getConf());
          ctx.setCurrTask(snjTask);
          ctx.setCurrTopOp(null);
          ctx.getRootTasks().clear();
          ctx.getRootTasks().add(snjTask);
        } else {
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator.
          StatsWork statsWork = new StatsWork(op.getConf().getTableMetadata().getTableSpec());
          statsWork.setAggKey(op.getConf().getStatsAggPrefix());
          statsWork.setStatsTmpDir(op.getConf().getTmpStatsDir());
          statsWork.setSourceTask(currTask);
          statsWork.setStatsReliable(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
          Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
          currTask.addDependentTask(statsTask);
          if (!ctx.getRootTasks().contains(currTask)) {
            ctx.getRootTasks().add(currTask);
          }
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // The plan consists of a StatsTask only.
          if (noScan) {
            statsTask.setParentTasks(null);
            statsWork.setNoScanAnalyzeCommand(true);
            ctx.getRootTasks().remove(currTask);
            ctx.getRootTasks().add(statsTask);
          }
          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
          if (partialScan) {
            handlePartialScanCommand(op, ctx, parseCtx, currTask, statsWork, statsTask);
          }
          currWork.getMapWork().setGatheringStats(true);
          if (currWork.getReduceWork() != null) {
            currWork.getReduceWork().setGatheringStats(true);
          }
          // NOTE: here we should use the new partition predicate pushdown API to get
          // the list of pruned partitions and pass it to setTaskPlan as the last parameter.
          Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(op);
          if (confirmedPartns.size() > 0) {
            Table source = op.getConf().getTableMetadata();
            List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
            PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, partCols, false);
            GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx, partList);
          } else {
            // non-partitioned table
            GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx);
          }
        }
      }
      return true;
    }
  }
  assert false;
  return null;
}
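Setting the ANALYZE handling aside, the bookkeeping this method performs for the matching alias condenses to seeding the traversal context with a dummy map-reduce task. The fragment below is a hedged restatement of just that part, assuming the same variables and imports that are in scope in the method above; it is a sketch, not the Hive method itself.

// Hypothetical condensation of the non-ANALYZE bookkeeping above.
MapredWork work = GenMapRedUtils.getMapRedWork(parseCtx);                 // empty map-reduce plan skeleton
MapRedTask task = (MapRedTask) TaskFactory.get(work, parseCtx.getConf()); // wrap the plan in an executable task
ctx.setCurrTask(task);                                                    // becomes the "current" task for later rules
ctx.setCurrTopOp(op);                                                     // remember the top operator that produced it
ctx.setCurrAliasId(alias);
ctx.getMapCurrCtx().put(op, new GenMapRedCtx(task, alias));               // per-operator context for later lookups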
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class TestSymlinkTextInputFormat method setUp.
@Override
protected void setUp() throws IOException {
  conf = new Configuration();
  job = new JobConf(conf);
  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(job, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
  fileSystem = FileSystem.getLocal(conf);
  testDir = new Path(System.getProperty("test.tmp.dir", System.getProperty("user.dir", new File(".").getAbsolutePath())) + "/TestSymlinkTextInputFormat");
  reporter = Reporter.NULL;
  fileSystem.delete(testDir, true);
  dataDir1 = new Path(testDir, "datadir1");
  dataDir2 = new Path(testDir, "datadir2");
  symlinkDir = new Path(testDir, "symlinkdir");
}
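The test fixtures on this page all repeat the same wiring: map each input path to a PartitionDesc built from Utilities.defaultTd, hang that map on a fresh MapredWork, and publish the plan with Utilities.setMapRedWork so that Utilities.getMapWork(conf) can recover it later. Below is a hedged sketch of that shared setup as a standalone helper; the class name, method name, and scratch-dir location are assumptions, not Hive test code.

import java.util.LinkedHashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class MapredWorkTestSupport {
  // Hypothetical helper: registers the given input paths on a fresh MapredWork and
  // serializes the plan into the configuration, as the surrounding tests do inline.
  static MapredWork setUpMapredWork(Configuration conf, Path... inputPaths) {
    TableDesc tblDesc = Utilities.defaultTd;                   // Hive's default table descriptor
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null); // no concrete partition spec
    LinkedHashMap<Path, PartitionDesc> pathToPartInfo = new LinkedHashMap<>();
    for (Path p : inputPaths) {
      pathToPartInfo.put(p, partDesc);
    }
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToPartitionInfo(pathToPartInfo);
    // Scratch-dir location is an assumption; the tests here use /tmp/<user>/hive.
    Path scratchDir = new Path("/tmp/" + System.getProperty("user.name"), "hive");
    Utilities.setMapRedWork(conf, mrwork, scratchDir);         // makes the plan visible to Utilities.getMapWork(conf)
    return mrwork;
  }
}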
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class TestCombineHiveInputFormat method testAvoidSplitCombination.
public void testAvoidSplitCombination() throws Exception {
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf);
  TableDesc tblDesc = Utilities.defaultTd;
  tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder1"), partDesc);
  pt.put(new Path("/tmp/testfolder2"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  try {
    Path[] paths = new Path[2];
    paths[0] = new Path("/tmp/testfolder1");
    paths[1] = new Path("/tmp/testfolder2");
    CombineHiveInputFormat combineInputFormat = ReflectionUtils.newInstance(CombineHiveInputFormat.class, conf);
    combineInputFormat.pathToPartitionInfo = Utilities.getMapWork(conf).getPathToPartitionInfo();
    Set results = combineInputFormat.getNonCombinablePathIndices(job, paths, 2);
    assertEquals("Should have both path indices in the results set", 2, results.size());
  } finally {
    // Cleanup the mapwork path
    FileSystem.get(conf).delete(mapWorkPath, true);
  }
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class TestHiveBinarySearchRecordReader method init.
private void init() throws IOException {
  conf = new JobConf();
  resetIOContext();
  rcfReader = mock(RCFileRecordReader.class);
  when(rcfReader.next((LongWritable) anyObject(), (BytesRefArrayWritable) anyObject())).thenReturn(true);
  // Since the start is 0, and the length is 100, the first call to sync should be with the value
  // 50 so return that for getPos()
  when(rcfReader.getPos()).thenReturn(50L);
  conf.setBoolean("hive.input.format.sorted", true);
  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(conf, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
  hiveSplit = new TestHiveInputSplit();
  hbsReader = new TestHiveRecordReader(rcfReader, conf);
  hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}