use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMapRedUtils method getMapRedWork.
/**
* create a new plan and return.
*
* @return the new plan
*/
public static MapredWork getMapRedWork(ParseContext parseCtx) {
MapredWork work = getMapRedWorkFromConf(parseCtx.getConf());
work.getMapWork().setNameToSplitSample(parseCtx.getNameToSplitSample());
return work;
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMapRedUtils method createMRWorkForMergingFiles.
/**
* Create a MapredWork based on input path, the top operator and the input
* table descriptor.
*
* @param conf
* @param topOp
* the table scan operator that is the root of the MapReduce task.
* @param fsDesc
* the file sink descriptor that serves as the input to this merge task.
* @param parentMR
* the parent MapReduce work
* @param parentFS
* the last FileSinkOperator in the parent MapReduce work
* @return the MapredWork
*/
private static MapWork createMRWorkForMergingFiles(HiveConf conf, TableScanOperator topOp, FileSinkDesc fsDesc) {
ArrayList<String> aliases = new ArrayList<String>();
Path inputDir = StringInternUtils.internUriStringsInPath(fsDesc.getMergeInputDirName());
String inputDirStr = inputDir.toString().intern();
TableDesc tblDesc = fsDesc.getTableInfo();
// dummy alias: just use the input path
aliases.add(inputDirStr);
// constructing the default MapredWork
MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
MapWork cplan = cMrPlan.getMapWork();
cplan.addPathToAlias(inputDir, aliases);
cplan.addPathToPartitionInfo(inputDir, new PartitionDesc(tblDesc, null));
cplan.getAliasToWork().put(inputDirStr, topOp);
cplan.setMapperCannotSpanPartns(true);
return cplan;
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMapRedUtils method setUnionPlan.
private static void setUnionPlan(GenMRProcContext opProcCtx, boolean local, Task<? extends Serializable> currTask, GenMRUnionCtx uCtx, boolean mergeTask) throws SemanticException {
TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
if (currTopOp != null) {
String currAliasId = opProcCtx.getCurrAliasId();
if (mergeTask || !opProcCtx.isSeenOp(currTask, currTopOp)) {
setTaskPlan(currAliasId, currTopOp, currTask, local, opProcCtx);
}
currTopOp = null;
opProcCtx.setCurrTopOp(currTopOp);
} else {
List<String> taskTmpDirLst = uCtx.getTaskTmpDir();
if ((taskTmpDirLst != null) && !(taskTmpDirLst.isEmpty())) {
List<TableDesc> tt_descLst = uCtx.getTTDesc();
assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
assert taskTmpDirLst.size() == tt_descLst.size();
int size = taskTmpDirLst.size();
assert local == false;
List<TableScanOperator> topOperators = uCtx.getListTopOperators();
MapredWork plan = (MapredWork) currTask.getWork();
for (int pos = 0; pos < size; pos++) {
String taskTmpDir = taskTmpDirLst.get(pos);
Path taskTmpDirPath = new Path(taskTmpDir);
MapWork mWork = plan.getMapWork();
if (!mWork.getPathToAliases().containsKey(taskTmpDirPath)) {
taskTmpDir = taskTmpDir.intern();
StringInternUtils.internUriStringsInPath(taskTmpDirPath);
TableDesc tt_desc = tt_descLst.get(pos);
mWork.addPathToAlias(taskTmpDirPath, taskTmpDir);
mWork.addPathToPartitionInfo(taskTmpDirPath, new PartitionDesc(tt_desc, null));
mWork.getAliasToWork().put(taskTmpDir, topOperators.get(pos));
}
}
}
}
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMapRedUtils method initUnionPlan.
/**
* Initialize the current union plan.
*
* @param op
* the reduce sink operator encountered
* @param opProcCtx
* processing context
*/
public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp, GenMRProcContext opProcCtx, Task<? extends Serializable> unionTask) throws SemanticException {
Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
MapredWork plan = (MapredWork) unionTask.getWork();
HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
opTaskMap.put(reducer, unionTask);
plan.setReduceWork(new ReduceWork());
plan.getReduceWork().setReducer(reducer);
plan.getReduceWork().setReducer(reducer);
ReduceSinkDesc desc = op.getConf();
plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
if (needsTagging(plan.getReduceWork())) {
plan.getReduceWork().setNeedsTagging(true);
}
initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class ExecDriver method main.
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, HiveException {
String planFileName = null;
String jobConfFileName = null;
boolean noLog = false;
String files = null;
String libjars = null;
boolean localtask = false;
try {
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-plan")) {
planFileName = args[++i];
} else if (args[i].equals("-jobconffile")) {
jobConfFileName = args[++i];
} else if (args[i].equals("-nolog")) {
noLog = true;
} else if (args[i].equals("-files")) {
files = args[++i];
} else if (args[i].equals("-libjars")) {
libjars = args[++i];
} else if (args[i].equals("-localtask")) {
localtask = true;
}
}
} catch (IndexOutOfBoundsException e) {
System.err.println("Missing argument to option");
printUsage();
}
JobConf conf;
if (localtask) {
conf = new JobConf(MapredLocalTask.class);
} else {
conf = new JobConf(ExecDriver.class);
}
if (jobConfFileName != null) {
conf.addResource(new Path(jobConfFileName));
}
// Initialize the resources from command line
if (files != null) {
conf.set("tmpfiles", files);
}
if (libjars != null) {
conf.set("tmpjars", libjars);
}
if (UserGroupInformation.isSecurityEnabled()) {
String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
if (hadoopAuthToken != null) {
conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
}
}
boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);
String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
if (queryId.isEmpty()) {
queryId = "unknown-" + System.currentTimeMillis();
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, queryId);
}
System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);
LogUtils.registerLoggingContext(conf);
if (noLog) {
// If started from main(), and noLog is on, we should not output
// any logs. To turn the log on, please set -Dtest.silent=false
org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
NullAppender appender = NullAppender.createNullAppender();
appender.addToLogger(logger.getName(), Level.ERROR);
appender.start();
} else {
setupChildLog4j(conf);
}
Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
LogHelper console = new LogHelper(LOG, isSilent);
if (planFileName == null) {
console.printError("Must specify Plan File Name");
printUsage();
}
// that it's easy to find reason for local mode execution failures
for (Appender appender : ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger()).getAppenders().values()) {
if (appender instanceof FileAppender) {
console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
} else if (appender instanceof RollingFileAppender) {
console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
}
}
// the plan file should always be in local directory
Path p = new Path(planFileName);
FileSystem fs = FileSystem.getLocal(conf);
InputStream pathData = fs.open(p);
// child process. so we add it here explicitly
try {
// see also - code in CliDriver.java
ClassLoader loader = conf.getClassLoader();
if (StringUtils.isNotBlank(libjars)) {
loader = Utilities.addToClassPath(loader, StringUtils.split(libjars, ","));
}
conf.setClassLoader(loader);
// Also set this to the Thread ContextClassLoader, so new threads will
// inherit
// this class loader, and propagate into newly created Configurations by
// those
// new threads.
Thread.currentThread().setContextClassLoader(loader);
} catch (Exception e) {
throw new HiveException(e.getMessage(), e);
}
int ret;
if (localtask) {
memoryMXBean = ManagementFactory.getMemoryMXBean();
MapredLocalWork plan = SerializationUtilities.deserializePlan(pathData, MapredLocalWork.class);
MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
ret = ed.executeInProcess(new DriverContext());
} else {
MapredWork plan = SerializationUtilities.deserializePlan(pathData, MapredWork.class);
ExecDriver ed = new ExecDriver(plan, conf, isSilent);
ret = ed.execute(new DriverContext());
}
if (ret != 0) {
System.exit(ret);
}
}
Aggregations