Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
The class MergeFileTask, method execute:
/**
* start a new map-reduce job to do the merge, almost the same as ExecDriver.
*/
@Override
public int execute(DriverContext driverContext) {
Context ctx = driverContext.getCtx();
boolean ctxCreated = false;
RunningJob rj = null;
int returnVal = 0;
try {
if (ctx == null) {
ctx = new Context(job);
ctxCreated = true;
}
HiveFileFormatUtils.prepareJobOutput(job);
job.setInputFormat(work.getInputformatClass());
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(MergeFileMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);
// create the temp directories
Path outputPath = work.getOutputDir();
Path tempOutPath = Utilities.toTempPath(outputPath);
FileSystem fs = tempOutPath.getFileSystem(job);
if (!fs.exists(tempOutPath)) {
fs.mkdirs(tempOutPath);
}
ExecDriver.propagateSplitSettings(job, work);
// set job name
boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
String jobName = null;
if (noName && this.getQueryPlan() != null) {
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
}
if (noName) {
// This is for a special case to ensure unit tests pass
job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
}
// add input path
addInputPaths(job, work);
// serialize work
Utilities.setMapWork(job, work, ctx.getMRTmpPath(), true);
// remove pwd from the conf so that the job tracker doesn't show it in its logs
String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
if (pwd != null) {
HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
}
// submit the job
JobClient jc = new JobClient(job);
String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
if (!addedJars.isEmpty()) {
job.set("tmpjars", addedJars);
}
// make this client wait if the job tracker is not behaving well.
Throttle.checkJobTracker(job, LOG);
// Finally SUBMIT the JOB!
rj = jc.submitJob(job);
this.jobID = rj.getJobID();
returnVal = jobExecHelper.progress(rj, jc, ctx);
success = (returnVal == 0);
} catch (Exception e) {
setException(e);
String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
if (rj != null) {
mesg = "Ended Job = " + rj.getJobID() + mesg;
} else {
mesg = "Job Submission failed" + mesg;
}
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
success = false;
returnVal = 1;
} finally {
try {
if (ctxCreated) {
ctx.clear();
}
if (rj != null) {
if (returnVal != 0) {
rj.killJob();
}
}
// get the list of Dynamic partition paths
if (rj != null) {
if (work.getAliasToWork() != null) {
for (Operator<? extends OperatorDesc> op : work.getAliasToWork().values()) {
op.jobClose(job, success);
}
}
}
} catch (Exception e) {
// jobClose needs to execute successfully; otherwise fail the task
LOG.warn("Job close failed ", e);
if (success) {
setException(e);
success = false;
returnVal = 3;
String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
}
} finally {
HadoopJobExecHelper.runningJobs.remove(rj);
}
}
return returnVal;
}
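The snippet above follows a create-if-absent pattern for the Context: it builds one only when the DriverContext did not supply it, remembers ownership via ctxCreated, and clears it in the finally block. Below is a minimal, self-contained sketch of that ownership pattern; ResourceContext, newContext and run are hypothetical stand-ins, not Hive or Hadoop APIs.

public class OwnedContextSketch {

    // Hypothetical stand-in for org.apache.hadoop.hive.ql.Context; close() plays the role of clear().
    interface ResourceContext extends AutoCloseable {
        @Override
        void close();
    }

    static ResourceContext newContext() {
        return () -> System.out.println("context cleared");
    }

    static int run(ResourceContext provided) {
        ResourceContext ctx = provided;
        boolean ctxCreated = false;             // true only if this method owns the context
        try {
            if (ctx == null) {
                ctx = newContext();             // the caller supplied no context, so create (and later clear) our own
                ctxCreated = true;
            }
            // ... configure and submit work against ctx, as MergeFileTask does with the MR job ...
            return 0;
        } catch (Exception e) {
            return 1;                           // mirrors the returnVal = 1 failure path above
        } finally {
            if (ctxCreated && ctx != null) {
                ctx.close();                    // only clear a context created here, never the caller's
            }
        }
    }

    public static void main(String[] args) {
        System.out.println("exit code: " + run(null));
    }
}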
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
The class TezTask, method execute:
@Override
public int execute(DriverContext driverContext) {
int rc = 1;
boolean cleanContext = false;
Context ctx = null;
Ref<TezSessionState> sessionRef = Ref.from(null);
try {
// Get or create Context object. If we create it we have to clean it later as well.
ctx = driverContext.getCtx();
if (ctx == null) {
ctx = new Context(conf);
cleanContext = true;
// Some DDL tasks that directly execute a TezTask do not set up a Context, and hence no TriggerContext.
// QueryId handling is inconsistent: some DDL tasks carry an executionId instead of a proper queryId.
String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
WmContext wmContext = new WmContext(System.currentTimeMillis(), queryId);
ctx.setWmContext(wmContext);
}
// Need to remove this static hack. But this is currently the way to get a session.
SessionState ss = SessionState.get();
// Note: given that we return pool sessions to the pool in the finally block below, and that
// we need to set the global to null to do that, this "reuse" may be pointless.
TezSessionState session = sessionRef.value = ss.getTezSession();
if (session != null && !session.isOpen()) {
LOG.warn("The session: " + session + " has not been opened");
}
// We only need a username for UGI to use for groups; getGroups will fetch the groups
// based on Hadoop configuration, as documented at
// https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html
String userName = ss.getUserName();
List<String> groups = null;
if (userName == null) {
userName = "anonymous";
} else {
groups = UserGroupInformation.createRemoteUser(ss.getUserName()).getGroups();
}
MappingInput mi = new MappingInput(userName, groups, ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
WmContext wmContext = ctx.getWmContext();
// jobConf will hold all the configuration for hadoop, tez, and hive
JobConf jobConf = utils.createConfiguration(conf);
// Get all user jars from work (e.g. input format stuff).
String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf);
// DAG scratch dir. We get a session from the pool, so it may be different from the Tez one.
// TODO: we could perhaps reuse the same directory for HiveResources?
Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), conf);
CallerContext callerContext = CallerContext.create("HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr());
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
session = sessionRef.value = WorkloadManagerFederation.getSession(sessionRef.value, conf, mi, getWork().getLlapMode(), wmContext);
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
try {
ss.setTezSession(session);
LOG.info("Subscribed to counters: {} for queryId: {}", wmContext.getSubscribedCounters(), wmContext.getQueryId());
// Ensure the session is open and has the necessary local resources.
// This would refresh any conf resources and also local resources.
ensureSessionHasResources(session, allNonConfFiles);
// This combines the jar resources that come from conf and those that do not.
List<LocalResource> allNonAppResources = session.getLocalizedResources();
logResources(allNonAppResources);
Map<String, LocalResource> allResources = DagUtils.createTezLrMap(session.getAppJarLr(), allNonAppResources);
// next we translate the TezWork to a Tez DAG
DAG dag = build(jobConf, work, scratchDir, ctx, allResources);
dag.setCallerContext(callerContext);
// Check isShutdown opportunistically; it's never unset.
if (this.isShutdown) {
throw new HiveException("Operation cancelled");
}
DAGClient dagClient = submit(jobConf, dag, sessionRef);
session = sessionRef.value;
boolean wasShutdown = false;
synchronized (dagClientLock) {
assert this.dagClient == null;
wasShutdown = this.isShutdown;
if (!wasShutdown) {
this.dagClient = dagClient;
}
}
if (wasShutdown) {
closeDagClientOnCancellation(dagClient);
throw new HiveException("Operation cancelled");
}
// finally monitor will print progress until the job is done
TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx);
rc = monitor.monitorExecution();
if (rc != 0) {
this.setException(new HiveException(monitor.getDiagnostics()));
}
// fetch the counters
try {
Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
counters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters();
} catch (Exception err) {
// Don't fail execution due to counters - just don't print summary info
LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete. " + err, err);
counters = null;
}
} finally {
// Note: due to TEZ-3846, the session may actually be invalid in case of some errors.
// Currently, reopen on an attempted reuse will take care of that; we cannot tell
// if the session is usable until we try.
// We return this to the pool even if it's unusable; reopen is supposed to handle this.
wmContext = ctx.getWmContext();
try {
if (sessionRef.value != null) {
sessionRef.value.returnToSessionManager();
}
} catch (Exception e) {
LOG.error("Failed to return session: {} to pool", session, e);
throw e;
}
if (!conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("none") && wmContext != null) {
if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("json")) {
wmContext.printJson(console);
} else if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("text")) {
wmContext.print(console);
}
}
}
if (LOG.isInfoEnabled() && counters != null && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_EXEC_SUMMARY) || Utilities.isPerfOrAboveLogging(conf))) {
for (CounterGroup group : counters) {
LOG.info(group.getDisplayName() + ":");
for (TezCounter counter : group) {
LOG.info(" " + counter.getDisplayName() + ": " + counter.getValue());
}
}
}
} catch (Exception e) {
LOG.error("Failed to execute tez graph.", e);
// rc will be 1 at this point indicating failure.
} finally {
Utilities.clearWork(conf);
// Clear gWorkMap
for (BaseWork w : work.getAllWork()) {
JobConf workCfg = workToConf.get(w);
if (workCfg != null) {
Utilities.clearWorkMapForConf(workCfg);
}
}
if (cleanContext) {
try {
ctx.clear();
} catch (Exception e) {
/*best effort*/
LOG.warn("Failed to clean up after tez job", e);
}
}
// need to either move tmp files or remove them
DAGClient dagClient = null;
synchronized (dagClientLock) {
dagClient = this.dagClient;
this.dagClient = null;
}
// DagClient as such should have no bearing on jobClose.
if (dagClient != null) {
// rc will only be overwritten if close errors out
rc = close(work, rc, dagClient);
}
}
return rc;
}
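One detail worth isolating from the snippet above is the cancellation handshake around the DAG client: the submitting thread publishes the client under dagClientLock only if isShutdown has not been set, and otherwise closes the freshly created client itself. The sketch below reproduces that handshake in plain Java; ClientHandle, publish and cancel are illustrative names, not Tez or Hive APIs.

public class CancelHandshakeSketch {

    // Hypothetical stand-in for a DAG client handle.
    interface ClientHandle {
        void close();
    }

    private final Object clientLock = new Object();   // plays the role of dagClientLock
    private ClientHandle publishedClient;             // plays the role of the dagClient field
    private boolean cancelled;                        // plays the role of isShutdown (never unset)

    /** Submitting thread: publish the fresh client unless a cancel already won the race. */
    void publish(ClientHandle freshClient) {
        boolean wasCancelled;
        synchronized (clientLock) {
            wasCancelled = cancelled;
            if (!wasCancelled) {
                publishedClient = freshClient;        // from now on the cancel path can see and close it
            }
        }
        if (wasCancelled) {
            freshClient.close();                      // mirrors closeDagClientOnCancellation above
            throw new IllegalStateException("Operation cancelled");
        }
    }

    /** Cancelling thread: set the flag first, then close whatever client is already published. */
    void cancel() {
        ClientHandle toClose;
        synchronized (clientLock) {
            cancelled = true;
            toClose = publishedClient;
            publishedClient = null;
        }
        if (toClose != null) {
            toClose.close();
        }
    }

    public static void main(String[] args) {
        CancelHandshakeSketch task = new CancelHandshakeSketch();
        task.cancel();                                // cancel arrives before submission completes
        try {
            task.publish(() -> System.out.println("client closed"));
        } catch (IllegalStateException e) {
            System.out.println("publish refused: " + e.getMessage());
        }
    }
}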
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
The class ExecDriver, method execute:
/**
* Execute a query plan using Hadoop.
*/
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
public int execute(DriverContext driverContext) {
IOPrepareCache ioPrepareCache = IOPrepareCache.get();
ioPrepareCache.clear();
boolean success = true;
Context ctx = driverContext.getCtx();
boolean ctxCreated = false;
Path emptyScratchDir;
JobClient jc = null;
if (driverContext.isShutdown()) {
LOG.warn("Task was cancelled");
return 5;
}
MapWork mWork = work.getMapWork();
ReduceWork rWork = work.getReduceWork();
try {
if (ctx == null) {
ctx = new Context(job);
ctxCreated = true;
}
emptyScratchDir = ctx.getMRTmpPath();
FileSystem fs = emptyScratchDir.getFileSystem(job);
fs.mkdirs(emptyScratchDir);
} catch (IOException e) {
e.printStackTrace();
console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return 5;
}
HiveFileFormatUtils.prepareJobOutput(job);
// See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapRunnerClass(ExecMapRunner.class);
job.setMapperClass(ExecMapper.class);
job.setMapOutputKeyClass(HiveKey.class);
job.setMapOutputValueClass(BytesWritable.class);
try {
String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
job.setPartitionerClass(JavaUtils.loadClass(partitioner));
} catch (ClassNotFoundException e) {
throw new RuntimeException(e.getMessage(), e);
}
propagateSplitSettings(job, mWork);
job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
job.setReducerClass(ExecReducer.class);
// set input format information if necessary
setInputAttributes(job);
// Turn on speculative execution for reducers
boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
job.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, useSpeculativeExecReducers);
String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
if (mWork.isUseBucketizedHiveInputFormat()) {
inpFormat = BucketizedHiveInputFormat.class.getName();
}
LOG.info("Using " + inpFormat);
try {
job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
throw new RuntimeException(e.getMessage(), e);
}
// No-Op - we don't really write anything here ..
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
int returnVal = 0;
boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
if (noName) {
// This is for a special case to ensure unit tests pass
job.set(MRJobConfig.JOB_NAME, "JOB" + Utilities.randGen.nextInt());
}
try {
MapredLocalWork localwork = mWork.getMapRedLocalWork();
if (localwork != null && localwork.hasStagedAlias()) {
if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
Path localPath = localwork.getTmpPath();
Path hdfsPath = mWork.getTmpHDFSPath();
FileSystem hdfs = hdfsPath.getFileSystem(job);
FileSystem localFS = localPath.getFileSystem(job);
FileStatus[] hashtableFiles = localFS.listStatus(localPath);
int fileNumber = hashtableFiles.length;
String[] fileNames = new String[fileNumber];
for (int i = 0; i < fileNumber; i++) {
fileNames[i] = hashtableFiles[i].getPath().getName();
}
// package and compress all the hashtable files to an archive file
String stageId = this.getId();
String archiveFileName = Utilities.generateTarFileName(stageId);
localwork.setStageID(stageId);
CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
Path archivePath = Utilities.generateTarPath(localPath, stageId);
LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);
// upload archive file to hdfs
Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
short replication = (short) job.getInt("mapred.submit.replication", 10);
hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
hdfs.setReplication(hdfsFilePath, replication);
LOG.info("Upload 1 archive file from" + archivePath + " to: " + hdfsFilePath);
// add the archive file to distributed cache
DistributedCache.createSymlink(job);
DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
LOG.info("Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri());
}
}
work.configureJobConf(job);
List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
Utilities.setInputPaths(job, inputPaths);
Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());
if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
try {
handleSampling(ctx, mWork, job);
job.setPartitionerClass(HiveTotalOrderPartitioner.class);
} catch (IllegalStateException e) {
console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
rWork.setNumReduceTasks(1);
job.setNumReduceTasks(1);
} catch (Exception e) {
LOG.error("Sampling error", e);
console.printError(e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
rWork.setNumReduceTasks(1);
job.setNumReduceTasks(1);
}
}
jc = new JobClient(job);
// make this client wait if job tracker is not behaving well.
Throttle.checkJobTracker(job, LOG);
if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
// initialize stats publishing table
StatsPublisher statsPublisher;
StatsFactory factory = StatsFactory.newFactory(job);
if (factory != null) {
statsPublisher = factory.getStatsPublisher();
List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
if (rWork != null) {
statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
}
StatsCollectionContext sc = new StatsCollectionContext(job);
sc.setStatsTmpDirs(statsTmpDir);
if (!statsPublisher.init(sc)) {
// creating stats table if not exists
if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
}
}
}
}
Utilities.createTmpDirs(job, mWork);
Utilities.createTmpDirs(job, rWork);
SessionState ss = SessionState.get();
// TODO: why is there a TezSession in MR ExecDriver?
if (ss != null && HiveConf.getVar(job, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
// TODO: this is the only place that uses keepTmpDir. Why?
TezSessionPoolManager.closeIfNotDefault(ss.getTezSession(), true);
}
HiveConfUtil.updateJobCredentialProviders(job);
// Finally SUBMIT the JOB!
if (driverContext.isShutdown()) {
LOG.warn("Task was cancelled");
return 5;
}
rj = jc.submitJob(job);
if (driverContext.isShutdown()) {
LOG.warn("Task was cancelled");
killJob();
return 5;
}
this.jobID = rj.getJobID();
updateStatusInQueryDisplay();
returnVal = jobExecHelper.progress(rj, jc, ctx);
success = (returnVal == 0);
} catch (Exception e) {
e.printStackTrace();
setException(e);
String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
if (rj != null) {
mesg = "Ended Job = " + rj.getJobID() + mesg;
} else {
mesg = "Job Submission failed" + mesg;
}
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
success = false;
returnVal = 1;
} finally {
Utilities.clearWork(job);
try {
if (ctxCreated) {
ctx.clear();
}
if (rj != null) {
if (returnVal != 0) {
killJob();
}
jobID = rj.getID().toString();
}
if (jc != null) {
jc.close();
}
} catch (Exception e) {
LOG.warn("Failed while cleaning up ", e);
} finally {
HadoopJobExecHelper.runningJobs.remove(rj);
}
}
// get the list of Dynamic partition paths
try {
if (rj != null) {
if (mWork.getAliasToWork() != null) {
for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
op.jobClose(job, success);
}
}
if (rWork != null) {
rWork.getReducer().jobClose(job, success);
}
}
} catch (Exception e) {
// jobClose needs to execute successfully; otherwise fail the task
if (success) {
setException(e);
success = false;
returnVal = 3;
String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
}
}
return (returnVal);
}
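For readers following the control flow above, the method signals its outcome purely through the integer it returns: 0 on success, 1 when submission or execution fails, 3 when the job ran but jobClose/commit failed, and 5 when the task was cancelled. Below is a caller-side sketch of interpreting those codes; the Outcome enum and classify helper are illustrative, not part of Hive.

public class MrReturnCodeSketch {

    enum Outcome { SUCCESS, JOB_FAILED, COMMIT_FAILED, CANCELLED, UNKNOWN }

    static Outcome classify(int returnVal) {
        switch (returnVal) {
            case 0:  return Outcome.SUCCESS;        // job ran and jobClose committed
            case 1:  return Outcome.JOB_FAILED;     // submission or execution failed
            case 3:  return Outcome.COMMIT_FAILED;  // job succeeded but jobClose threw
            case 5:  return Outcome.CANCELLED;      // task was cancelled around submission
            default: return Outcome.UNKNOWN;
        }
    }

    public static void main(String[] args) {
        for (int rc : new int[] {0, 1, 3, 5, 42}) {
            System.out.println(rc + " -> " + classify(rc));
        }
    }
}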
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
The class MapredLocalTask, method executeInChildVM:
public int executeInChildVM(DriverContext driverContext) {
// execute in child jvm
try {
// generate the cmd line to run in the child jvm
Context ctx = driverContext.getCtx();
String hiveJar = conf.getJar();
String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
conf.setVar(ConfVars.HIVEADDEDJARS, Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR));
// write out the plan to a local file
Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
MapredLocalWork plan = getWork();
LOG.info("Generating plan file " + planPath.toString());
OutputStream out = null;
try {
out = FileSystem.getLocal(conf).create(planPath);
SerializationUtilities.serializePlan(plan, out);
out.close();
out = null;
} finally {
IOUtils.closeQuietly(out);
}
String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
String libJars = ExecDriver.getResource(conf, ResourceType.JAR);
String libJarsOption = StringUtils.isEmpty(libJars) ? " " : " -libjars " + libJars + " ";
String jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
String hiveConfArgs = ExecDriver.generateCmdLine(conf, ctx);
String cmdLine = hadoopExec + " jar " + jarCmd + " -localtask -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs;
String workDir = (new File(".")).getCanonicalPath();
String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
if (!files.isEmpty()) {
cmdLine = cmdLine + " -files " + files;
workDir = ctx.getLocalTmpPath().toUri().getPath();
if (!(new File(workDir)).mkdir()) {
throw new IOException("Cannot create tmp working dir: " + workDir);
}
for (String f : StringUtils.split(files, ',')) {
Path p = new Path(f);
String target = p.toUri().getPath();
String link = workDir + Path.SEPARATOR + p.getName();
if (FileUtil.symLink(target, link) != 0) {
throw new IOException("Cannot link to added file: " + target + " from: " + link);
}
}
}
// Inherit Java system variables
String hadoopOpts;
StringBuilder sb = new StringBuilder();
Properties p = System.getProperties();
for (String element : HIVE_SYS_PROP) {
if (p.containsKey(element)) {
sb.append(" -D" + element + "=" + p.getProperty(element));
}
}
hadoopOpts = sb.toString();
// Inherit the environment variables
String[] env;
Map<String, String> variables = new HashMap<String, String>(System.getenv());
// The user can specify the hadoop memory
// if ("local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))) {
// if we are running in local mode - then the amount of memory used
// by the child jvm can no longer default to the memory used by the
// parent jvm
// int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
if (hadoopMem == 0) {
// remove the env var that would otherwise make the child jvm default to the parent's memory;
// the child jvm will then fall back to the default memory for a hadoop client
variables.remove(HADOOP_MEM_KEY);
} else {
// user specified the memory for local mode hadoop run
console.printInfo(" set heap size\t" + hadoopMem + "MB");
variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
}
// } else {
// nothing to do - we are not running in local mode - only submitting
// the job via a child process. in this case it's appropriate that the
// child jvm use the same memory as the parent jvm
// }
// Set HADOOP_USER_NAME env variable for child process, so that
// it also runs with hadoop permissions for the user the job is running as
// This will be used by hadoop only in unsecure(/non kerberos) mode
String endUserName = Utils.getUGI().getShortUserName();
LOG.debug("setting HADOOP_USER_NAME\t" + endUserName);
variables.put("HADOOP_USER_NAME", endUserName);
if (variables.containsKey(HADOOP_OPTS_KEY)) {
variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
} else {
variables.put(HADOOP_OPTS_KEY, hadoopOpts);
}
// HiveServer2 passes "-hiveconf hive.hadoop.classpath=%HIVE_LIB%"; combine that path with any existing HADOOP_CLASSPATH.
if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH) != null) {
if (variables.containsKey("HADOOP_CLASSPATH")) {
variables.put("HADOOP_CLASSPATH", variables.get("HADOOP_CLASSPATH") + ";" + HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH));
} else {
variables.put("HADOOP_CLASSPATH", HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH));
}
}
if (variables.containsKey(MapRedTask.HIVE_DEBUG_RECURSIVE)) {
MapRedTask.configureDebugVariablesForChildJVM(variables);
}
if (UserGroupInformation.isSecurityEnabled() && UserGroupInformation.isLoginKeytabBased()) {
// If kerberos security is enabled, and HS2 doAs is enabled,
// then additional params need to be set so that the command is run as
// intended user
secureDoAs = new SecureCmdDoAs(conf);
secureDoAs.addEnv(variables);
}
// If HIVE_LOCAL_TASK_CHILD_OPTS is set, use it as HADOOP_CLIENT_OPTS for the child so the local task JVM can
// have different settings from those of HiveServer2.
if (variables.containsKey(HIVE_LOCAL_TASK_CHILD_OPTS_KEY)) {
String childOpts = variables.get(HIVE_LOCAL_TASK_CHILD_OPTS_KEY);
if (childOpts == null) {
childOpts = "";
}
String clientOpts = variables.put(HADOOP_CLIENT_OPTS, childOpts);
String tmp = variables.get(HADOOP_OPTS_KEY);
if (tmp != null && !StringUtils.isBlank(clientOpts)) {
tmp = tmp.replace(clientOpts, childOpts);
variables.put(HADOOP_OPTS_KEY, tmp);
}
}
env = new String[variables.size()];
int pos = 0;
for (Map.Entry<String, String> entry : variables.entrySet()) {
String name = entry.getKey();
String value = entry.getValue();
env[pos++] = name + "=" + value;
LOG.debug("Setting env: " + name + "=" + LogUtils.maskIfPassword(name, value));
}
LOG.info("Executing: " + cmdLine);
// Run ExecDriver in another JVM
executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));
final LogRedirector.LogSourceCallback callback = () -> {
return executor.isAlive();
};
LogRedirector.redirect(Thread.currentThread().getName() + "-LocalTask-" + getName() + "-stdout", new LogRedirector(executor.getInputStream(), LOG, callback));
LogRedirector.redirect(Thread.currentThread().getName() + "-LocalTask-" + getName() + "-stderr", new LogRedirector(executor.getErrorStream(), LOG, callback));
CachingPrintStream errPrintStream = new CachingPrintStream(System.err);
StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out);
StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
outPrinter.start();
errPrinter.start();
int exitVal = jobExecHelper.progressLocal(executor, getId());
// wait for stream threads to finish
outPrinter.join();
errPrinter.join();
if (exitVal != 0) {
LOG.error("Execution failed with exit status: " + exitVal);
if (SessionState.get() != null) {
SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
}
} else {
LOG.info("Execution completed successfully");
}
return exitVal;
} catch (Exception e) {
LOG.error("Exception: ", e);
return (1);
} finally {
if (secureDoAs != null) {
secureDoAs.close();
}
}
}
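The core launch mechanics in the snippet above are plain JDK calls: flatten an environment map into "NAME=value" strings, run the command with Runtime.exec in a chosen working directory, and drain the child's stdout/stderr so it cannot block on full pipes. Here is a minimal, Hive-free sketch of that pattern; the echo command and the sequential drain helper are placeholders (the real task uses a full hadoop jar command line and LogRedirector/StreamPrinter threads).

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

public class ChildVmLaunchSketch {

    public static void main(String[] args) throws IOException, InterruptedException {
        Map<String, String> variables = new HashMap<>(System.getenv());
        variables.put("HADOOP_USER_NAME", System.getProperty("user.name"));  // as above, for non-secure mode

        String[] env = new String[variables.size()];
        int pos = 0;
        for (Map.Entry<String, String> entry : variables.entrySet()) {
            env[pos++] = entry.getKey() + "=" + entry.getValue();            // same flattening as the task
        }

        String cmdLine = "echo hello-from-child";                            // placeholder for "hadoop jar ..."
        File workDir = new File(".");
        Process executor = Runtime.getRuntime().exec(cmdLine, env, workDir);

        drain(executor.getInputStream());                                    // stands in for the redirector threads
        drain(executor.getErrorStream());
        int exitVal = executor.waitFor();
        System.out.println("child exited with " + exitVal);
    }

    private static void drain(InputStream in) throws IOException {
        try (BufferedReader r = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = r.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}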
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
The class TestUtilities, method testGetInputSummaryPoolAndFailure:
@Test
public void testGetInputSummaryPoolAndFailure() throws ExecutionException, InterruptedException, IOException {
ExecutorService pool = mock(ExecutorService.class);
when(pool.submit(any(Runnable.class))).thenReturn(mock(Future.class));
Set<Path> pathNeedProcess = new HashSet<>();
pathNeedProcess.add(new Path("dummy-path1"));
pathNeedProcess.add(new Path("dummy-path2"));
pathNeedProcess.add(new Path("dummy-path3"));
SessionState.start(new HiveConf());
JobConf jobConf = new JobConf();
Context context = new Context(jobConf);
Utilities.getInputSummaryWithPool(context, pathNeedProcess, mock(MapWork.class), new long[3], pool);
verify(pool, times(3)).submit(any(Runnable.class));
verify(pool).shutdown();
verify(pool).shutdownNow();
}
Aggregations