Use of org.apache.hadoop.hive.ql.DriverContext in project hive by apache.
The class TezTask, method execute.
@Override
public int execute(DriverContext driverContext) {
int rc = 1;
boolean cleanContext = false;
Context ctx = null;
TezSessionState session = null;
try {
// Get or create Context object. If we create it we have to clean it later as well.
ctx = driverContext.getCtx();
if (ctx == null) {
ctx = new Context(conf);
cleanContext = true;
}
// Need to remove this static hack. But this is the way currently to get a session.
SessionState ss = SessionState.get();
session = ss.getTezSession();
if (session != null && !session.isOpen()) {
LOG.warn("The session: " + session + " has not been opened");
}
session = TezSessionPoolManager.getInstance().getSession(session, conf, false, getWork().getLlapMode());
ss.setTezSession(session);
try {
// jobConf will hold all the configuration for hadoop, tez, and hive
JobConf jobConf = utils.createConfiguration(conf);
// Get all user jars from work (e.g. input format stuff).
String[] inputOutputJars = work.configureJobConfAndExtractJars(jobConf);
// we will localize all the files (jars, plans, hashtables) to the
// scratch dir. let's create this and tmp first.
Path scratchDir = ctx.getMRScratchDir();
// create the tez tmp dir
scratchDir = utils.createTezDir(scratchDir, conf);
Map<String, LocalResource> inputOutputLocalResources = getExtraLocalResources(jobConf, scratchDir, inputOutputJars);
// Ensure the session is open and has the necessary local resources
updateSession(session, jobConf, scratchDir, inputOutputJars, inputOutputLocalResources);
List<LocalResource> additionalLr = session.getLocalizedResources();
logResources(additionalLr);
// unless already installed on all the cluster nodes, we'll have to
// localize hive-exec.jar as well.
LocalResource appJarLr = session.getAppJarLr();
// next we translate the TezWork to a Tez DAG
DAG dag = build(jobConf, work, scratchDir, appJarLr, additionalLr, ctx);
CallerContext callerContext = CallerContext.create("HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr());
dag.setCallerContext(callerContext);
// Add the extra resources to the dag
addExtraResourcesToDag(session, dag, inputOutputJars, inputOutputLocalResources);
// submit will send the job to the cluster and start executing
dagClient = submit(jobConf, dag, scratchDir, appJarLr, session, additionalLr, inputOutputJars, inputOutputLocalResources);
// finally monitor will print progress until the job is done
TezJobMonitor monitor = new TezJobMonitor(work.getWorkMap(), dagClient, conf, dag, ctx);
rc = monitor.monitorExecution();
if (rc != 0) {
this.setException(new HiveException(monitor.getDiagnostics()));
}
// fetch the counters
try {
Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
counters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters();
} catch (Exception err) {
// Don't fail execution due to counters - just don't print summary info
LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete. " + err, err);
counters = null;
}
} finally {
// We return this to the pool even if it's unusable; reopen is supposed to handle this.
try {
TezSessionPoolManager.getInstance().returnSession(session, getWork().getLlapMode());
} catch (Exception e) {
LOG.error("Failed to return session: {} to pool", session, e);
throw e;
}
}
if (LOG.isInfoEnabled() && counters != null && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_EXEC_SUMMARY) || Utilities.isPerfOrAboveLogging(conf))) {
for (CounterGroup group : counters) {
LOG.info(group.getDisplayName() + ":");
for (TezCounter counter : group) {
LOG.info(" " + counter.getDisplayName() + ": " + counter.getValue());
}
}
}
} catch (Exception e) {
LOG.error("Failed to execute tez graph.", e);
// rc will be 1 at this point indicating failure.
} finally {
Utilities.clearWork(conf);
// Clear gWorkMap
for (BaseWork w : work.getAllWork()) {
JobConf workCfg = workToConf.get(w);
if (workCfg != null) {
Utilities.clearWorkMapForConf(workCfg);
}
}
if (cleanContext) {
try {
ctx.clear();
} catch (Exception e) {
/*best effort*/
LOG.warn("Failed to clean up after tez job", e);
}
}
// need to either move tmp files or remove them
if (dagClient != null) {
// rc will only be overwritten if close errors out
rc = close(work, rc);
}
}
return rc;
}
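The listing above shows the core DriverContext pattern that every execute(DriverContext) implementation on this page repeats: reuse the Context the Driver already created when one is available, otherwise build a private one and take responsibility for clearing it. A minimal restatement of that pattern, using only the fields and calls visible in the listing (the commented-out middle is a placeholder for the DAG build/submit/monitor steps):
Context ctx = driverContext.getCtx();   // the Context shared by the Driver; may be null
boolean cleanContext = false;
if (ctx == null) {
    ctx = new Context(conf);            // we created it, so we own its cleanup
    cleanContext = true;
}
try {
    // ... build, submit and monitor the Tez DAG, as in the listing ...
} finally {
    if (cleanContext) {
        ctx.clear();                    // best effort, as in the finally block above
    }
}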
Use of org.apache.hadoop.hive.ql.DriverContext in project hive by apache.
The class SparkProcessAnalyzeTable, method handlePartialScanCommand.
/**
* handle partial scan command.
*
* It is composed of PartialScanTask followed by StatsTask.
*/
private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, StatsWork statsWork, GenSparkProcContext context, Task<StatsWork> statsTask) throws SemanticException {
String aggregationKey = tableScan.getConf().getStatsAggPrefix();
StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer);
aggregationKey = aggregationKeyBuffer.toString();
// scan work
PartialScanWork scanWork = new PartialScanWork(inputPaths);
scanWork.setMapperCannotSpanPartns(true);
scanWork.setAggKey(aggregationKey);
scanWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf());
// stats work
statsWork.setPartialScanAnalyzeCommand(true);
// partial scan task
DriverContext driverCxt = new DriverContext();
@SuppressWarnings("unchecked") Task<PartialScanWork> partialScanTask = TaskFactory.get(scanWork, parseContext.getConf());
partialScanTask.initialize(parseContext.getQueryState(), null, driverCxt, tableScan.getCompilationOpContext());
partialScanTask.setWork(scanWork);
statsWork.setSourceTask(partialScanTask);
// task dependency
context.rootTasks.remove(context.currentTask);
context.rootTasks.add(partialScanTask);
partialScanTask.addDependentTask(statsTask);
}
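Here DriverContext is only a placeholder: no Driver is executing yet, but Task.initialize expects one, so a throwaway instance is created at planning time. A minimal sketch of that pattern, reusing just the calls visible above (variable names are illustrative):
DriverContext placeholderCxt = new DriverContext();                  // no Driver is running at compile time
Task<PartialScanWork> scan = TaskFactory.get(scanWork, parseContext.getConf());
scan.initialize(parseContext.getQueryState(), null, placeholderCxt,  // second argument stays null, as in the listing
        tableScan.getCompilationOpContext());
scan.addDependentTask(statsTask);                                    // the StatsTask runs after the partial scan
The live DriverContext reaches the task later, through execute(DriverContext), as the other listings on this page show.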
Use of org.apache.hadoop.hive.ql.DriverContext in project hive by apache.
The class PartialScanTask, method execute.
/**
 * Start a new map-reduce job to do a partial scan to calculate stats,
 * almost the same as BlockMergeTask or ExecDriver.
 */
@Override
public int execute(DriverContext driverContext) {
HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, CombineHiveInputFormat.class.getName());
success = true;
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());
Context ctx = driverContext.getCtx();
boolean ctxCreated = false;
try {
if (ctx == null) {
ctx = new Context(job);
ctxCreated = true;
}
} catch (IOException e) {
e.printStackTrace();
console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return 5;
}
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
if (work.getNumMapTasks() != null) {
job.setNumMapTasks(work.getNumMapTasks());
}
// zero reducers
job.setNumReduceTasks(0);
if (work.getMinSplitSize() != null) {
HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
}
if (work.getInputformat() != null) {
HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
}
String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
LOG.info("Using " + inpFormat);
try {
job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
throw new RuntimeException(e.getMessage(), e);
}
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
int returnVal = 0;
RunningJob rj = null;
boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
String jobName = null;
if (noName && this.getQueryPlan() != null) {
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
}
if (noName) {
// This is for a special case to ensure unit tests pass
job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
}
// pass aggregation key to mapper
HiveConf.setVar(job, HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX, work.getAggKey());
job.set(StatsSetupConst.STATS_TMP_LOC, work.getStatsTmpDir());
try {
addInputPaths(job, work);
MapredWork mrWork = new MapredWork();
mrWork.setMapWork(work);
Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
// remove the pwd from the conf file so that the job tracker doesn't show it in its logs
String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
if (pwd != null) {
HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
}
JobClient jc = new JobClient(job);
String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
if (!addedJars.isEmpty()) {
job.set("tmpjars", addedJars);
}
// make this client wait if the job tracker is not behaving well.
Throttle.checkJobTracker(job, LOG);
if (work.isGatheringStats()) {
// initialize stats publishing table
StatsPublisher statsPublisher;
StatsFactory factory = StatsFactory.newFactory(job);
if (factory != null) {
statsPublisher = factory.getStatsPublisher();
StatsCollectionContext sc = new StatsCollectionContext(job);
sc.setStatsTmpDir(work.getStatsTmpDir());
if (!statsPublisher.init(sc)) {
// the stats publisher could not be initialized; only fail if reliable stats are required
if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
}
}
}
}
// Finally SUBMIT the JOB!
rj = jc.submitJob(job);
this.jobID = rj.getJobID();
returnVal = jobExecHelper.progress(rj, jc, ctx);
success = (returnVal == 0);
} catch (Exception e) {
e.printStackTrace();
setException(e);
String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
if (rj != null) {
mesg = "Ended Job = " + rj.getJobID() + mesg;
} else {
mesg = "Job Submission failed" + mesg;
}
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
success = false;
returnVal = 1;
} finally {
try {
if (ctxCreated) {
ctx.clear();
}
if (rj != null) {
if (returnVal != 0) {
rj.killJob();
}
}
} catch (Exception e) {
LOG.warn("Failed in cleaning up ", e);
} finally {
HadoopJobExecHelper.runningJobs.remove(rj);
}
}
return (returnVal);
}
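Note how the aggregation key and stats temp dir that SparkProcessAnalyzeTable stored on the PartialScanWork resurface here at run time. A minimal side-by-side of that flow, using only calls that appear in the two listings:
// compile time (SparkProcessAnalyzeTable.handlePartialScanCommand)
scanWork.setAggKey(aggregationKey);
scanWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf());
// run time (PartialScanTask.execute): the same values are pushed into the JobConf so the
// mappers publish their partial stats under a predictable prefix and location
HiveConf.setVar(job, HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX, work.getAggKey());
job.set(StatsSetupConst.STATS_TMP_LOC, work.getStatsTmpDir());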
Use of org.apache.hadoop.hive.ql.DriverContext in project hive by apache.
The class MapRedTask, method execute.
@Override
public int execute(DriverContext driverContext) {
Context ctx = driverContext.getCtx();
boolean ctxCreated = false;
try {
if (ctx == null) {
ctx = new Context(conf);
ctxCreated = true;
}
// estimate number of reducers
setNumberOfReducers();
// auto-determine local mode if allowed
if (!ctx.isLocalOnlyExecutionMode() && conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
if (inputSummary == null) {
inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work.getMapWork(), null);
}
// set the values of totalInputFileSize and totalInputNumFiles, estimating them
// if percentage block sampling is being used
double samplePercentage = Utilities.getHighestSamplePercentage(work.getMapWork());
totalInputFileSize = Utilities.getTotalInputFileSize(inputSummary, work.getMapWork(), samplePercentage);
totalInputNumFiles = Utilities.getTotalInputNumFiles(inputSummary, work.getMapWork(), samplePercentage);
// at this point the number of reducers is precisely defined in the plan
int numReducers = work.getReduceWork() == null ? 0 : work.getReduceWork().getNumReduceTasks();
if (LOG.isDebugEnabled()) {
LOG.debug("Task: " + getId() + ", Summary: " + totalInputFileSize + "," + totalInputNumFiles + "," + numReducers);
}
String reason = MapRedTask.isEligibleForLocalMode(conf, numReducers, totalInputFileSize, totalInputNumFiles);
if (reason == null) {
// clone configuration before modifying it on per-task basis
cloneConf();
ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
console.printInfo("Selecting local mode for task: " + getId());
this.setLocalMode(true);
} else {
console.printInfo("Cannot run job locally: " + reason);
this.setLocalMode(false);
}
}
runningViaChild = conf.getBoolVar(HiveConf.ConfVars.SUBMITVIACHILD);
if (!runningViaChild) {
// since the mapred task runs in this same JVM, the local-mode settings below are visible in ExecDriver as well (proper local properties).
if (this.isLocalMode()) {
// save the original job tracker
ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(job));
// change it to local
ShimLoader.getHadoopShims().setJobLauncherRpcAddress(job, "local");
}
// we are not running this mapred task via child jvm
// so directly invoke ExecDriver
int ret = super.execute(driverContext);
// restore the previous properties for framework name, RM address etc.
if (this.isLocalMode()) {
// restore the local job tracker back to original
ctx.restoreOriginalTracker();
}
return ret;
}
// we need to edit the configuration to setup cmdline. clone it first
cloneConf();
// propagate input format if necessary
super.setInputAttributes(conf);
// enable assertion
String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
String hiveJar = conf.getJar();
String libJars = super.getResource(conf, ResourceType.JAR);
String libJarsOption = StringUtils.isEmpty(libJars) ? " " : " -libjars " + libJars + " ";
// Generate the hiveConfArgs after potentially adding the jars
String hiveConfArgs = generateCmdLine(conf, ctx);
// write out the plan to a local file
Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
MapredWork plan = getWork();
LOG.info("Generating plan file " + planPath.toString());
OutputStream out = null;
try {
out = FileSystem.getLocal(conf).create(planPath);
SerializationUtilities.serializePlan(plan, out);
out.close();
out = null;
} finally {
IOUtils.closeQuietly(out);
}
String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
String jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
String cmdLine = hadoopExec + " jar " + jarCmd + " -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs;
String workDir = (new File(".")).getCanonicalPath();
String files = super.getResource(conf, ResourceType.FILE);
if (!files.isEmpty()) {
cmdLine = cmdLine + " -files " + files;
workDir = ctx.getLocalTmpPath().toUri().getPath();
if (!(new File(workDir)).mkdir()) {
throw new IOException("Cannot create tmp working dir: " + workDir);
}
for (String f : StringUtils.split(files, ',')) {
Path p = new Path(f);
String target = p.toUri().getPath();
String link = workDir + Path.SEPARATOR + p.getName();
if (FileUtil.symLink(target, link) != 0) {
throw new IOException("Cannot link to added file: " + target + " from: " + link);
}
}
}
LOG.info("Executing: " + cmdLine);
// Inherit Java system variables
String hadoopOpts;
StringBuilder sb = new StringBuilder();
Properties p = System.getProperties();
for (String element : HIVE_SYS_PROP) {
if (p.containsKey(element)) {
sb.append(" -D" + element + "=" + p.getProperty(element));
}
}
hadoopOpts = sb.toString();
// Inherit the environment variables
String[] env;
Map<String, String> variables = new HashMap<String, String>(System.getenv());
if (ShimLoader.getHadoopShims().isLocalMode(conf)) {
// if we are running in local mode - then the amount of memory used
// by the child jvm can no longer default to the memory used by the
// parent jvm
int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
if (hadoopMem == 0) {
// remove env var that would default child jvm to use parent's memory
// as default. child jvm would use default memory for a hadoop client
variables.remove(HADOOP_MEM_KEY);
} else {
// user specified the memory for local mode hadoop run
variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
}
} else {
// nothing to do - we are not running in local mode - only submitting
// the job via a child process. in this case it's appropriate that the
// child jvm use the same memory as the parent jvm
}
if (variables.containsKey(HADOOP_OPTS_KEY)) {
variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
} else {
variables.put(HADOOP_OPTS_KEY, hadoopOpts);
}
if (variables.containsKey(HIVE_DEBUG_RECURSIVE)) {
configureDebugVariablesForChildJVM(variables);
}
env = new String[variables.size()];
int pos = 0;
for (Map.Entry<String, String> entry : variables.entrySet()) {
String name = entry.getKey();
String value = entry.getValue();
env[pos++] = name + "=" + value;
}
// Run ExecDriver in another JVM
executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));
CachingPrintStream errPrintStream = new CachingPrintStream(SessionState.getConsole().getChildErrStream());
StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, SessionState.getConsole().getChildOutStream());
StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
outPrinter.start();
errPrinter.start();
int exitVal = jobExecHelper.progressLocal(executor, getId());
// wait for stream threads to finish
outPrinter.join();
errPrinter.join();
if (exitVal != 0) {
LOG.error("Execution failed with exit status: " + exitVal);
if (SessionState.get() != null) {
SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
}
} else {
LOG.info("Execution completed successfully");
}
return exitVal;
} catch (Exception e) {
LOG.error("Got exception", e);
return (1);
} finally {
try {
// if we created the context, make sure to clear it out
if (ctxCreated) {
ctx.clear();
}
} catch (Exception e) {
LOG.error("Exception: ", e);
}
}
}
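Condensed, the method above takes one of two paths with its DriverContext. The sketch below restates that control flow using only names from the listing, with the child-JVM branch summarized in comments:
if (!runningViaChild) {
    // in-process path: the same DriverContext is forwarded straight to ExecDriver,
    // with the local-mode job tracker saved and restored around the call
    int ret = super.execute(driverContext);
    return ret;
}
// child-JVM path: serialize the plan to a local plan.xml, assemble a
// "hadoop jar <hive jar> ExecDriver -libjars ... -plan ... [-nolog] ..." command line,
// exec it with the inherited environment, and stream its output back to the session;
// the child process builds its own DriverContext in ExecDriver.main (next listing).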
Use of org.apache.hadoop.hive.ql.DriverContext in project hive by apache.
The class ExecDriver, method main.
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, HiveException {
String planFileName = null;
String jobConfFileName = null;
boolean noLog = false;
String files = null;
String libjars = null;
boolean localtask = false;
try {
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-plan")) {
planFileName = args[++i];
} else if (args[i].equals("-jobconffile")) {
jobConfFileName = args[++i];
} else if (args[i].equals("-nolog")) {
noLog = true;
} else if (args[i].equals("-files")) {
files = args[++i];
} else if (args[i].equals("-libjars")) {
libjars = args[++i];
} else if (args[i].equals("-localtask")) {
localtask = true;
}
}
} catch (IndexOutOfBoundsException e) {
System.err.println("Missing argument to option");
printUsage();
}
JobConf conf;
if (localtask) {
conf = new JobConf(MapredLocalTask.class);
} else {
conf = new JobConf(ExecDriver.class);
}
if (jobConfFileName != null) {
conf.addResource(new Path(jobConfFileName));
}
// Initialize the resources from command line
if (files != null) {
conf.set("tmpfiles", files);
}
if (libjars != null) {
conf.set("tmpjars", libjars);
}
if (UserGroupInformation.isSecurityEnabled()) {
String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
if (hadoopAuthToken != null) {
conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
}
}
boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);
String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
if (queryId.isEmpty()) {
queryId = "unknown-" + System.currentTimeMillis();
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, queryId);
}
System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);
if (noLog) {
// If started from main(), and noLog is on, we should not output
// any logs. To turn the log on, please set -Dtest.silent=false
org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
NullAppender appender = NullAppender.createNullAppender();
appender.addToLogger(logger.getName(), Level.ERROR);
appender.start();
} else {
setupChildLog4j(conf);
}
Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
LogHelper console = new LogHelper(LOG, isSilent);
if (planFileName == null) {
console.printError("Must specify Plan File Name");
printUsage();
}
// print the execution log location so that it's easy to find the reason for local mode execution failures
for (Appender appender : ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger()).getAppenders().values()) {
if (appender instanceof FileAppender) {
console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
} else if (appender instanceof RollingFileAppender) {
console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
}
}
// the plan file should always be in local directory
Path p = new Path(planFileName);
FileSystem fs = FileSystem.getLocal(conf);
InputStream pathData = fs.open(p);
// libjars are not automatically added to the classpath of this child process, so we add them here explicitly
try {
// see also - code in CliDriver.java
ClassLoader loader = conf.getClassLoader();
if (StringUtils.isNotBlank(libjars)) {
loader = Utilities.addToClassPath(loader, StringUtils.split(libjars, ","));
}
conf.setClassLoader(loader);
// Also set this to the Thread ContextClassLoader, so new threads will inherit
// this class loader, and propagate it into Configurations created by those new threads.
Thread.currentThread().setContextClassLoader(loader);
} catch (Exception e) {
throw new HiveException(e.getMessage(), e);
}
int ret;
if (localtask) {
memoryMXBean = ManagementFactory.getMemoryMXBean();
MapredLocalWork plan = SerializationUtilities.deserializePlan(pathData, MapredLocalWork.class);
MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
ret = ed.executeInProcess(new DriverContext());
} else {
MapredWork plan = SerializationUtilities.deserializePlan(pathData, MapredWork.class);
ExecDriver ed = new ExecDriver(plan, conf, isSilent);
ret = ed.execute(new DriverContext());
}
if (ret != 0) {
System.exit(ret);
}
}
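Several of the flags parsed here (-plan, -libjars, -files, -nolog) match the command line MapRedTask assembles in the previous listing. Either branch ends the same way: the deserialized plan is wrapped in a task and handed a freshly constructed DriverContext, since no Driver instance exists in this standalone process. A minimal restatement (the plan variable names are illustrative):
DriverContext standaloneCxt = new DriverContext();    // no-arg constructor, as in the listing
int ret = localtask
        ? new MapredLocalTask(localPlan, conf, isSilent).executeInProcess(standaloneCxt)
        : new ExecDriver(mapredPlan, conf, isSilent).execute(standaloneCxt);
if (ret != 0) {
    System.exit(ret);                                  // propagate the failure to the parent JVM
}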