Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class MapRedTask, method execute:
@Override
public int execute(DriverContext driverContext) {
  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;
  try {
    if (ctx == null) {
      ctx = new Context(conf);
      ctxCreated = true;
    }
    // estimate number of reducers
    setNumberOfReducers();
    // auto-determine local mode if allowed
    if (!ctx.isLocalOnlyExecutionMode() && conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
      if (inputSummary == null) {
        inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work.getMapWork(), null);
      }
      // set the values of totalInputFileSize and totalInputNumFiles, estimating them
      // if percentage block sampling is being used
      double samplePercentage = Utilities.getHighestSamplePercentage(work.getMapWork());
      totalInputFileSize = Utilities.getTotalInputFileSize(inputSummary, work.getMapWork(), samplePercentage);
      totalInputNumFiles = Utilities.getTotalInputNumFiles(inputSummary, work.getMapWork(), samplePercentage);
      // at this point the number of reducers is precisely defined in the plan
      int numReducers = work.getReduceWork() == null ? 0 : work.getReduceWork().getNumReduceTasks();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Task: " + getId() + ", Summary: " + totalInputFileSize + "," + totalInputNumFiles + "," + numReducers);
      }
      String reason = MapRedTask.isEligibleForLocalMode(conf, numReducers, totalInputFileSize, totalInputNumFiles);
      if (reason == null) {
        // clone configuration before modifying it on per-task basis
        cloneConf();
        ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
        console.printInfo("Selecting local mode for task: " + getId());
        this.setLocalMode(true);
      } else {
        console.printInfo("Cannot run job locally: " + reason);
        this.setLocalMode(false);
      }
    }
    runningViaChild = conf.getBoolVar(HiveConf.ConfVars.SUBMITVIACHILD);
    if (!runningViaChild) {
      // not running via a child jvm, so update the job conf here so that the
      // change is visible in ExecDriver as well to have proper local properties.
      if (this.isLocalMode()) {
        // save the original job tracker
        ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(job));
        // change it to local
        ShimLoader.getHadoopShims().setJobLauncherRpcAddress(job, "local");
      }
      // we are not running this mapred task via child jvm
      // so directly invoke ExecDriver
      int ret = super.execute(driverContext);
      // restore the previous properties for framework name, RM address etc.
      if (this.isLocalMode()) {
        // restore the local job tracker back to original
        ctx.restoreOriginalTracker();
      }
      return ret;
    }
    // we need to edit the configuration to set up the cmdline. clone it first
    cloneConf();
    // propagate input format if necessary
    super.setInputAttributes(conf);
    // assemble the hadoop command line that will launch ExecDriver in a child jvm
    String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
    String hiveJar = conf.getJar();
    String libJars = super.getResource(conf, ResourceType.JAR);
    String libJarsOption = StringUtils.isEmpty(libJars) ? " " : " -libjars " + libJars + " ";
    // Generate the hiveConfArgs after potentially adding the jars
    String hiveConfArgs = generateCmdLine(conf, ctx);
    // write out the plan to a local file
    Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
    MapredWork plan = getWork();
    LOG.info("Generating plan file " + planPath.toString());
    OutputStream out = null;
    try {
      out = FileSystem.getLocal(conf).create(planPath);
      SerializationUtilities.serializePlan(plan, out);
      out.close();
      out = null;
    } finally {
      IOUtils.closeQuietly(out);
    }
    String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
    String jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
    String cmdLine = hadoopExec + " jar " + jarCmd + " -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs;
    String workDir = (new File(".")).getCanonicalPath();
    String files = super.getResource(conf, ResourceType.FILE);
    if (!files.isEmpty()) {
      cmdLine = cmdLine + " -files " + files;
      workDir = ctx.getLocalTmpPath().toUri().getPath();
      if (!(new File(workDir)).mkdir()) {
        throw new IOException("Cannot create tmp working dir: " + workDir);
      }
      for (String f : StringUtils.split(files, ',')) {
        Path p = new Path(f);
        String target = p.toUri().getPath();
        String link = workDir + Path.SEPARATOR + p.getName();
        if (FileUtil.symLink(target, link) != 0) {
          throw new IOException("Cannot link to added file: " + target + " from: " + link);
        }
      }
    }
    LOG.info("Executing: " + cmdLine);
    // Inherit Java system variables
    String hadoopOpts;
    StringBuilder sb = new StringBuilder();
    Properties p = System.getProperties();
    for (String element : HIVE_SYS_PROP) {
      if (p.containsKey(element)) {
        sb.append(" -D" + element + "=" + p.getProperty(element));
      }
    }
    hadoopOpts = sb.toString();
    // Inherit the environment variables
    String[] env;
    Map<String, String> variables = new HashMap<String, String>(System.getenv());
    if (ShimLoader.getHadoopShims().isLocalMode(conf)) {
      // if we are running in local mode - then the amount of memory used
      // by the child jvm can no longer default to the memory used by the
      // parent jvm
      int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
      if (hadoopMem == 0) {
        // remove env var that would default child jvm to use parent's memory
        // as default. child jvm would use default memory for a hadoop client
        variables.remove(HADOOP_MEM_KEY);
      } else {
        // user specified the memory for local mode hadoop run
        variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
      }
    } else {
      // nothing to do - we are not running in local mode - only submitting
      // the job via a child process. in this case it's appropriate that the
      // child jvm use the same memory as the parent jvm
    }
    if (variables.containsKey(HADOOP_OPTS_KEY)) {
      variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
    } else {
      variables.put(HADOOP_OPTS_KEY, hadoopOpts);
    }
    if (variables.containsKey(HIVE_DEBUG_RECURSIVE)) {
      configureDebugVariablesForChildJVM(variables);
    }
    env = new String[variables.size()];
    int pos = 0;
    for (Map.Entry<String, String> entry : variables.entrySet()) {
      String name = entry.getKey();
      String value = entry.getValue();
      env[pos++] = name + "=" + value;
    }
    // Run ExecDriver in another JVM
    executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));
    CachingPrintStream errPrintStream = new CachingPrintStream(SessionState.getConsole().getChildErrStream());
    StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, SessionState.getConsole().getChildOutStream());
    StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
    outPrinter.start();
    errPrinter.start();
    int exitVal = jobExecHelper.progressLocal(executor, getId());
    // wait for stream threads to finish
    outPrinter.join();
    errPrinter.join();
    if (exitVal != 0) {
      LOG.error("Execution failed with exit status: " + exitVal);
      if (SessionState.get() != null) {
        SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
      }
    } else {
      LOG.info("Execution completed successfully");
    }
    return exitVal;
  } catch (Exception e) {
    LOG.error("Got exception", e);
    return (1);
  } finally {
    try {
      // creating the Context can create a tmp dir; make sure to clear it out
      if (ctxCreated) {
        ctx.clear();
      }
    } catch (Exception e) {
      LOG.error("Exception: ", e);
    }
  }
}
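The example above follows a Context lifecycle pattern that recurs throughout these snippets: reuse the driver's Context when one exists, create a private one otherwise, and clear a self-created Context in the finally block so its scratch directories are removed. Below is a minimal sketch of just that lifecycle, using only new Context(conf) and ctx.clear() from the code above; doWork() is a hypothetical stand-in for the actual task body.

private int executeWithContext(DriverContext driverContext, HiveConf conf) {
  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;
  try {
    if (ctx == null) {
      // no Context handed down by the driver: create a task-local one
      ctx = new Context(conf);
      ctxCreated = true;
    }
    return doWork(ctx); // hypothetical: the real task body goes here
  } catch (Exception e) {
    LOG.error("Got exception", e);
    return 1;
  } finally {
    try {
      if (ctxCreated) {
        // only clear a Context this task created; the driver owns the shared one
        ctx.clear();
      }
    } catch (Exception e) {
      LOG.error("Exception: ", e);
    }
  }
}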
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class HiveMaterializedViewsRegistry, method parseQuery:
private static RelNode parseQuery(String viewQuery) {
  try {
    final ASTNode node = ParseUtils.parse(viewQuery);
    final QueryState qs = new QueryState(SessionState.get().getConf());
    CalcitePlanner analyzer = new CalcitePlanner(qs);
    analyzer.initCtx(new Context(SessionState.get().getConf()));
    analyzer.init(false);
    return analyzer.genLogicalPlan(node);
  } catch (Exception e) {
    // We could not parse the view
    return null;
  }
}
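Note that parseQuery deliberately swallows any parsing or planning failure and returns null. A hypothetical caller is sketched below; the query string and the handling are illustrative only, not taken from HiveMaterializedViewsRegistry.

// A null RelNode means the view's defining query could not be parsed or planned,
// so the materialized view is simply not registered for rewriting.
String viewQuery = "SELECT key, count(*) FROM src GROUP BY key"; // illustrative
RelNode viewPlan = parseQuery(viewQuery);
if (viewPlan == null) {
  LOG.warn("Skipping materialized view: could not parse its defining query");
} else {
  // use the logical plan, e.g. to register the view for query rewriting
}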
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class TestContext, method setUp:
@Before
public void setUp() throws IOException {
  /* Only called to create session directories used by the Context class */
  SessionState.start(conf);
  SessionState.detachSession();
  context = new Context(conf);
}
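With the session started and the Context created in setUp, a test can call the Context's scratch-path accessors directly. The test below is a hypothetical illustration; only new Context(conf), getMRTmpPath() and getLocalTmpPath() are taken from the examples on this page.

@Test
public void testTmpPathsAvailable() {
  // getMRTmpPath()/getLocalTmpPath() are the same accessors the MapRedTask and
  // ColumnTruncateTask examples rely on; here we only check that they return paths.
  Path mrTmpPath = context.getMRTmpPath();
  Path localTmpPath = context.getLocalTmpPath();
  Assert.assertNotNull(mrTmpPath);
  Assert.assertNotNull(localTmpPath);
}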
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class ColumnTruncateTask, method execute:
/**
 * Start a new map-reduce job to do the truncation; almost the same as ExecDriver.
 */
@Override
public int execute(DriverContext driverContext) {
  HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, BucketizedHiveInputFormat.class.getName());
  success = true;
  HiveFileFormatUtils.prepareJobOutput(job);
  job.setOutputFormat(HiveOutputFormatImpl.class);
  job.setMapperClass(work.getMapperClass());
  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;
  try {
    if (ctx == null) {
      ctx = new Context(job);
      ctxCreated = true;
    }
  } catch (IOException e) {
    e.printStackTrace();
    console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return 5;
  }
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  if (work.getNumMapTasks() != null) {
    job.setNumMapTasks(work.getNumMapTasks());
  }
  // zero reducers
  job.setNumReduceTasks(0);
  if (work.getMinSplitSize() != null) {
    HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
  }
  if (work.getInputformat() != null) {
    HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
  }
  String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
  LOG.info("Using " + inpFormat);
  try {
    job.setInputFormat(JavaUtils.loadClass(inpFormat));
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e.getMessage(), e);
  }
  Path outputPath = this.work.getOutputDir();
  Path tempOutPath = Utilities.toTempPath(outputPath);
  try {
    FileSystem fs = tempOutPath.getFileSystem(job);
    if (!fs.exists(tempOutPath)) {
      fs.mkdirs(tempOutPath);
    }
  } catch (IOException e) {
    console.printError("Can't make path " + outputPath + " : " + e.getMessage());
    return 6;
  }
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  int returnVal = 0;
  RunningJob rj = null;
  boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
  String jobName = null;
  if (noName && this.getQueryPlan() != null) {
    int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
    jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
  }
  if (noName) {
    // This is for a special case to ensure unit tests pass
    job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
  }
  try {
    addInputPaths(job, work);
    MapredWork mrWork = new MapredWork();
    mrWork.setMapWork(work);
    Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
    // remove the pwd from the conf file so that the job tracker doesn't show it in its logs
    String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
    if (pwd != null) {
      HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
    }
    JobClient jc = new JobClient(job);
    String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
    if (!addedJars.isEmpty()) {
      job.set("tmpjars", addedJars);
    }
    // make this client wait if the job tracker is not behaving well.
    Throttle.checkJobTracker(job, LOG);
    // Finally SUBMIT the JOB!
    rj = jc.submitJob(job);
    this.jobID = rj.getJobID();
    returnVal = jobExecHelper.progress(rj, jc, ctx);
    success = (returnVal == 0);
  } catch (Exception e) {
    e.printStackTrace();
    setException(e);
    String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
    if (rj != null) {
      mesg = "Ended Job = " + rj.getJobID() + mesg;
    } else {
      mesg = "Job Submission failed" + mesg;
    }
    // Has to use full name to make sure it does not conflict with
    // org.apache.commons.lang.StringUtils
    console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    success = false;
    returnVal = 1;
  } finally {
    try {
      if (ctxCreated) {
        ctx.clear();
      }
      if (rj != null) {
        if (returnVal != 0) {
          rj.killJob();
        }
      }
      ColumnTruncateMapper.jobClose(outputPath, success, job, console, work.getDynPartCtx(), null);
    } catch (Exception e) {
      LOG.warn("Failed while cleaning up ", e);
    } finally {
      HadoopJobExecHelper.runningJobs.remove(rj);
    }
  }
  return (returnVal);
}
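Stripped of the input-format and output-path setup, the submit, monitor and clean-up skeleton this example follows can be condensed as below. The method name and arguments are illustrative; the calls themselves (JobClient.submitJob, jobExecHelper.progress, RunningJob.killJob, Context.clear, HadoopJobExecHelper.runningJobs.remove) all appear in the code above.

private int submitAndMonitor(JobConf job, Context ctx, boolean ctxCreated) {
  RunningJob rj = null;
  int returnVal = 0;
  try {
    JobClient jc = new JobClient(job);
    rj = jc.submitJob(job);
    returnVal = jobExecHelper.progress(rj, jc, ctx); // poll until the job finishes
  } catch (Exception e) {
    console.printError("Job failed: " + Utilities.getNameMessage(e));
    returnVal = 1;
  } finally {
    try {
      if (ctxCreated) {
        ctx.clear(); // only clear a Context this task created
      }
      if (rj != null && returnVal != 0) {
        rj.killJob(); // do not leave a failed job running on the cluster
      }
    } catch (Exception e) {
      LOG.warn("Failed while cleaning up ", e);
    } finally {
      HadoopJobExecHelper.runningJobs.remove(rj); // always deregister the job
    }
  }
  return returnVal;
}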
Use of org.apache.hadoop.hive.ql.Context in project hive by apache.
Class GenMRUnion1, method processSubQueryUnionCreateIntermediate:
/**
 * Process the union when the parent is a map-reduce job. Create a temporary
 * output, and let the union task read from the temporary output.
 *
 * The files created for all the inputs are kept in the union context and later
 * used to initialize the union plan.
 *
 * @param parent
 * @param child
 * @param uTask
 * @param ctx
 * @param uCtxTask
 */
private void processSubQueryUnionCreateIntermediate(Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> child, Task<? extends Serializable> uTask, GenMRProcContext ctx, GenMRUnionCtx uCtxTask) {
  ParseContext parseCtx = ctx.getParseCtx();
  TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
  // generate the temporary file
  Context baseCtx = parseCtx.getContext();
  Path taskTmpDir = baseCtx.getMRTmpPath();
  // Create the temporary file, its corresponding FileSinkOperator, and
  // its corresponding TableScanOperator.
  TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(parent, child, taskTmpDir, tt_desc, parseCtx);
  // Add the path to alias mapping
  uCtxTask.addTaskTmpDir(taskTmpDir.toUri().toString());
  uCtxTask.addTTDesc(tt_desc);
  uCtxTask.addListTopOperators(tableScanOp);
  // The union task is empty. The files created for all the inputs are
  // assembled in the union context and later used to initialize the union
  // plan.
  Task<? extends Serializable> currTask = ctx.getCurrTask();
  currTask.addDependentTask(uTask);
  if (ctx.getRootTasks().contains(uTask)) {
    ctx.getRootTasks().remove(uTask);
    if (!ctx.getRootTasks().contains(currTask) && shouldBeRootTask(currTask)) {
      ctx.getRootTasks().add(currTask);
    }
  }
}