use of org.apache.hadoop.mapred.JobClient in project hive by apache.
the class ColumnTruncateTask method execute.
/**
 * Start a new map-reduce job to do the truncation, almost the same as ExecDriver.
 */
@Override
public int execute(DriverContext driverContext) {
  HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, BucketizedHiveInputFormat.class.getName());
  success = true;
  HiveFileFormatUtils.prepareJobOutput(job);
  job.setOutputFormat(HiveOutputFormatImpl.class);
  job.setMapperClass(work.getMapperClass());
  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;
  try {
    if (ctx == null) {
      ctx = new Context(job);
      ctxCreated = true;
    }
  } catch (IOException e) {
    e.printStackTrace();
    console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return 5;
  }
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  if (work.getNumMapTasks() != null) {
    job.setNumMapTasks(work.getNumMapTasks());
  }
  // zero reducers
  job.setNumReduceTasks(0);
  if (work.getMinSplitSize() != null) {
    HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
  }
  if (work.getInputformat() != null) {
    HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
  }
  String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
  LOG.info("Using " + inpFormat);
  try {
    job.setInputFormat(JavaUtils.loadClass(inpFormat));
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e.getMessage(), e);
  }
  Path outputPath = this.work.getOutputDir();
  Path tempOutPath = Utilities.toTempPath(outputPath);
  try {
    FileSystem fs = tempOutPath.getFileSystem(job);
    if (!fs.exists(tempOutPath)) {
      fs.mkdirs(tempOutPath);
    }
  } catch (IOException e) {
    console.printError("Can't make path " + outputPath + " : " + e.getMessage());
    return 6;
  }
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  int returnVal = 0;
  RunningJob rj = null;
  boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
  String jobName = null;
  if (noName && this.getQueryPlan() != null) {
    int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
    jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
  }
  if (noName) {
    // This is for a special case to ensure unit tests pass
    job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
  }
  try {
    addInputPaths(job, work);
    MapredWork mrWork = new MapredWork();
    mrWork.setMapWork(work);
    Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
    // remove the pwd from the conf file so that the job tracker doesn't show it in its logs
    String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
    if (pwd != null) {
      HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
    }
    JobClient jc = new JobClient(job);
    String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
    if (!addedJars.isEmpty()) {
      job.set("tmpjars", addedJars);
    }
    // make this client wait if the job tracker is not behaving well.
    Throttle.checkJobTracker(job, LOG);
    // Finally SUBMIT the JOB!
    rj = jc.submitJob(job);
    this.jobID = rj.getJobID();
    returnVal = jobExecHelper.progress(rj, jc, ctx);
    success = (returnVal == 0);
  } catch (Exception e) {
    e.printStackTrace();
    setException(e);
    String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
    if (rj != null) {
      mesg = "Ended Job = " + rj.getJobID() + mesg;
    } else {
      mesg = "Job Submission failed" + mesg;
    }
    // Has to use full name to make sure it does not conflict with
    // org.apache.commons.lang.StringUtils
    console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    success = false;
    returnVal = 1;
  } finally {
    try {
      if (ctxCreated) {
        ctx.clear();
      }
      if (rj != null) {
        if (returnVal != 0) {
          rj.killJob();
        }
      }
      ColumnTruncateMapper.jobClose(outputPath, success, job, console, work.getDynPartCtx(), null);
    } catch (Exception e) {
      LOG.warn("Failed while cleaning up ", e);
    } finally {
      HadoopJobExecHelper.runningJobs.remove(rj);
    }
  }
  return (returnVal);
}
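Stripped of the Hive-specific setup, the submission logic above reduces to a small JobClient pattern: create the client from the JobConf, submit asynchronously, poll until the job completes, and kill it if the caller is interrupted. A minimal sketch of that pattern follows; the class name and polling interval are illustrative and not taken from the Hive source, which delegates the monitoring to HadoopJobExecHelper.

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitAndMonitor {
  // Submit asynchronously, poll until the job finishes, kill it if interrupted.
  public static int run(JobConf job) throws Exception {
    JobClient jc = new JobClient(job);
    RunningJob rj = jc.submitJob(job);      // returns immediately
    try {
      while (!rj.isComplete()) {            // poll the cluster for job state
        Thread.sleep(1000);
      }
      return rj.isSuccessful() ? 0 : 1;
    } catch (InterruptedException ie) {
      rj.killJob();                         // don't leave the job running
      throw ie;
    } finally {
      jc.close();
    }
  }
}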
use of org.apache.hadoop.mapred.JobClient in project hive by apache.
the class CompactorMR method launchCompactionJob.
private void launchCompactionJob(JobConf job, Path baseDir, CompactionType compactionType, StringableList dirsToSearch, List<AcidUtils.ParsedDelta> parsedDeltas, int curDirNumber, int obsoleteDirNumber, HiveConf hiveConf, TxnStore txnHandler, long id) throws IOException {
  job.setBoolean(IS_MAJOR, compactionType == CompactionType.MAJOR);
  if (dirsToSearch == null) {
    dirsToSearch = new StringableList();
  }
  StringableList deltaDirs = new StringableList();
  long minTxn = Long.MAX_VALUE;
  long maxTxn = Long.MIN_VALUE;
  for (AcidUtils.ParsedDelta delta : parsedDeltas) {
    LOG.debug("Adding delta " + delta.getPath() + " to directories to search");
    dirsToSearch.add(delta.getPath());
    deltaDirs.add(delta.getPath());
    minTxn = Math.min(minTxn, delta.getMinTransaction());
    maxTxn = Math.max(maxTxn, delta.getMaxTransaction());
  }
  if (baseDir != null) {
    job.set(BASE_DIR, baseDir.toString());
  }
  job.set(DELTA_DIRS, deltaDirs.toString());
  job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
  job.setLong(MIN_TXN, minTxn);
  job.setLong(MAX_TXN, maxTxn);
  if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)) {
    mrJob = job;
  }
  LOG.info("Submitting " + compactionType + " compaction job '" + job.getJobName() + "' to " + job.getQueueName() + " queue. " + "(current delta dirs count=" + curDirNumber + ", obsolete delta dirs count=" + obsoleteDirNumber + ". TxnIdRange[" + minTxn + "," + maxTxn + "])");
  RunningJob rj = new JobClient(job).submitJob(job);
  LOG.info("Submitted compaction job '" + job.getJobName() + "' with jobID=" + rj.getID() + " compaction ID=" + id);
  txnHandler.setHadoopJobId(rj.getID().toString(), id);
  rj.waitForCompletion();
}
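In contrast to the previous example, which hands progress tracking to HadoopJobExecHelper, the compactor simply blocks on RunningJob.waitForCompletion(). A minimal sketch of that blocking style is below; the class name and the explicit success check are illustrative additions, not part of the Hive source.

import java.io.IOException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class BlockingSubmit {
  // Submit the job, block until it finishes, then fail loudly if it did not succeed.
  public static void runOrThrow(JobConf job) throws IOException {
    RunningJob rj = new JobClient(job).submitJob(job);
    rj.waitForCompletion();
    if (!rj.isSuccessful()) {
      throw new IOException("Job " + rj.getID() + " failed");
    }
  }
}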
use of org.apache.hadoop.mapred.JobClient in project hadoop-book by elephantscale.
the class RandomWriter method run.
/**
* This is the main routine for launching a distributed random write job. It
* runs 10 maps/node and each node writes 1 gig of data to a DFS file. The
* reduce doesn't do anything.
*
* @throws IOException
*/
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  // reducer NONE
  job.setNumReduceTasks(0);
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return 0;
}
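Since run(String[]) follows the Tool convention (note the call to ToolRunner.printGenericCommandUsage), the writer is normally launched through ToolRunner, which folds the generic Hadoop options into the Configuration before passing the remaining arguments to run(). A hedged sketch of such a driver, assuming RandomWriter implements org.apache.hadoop.util.Tool:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class RandomWriterDriver {
  public static void main(String[] args) throws Exception {
    // ToolRunner strips generic options (-D, -conf, -fs, ...) into the
    // Configuration and passes the rest, e.g. the output directory, to run().
    int exitCode = ToolRunner.run(new Configuration(), new RandomWriter(), args);
    System.exit(exitCode);
  }
}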
use of org.apache.hadoop.mapred.JobClient in project hadoop-book by elephantscale.
the class InvertedIndex method main.
public static void main(String[] args) {
  JobClient client = new JobClient();
  JobConf conf = new JobConf(InvertedIndex.class);
  conf.setJobName("InvertedIndex");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(conf, new Path("input"));
  FileOutputFormat.setOutputPath(conf, new Path("output"));
  conf.setMapperClass(InvertedIndexMapper.class);
  conf.setReducerClass(InvertedIndexReducer.class);
  client.setConf(conf);
  try {
    JobClient.runJob(conf);
  } catch (Exception e) {
    e.printStackTrace(System.out);
  }
}
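Here the driver uses the static convenience method JobClient.runJob(), which submits the job, polls it, and prints progress to the console until it completes, throwing IOException if the job fails. A short sketch that captures the returned RunningJob handle; the class name and log line are illustrative:

import java.io.IOException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class RunAndCheck {
  public static void run(JobConf conf) throws IOException {
    // runJob() blocks until completion and throws IOException if the job
    // fails, so reaching the next line means the job succeeded.
    RunningJob rj = JobClient.runJob(conf);
    System.out.println("Completed job " + rj.getID());
  }
}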
use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
the class DummySocketFactory method testSocketFactory.
/**
* Check that we can reach a NameNode or Resource Manager using a specific
* socket factory
*/
@Test
public void testSocketFactory() throws IOException {
  // Create a standard mini-cluster
  Configuration sconf = new Configuration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(sconf).numDataNodes(1).build();
  final int nameNodePort = cluster.getNameNodePort();
  // Get a reference to its DFS directly
  FileSystem fs = cluster.getFileSystem();
  Assert.assertTrue(fs instanceof DistributedFileSystem);
  DistributedFileSystem directDfs = (DistributedFileSystem) fs;
  Configuration cconf = getCustomSocketConfigs(nameNodePort);
  fs = FileSystem.get(cconf);
  Assert.assertTrue(fs instanceof DistributedFileSystem);
  DistributedFileSystem dfs = (DistributedFileSystem) fs;
  JobClient client = null;
  MiniMRYarnCluster miniMRYarnCluster = null;
  try {
    // This will test RPC to the NameNode only.
    // could we test Client-DataNode connections?
    Path filePath = new Path("/dir");
    Assert.assertFalse(directDfs.exists(filePath));
    Assert.assertFalse(dfs.exists(filePath));
    directDfs.mkdirs(filePath);
    Assert.assertTrue(directDfs.exists(filePath));
    Assert.assertTrue(dfs.exists(filePath));
    // This will test RPC to a Resource Manager
    fs = FileSystem.get(sconf);
    JobConf jobConf = new JobConf();
    FileSystem.setDefaultUri(jobConf, fs.getUri().toString());
    miniMRYarnCluster = initAndStartMiniMRYarnCluster(jobConf);
    JobConf jconf = new JobConf(miniMRYarnCluster.getConfig());
    jconf.set("hadoop.rpc.socket.factory.class.default", "org.apache.hadoop.ipc.DummySocketFactory");
    jconf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
    String rmAddress = jconf.get(YarnConfiguration.RM_ADDRESS);
    String[] split = rmAddress.split(":");
    jconf.set(YarnConfiguration.RM_ADDRESS, split[0] + ':' + (Integer.parseInt(split[1]) + 10));
    client = new JobClient(jconf);
    JobStatus[] jobs = client.jobsToComplete();
    Assert.assertTrue(jobs.length == 0);
  } finally {
    closeClient(client);
    closeDfs(dfs);
    closeDfs(directDfs);
    stopMiniMRYarnCluster(miniMRYarnCluster);
    shutdownDFSCluster(cluster);
  }
}
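The interesting piece, the DummySocketFactory registered under hadoop.rpc.socket.factory.class.default, is not shown in this excerpt. A hedged sketch of what such a test factory might look like, delegating to Hadoop's StandardSocketFactory while recording that it was used; the class and method names are illustrative, not copied from the Hadoop test:

import java.io.IOException;
import java.net.Socket;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.net.StandardSocketFactory;

public class CountingSocketFactory extends StandardSocketFactory {
  // Incremented every time Hadoop RPC opens a socket through this factory,
  // so the test can assert that the configured factory was actually used.
  private static final AtomicInteger SOCKETS = new AtomicInteger();

  @Override
  public Socket createSocket() throws IOException {
    SOCKETS.incrementAndGet();
    return super.createSocket();
  }

  public static int socketsCreated() {
    return SOCKETS.get();
  }
}

Hadoop instantiates the configured factory reflectively from the class name, so a real implementation needs to be public with a no-argument constructor.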