Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class DataJoinJob, method runJob.
/**
 * Submit/run a map/reduce job.
 *
 * @param job the job configuration to submit
 * @return true if the job completed successfully
 * @throws IOException if submission or monitoring fails
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
        // ignore the interrupt and keep polling
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!success && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
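For context, a minimal sketch of driving this helper. The JobConf setup below (job name, input/output paths) is illustrative only and omits the mapper/reducer wiring a real data-join job needs; it is not the actual DataJoinJob command-line flow.

// Hypothetical caller of DataJoinJob.runJob; paths are placeholders.
JobConf job = new JobConf(DataJoinJob.class);
job.setJobName("data-join-example");
FileInputFormat.setInputPaths(job, new Path("/input"));    // assumed input path
FileOutputFormat.setOutputPath(job, new Path("/output"));  // assumed output path
boolean ok = DataJoinJob.runJob(job);
System.exit(ok ? 0 : 1);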
Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class DistCh, method setup.
private boolean setup(List<FileOperation> ops, Path log) throws IOException {
  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobconf);
  Path stagingArea;
  try {
    stagingArea = JobSubmissionFiles.getStagingDir(jClient.getClusterHandle(), jobconf);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
  Path jobdir = new Path(stagingArea + NAME + "_" + randomId);
  FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
  FileSystem.mkdirs(jClient.getFs(), jobdir, mapredSysPerms);
  LOG.info(JOB_DIR_LABEL + "=" + jobdir);
  if (log == null) {
    log = new Path(jobdir, "_logs");
  }
  FileOutputFormat.setOutputPath(jobconf, log);
  LOG.info("log=" + log);
  // create the operation list
  FileSystem fs = jobdir.getFileSystem(jobconf);
  Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
  jobconf.set(OP_LIST_LABEL, opList.toString());
  int opCount = 0, synCount = 0;
  try (SequenceFile.Writer opWriter = SequenceFile.createWriter(fs, jobconf, opList,
      Text.class, FileOperation.class, SequenceFile.CompressionType.NONE)) {
    for (FileOperation op : ops) {
      FileStatus srcstat = fs.getFileStatus(op.src);
      if (srcstat.isDirectory() && op.isDifferent(srcstat)) {
        ++opCount;
        opWriter.append(new Text(op.src.toString()), op);
      }
      // walk the directory tree rooted at op.src without recursion
      Stack<Path> pathstack = new Stack<Path>();
      for (pathstack.push(op.src); !pathstack.empty(); ) {
        for (FileStatus stat : fs.listStatus(pathstack.pop())) {
          if (stat.isDirectory()) {
            pathstack.push(stat.getPath());
          }
          if (op.isDifferent(stat)) {
            ++opCount;
            // add a sync marker periodically so the operation list stays splittable
            if (++synCount > SYNC_FILE_MAX) {
              opWriter.sync();
              synCount = 0;
            }
            Path f = stat.getPath();
            opWriter.append(new Text(f.toString()), new FileOperation(f, op));
          }
        }
      }
    }
  }
  checkDuplication(fs, opList, new Path(jobdir, "_sorted"), jobconf);
  jobconf.setInt(OP_COUNT_LABEL, opCount);
  LOG.info(OP_COUNT_LABEL + "=" + opCount);
  jobconf.setNumMapTasks(getMapCount(opCount,
      new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
  return opCount != 0;
}
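The periodic opWriter.sync() above is what keeps the operation list splittable for the map phase. A minimal standalone sketch of the same pattern, where fs, conf, listPath, records, and the SYNC_INTERVAL threshold are all assumed placeholders:

// Write a splittable SequenceFile by inserting sync markers every N records.
final int SYNC_INTERVAL = 32;  // hypothetical threshold, analogous to SYNC_FILE_MAX
int sinceSync = 0;
try (SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, listPath,
    Text.class, Text.class, SequenceFile.CompressionType.NONE)) {
  for (String record : records) {
    w.append(new Text(record), new Text(""));
    if (++sinceSync > SYNC_INTERVAL) {
      w.sync();       // sync marker: InputFormats can split the file here
      sinceSync = 0;
    }
  }
}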
Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class TestGridmixSummary, method testClusterSummarizer.
/**
 * Test {@link ClusterSummarizer}.
 */
@Test
public void testClusterSummarizer() throws IOException {
  ClusterSummarizer cs = new ClusterSummarizer();
  Configuration conf = new Configuration();
  String jt = "test-jt:1234";
  String nn = "test-nn:5678";
  conf.set(JTConfig.JT_IPC_ADDRESS, jt);
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, nn);
  cs.start(conf);
  assertEquals("JT name mismatch", jt, cs.getJobTrackerInfo());
  assertEquals("NN name mismatch", nn, cs.getNamenodeInfo());
  ClusterStats cStats = ClusterStats.getClusterStats();
  // switch to local mode so a JobClient can be constructed against the local runner
  conf.set(JTConfig.JT_IPC_ADDRESS, "local");
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "local");
  JobClient jc = new JobClient(conf);
  cStats.setClusterMetric(jc.getClusterStatus());
  cs.update(cStats);
  // verify the summarized cluster metrics
  assertEquals("Cluster summary test failed!", 1, cs.getMaxMapTasks());
  assertEquals("Cluster summary test failed!", 1, cs.getMaxReduceTasks());
  assertEquals("Cluster summary test failed!", 1, cs.getNumActiveTrackers());
  assertEquals("Cluster summary test failed!", 0, cs.getNumBlacklistedTrackers());
}
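The same cluster metrics can be read directly from a JobClient. A minimal sketch, assuming a local-mode Configuration like the one set up in the test above (run inside a method that throws IOException):

Configuration conf = new Configuration();
conf.set(JTConfig.JT_IPC_ADDRESS, "local");  // local runner, as in the test
JobClient jc = new JobClient(conf);
ClusterStatus status = jc.getClusterStatus();
System.out.println("max map slots:    " + status.getMaxMapTasks());
System.out.println("max reduce slots: " + status.getMaxReduceTasks());
System.out.println("active trackers:  " + status.getTaskTrackers());
jc.close();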
Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class StreamJob, method submitAndMonitorJob.
// Based on JobClient
public int submitAndMonitorJob() throws IOException {
  if (jar_ != null && isLocalHadoop()) {
    // getAbs became required when shell and subvm have different working dirs...
    File wd = new File(".").getAbsoluteFile();
    RunJar.unJar(new File(jar_), wd);
  }
  // if jobConf_ changes, a new JobClient must be created
  jc_ = new JobClient(jobConf_);
  running_ = null;
  try {
    running_ = jc_.submitJob(jobConf_);
    jobId_ = running_.getID();
    if (background_) {
      LOG.info("Job is running in background.");
    } else if (!jc_.monitorAndPrintJob(jobConf_, running_)) {
      LOG.error("Job not successful!");
      return 1;
    }
    LOG.info("Output directory: " + output_);
  } catch (FileNotFoundException fe) {
    LOG.error("Error launching job, bad input path: " + fe.getMessage());
    return 2;
  } catch (InvalidJobConfException je) {
    LOG.error("Error launching job, invalid job conf: " + je.getMessage());
    return 3;
  } catch (FileAlreadyExistsException fae) {
    LOG.error("Error launching job, output path already exists: " + fae.getMessage());
    return 4;
  } catch (IOException ioe) {
    LOG.error("Error launching job: " + ioe.getMessage());
    return 5;
  } catch (InterruptedException ie) {
    LOG.error("Error monitoring job: " + ie.getMessage());
    return 6;
  } finally {
    jc_.close();
  }
  return 0;
}
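The core of this method is JobClient.submitJob followed by JobClient.monitorAndPrintJob, which blocks and prints progress until the job finishes. A minimal sketch of that submit-and-monitor pattern, with a hypothetical JobConf (the job name and any input/output setup are placeholders):

JobConf conf = new JobConf();
conf.setJobName("streaming-style-example");  // assumed name; real setup omitted
JobClient client = new JobClient(conf);
try {
  RunningJob job = client.submitJob(conf);
  // blocks until completion, printing progress along the way
  boolean ok = client.monitorAndPrintJob(conf, job);
  System.exit(ok ? 0 : 1);
} catch (InterruptedException ie) {
  throw new IOException(ie);
} finally {
  client.close();
}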
Use of org.apache.hadoop.mapred.JobClient in project Cloud9 by lintool.
The class ClueWebAnchorTextForwardIndexHttpServer, method main.
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 3) {
    System.out.println("usage: [index-file] [docno-mapping-data-files] [clue-forward-index-root]");
    // e.g., clue-forward-index-root: /shared/ClueWeb09/collection.compressed.block/
    System.exit(-1);
  }
  String indexFile = otherArgs[0];
  String mappingFile = otherArgs[1];
  String clueIndexRoot = otherArgs[2].endsWith("/") ? otherArgs[2] : otherArgs[2] + "/";
  // build the list of forward-index files: findex.en.01.dat through findex.en.10.dat
  String cluewebForwardIndex = "";
  for (int i = 1; i < 10; i++) {
    cluewebForwardIndex += clueIndexRoot + "findex.en.0" + i + ".dat" + SEPARATOR + " ";
  }
  cluewebForwardIndex += clueIndexRoot + "findex.en.10.dat";
  LOG.info("Launching DocumentForwardIndexHttpServer");
  LOG.info(" - index file: " + indexFile);
  LOG.info(" - docno mapping data file: " + mappingFile);
  LOG.info(" - ClueWeb09 index root: " + clueIndexRoot);
  FileSystem fs = FileSystem.get(conf);
  Random rand = new Random();
  int r = rand.nextInt();
  // use this tmp file as a rendezvous point: the server-side mapper writes its host here
  Path tmpPath = new Path("/tmp/" + r);
  if (fs.exists(tmpPath)) {
    fs.delete(tmpPath, true);
  }
  JobConf job = new JobConf(conf, ClueWebAnchorTextForwardIndexHttpServer.class);
  job.setJobName("ForwardIndexServer:" + indexFile);
  job.set("mapred.child.java.opts", "-Xmx2048m");
  job.setNumMapTasks(1);
  job.setNumReduceTasks(0);
  job.setInputFormat(NullInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ServerMapper.class);
  job.set("IndexFile", indexFile);
  job.set("DocnoMappingDataFile", mappingFile);
  job.set("TmpPath", tmpPath.toString());
  job.set("ClueWebIndexFiles", cluewebForwardIndex);
  JobClient client = new JobClient(job);
  client.submitJob(job);
  LOG.info("Waiting for server to start up...");
  // poll until the server-side mapper creates the rendezvous file
  while (!fs.exists(tmpPath)) {
    Thread.sleep(50000);
    LOG.info("...");
  }
  FSDataInputStream in = fs.open(tmpPath);
  String host = in.readUTF();
  in.close();
  LOG.info("host: " + host);
  LOG.info("port: 8888");
}
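The snippet never shows what ServerMapper does; presumably it writes its host name to TmpPath once the HTTP server is up. A minimal sketch of that server side of the handshake, stated as an assumption rather than the actual Cloud9 code:

// Hypothetical server-side half of the rendezvous: publish the host name,
// then serve. Assumes conf carries the "TmpPath" key set by the driver above.
FileSystem fs = FileSystem.get(conf);
Path tmpPath = new Path(conf.get("TmpPath"));
FSDataOutputStream out = fs.create(tmpPath, true);
out.writeUTF(InetAddress.getLocalHost().getHostName());  // read back via readUTF()
out.close();
// ... start the HTTP server on port 8888 and block so the map task stays alive ...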