Example 41 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

the class DataJoinJob method runJob.

   * Submit/run a map/reduce job.
   * @param job
   * @return true for success
   * @throws IOException
public static boolean runJob(JobConf job) throws IOException {
    JobClient jc = new JobClient(job);
    boolean sucess = true;
    RunningJob running = null;
    try {
        running = jc.submitJob(job);
        JobID jobId = running.getID();
        System.out.println("Job " + jobId + " is submitted");
        while (!running.isComplete()) {
            System.out.println("Job " + jobId + " is still running.");
            try {
            } catch (InterruptedException e) {
            running = jc.getJob(jobId);
        sucess = running.isSuccessful();
    } finally {
        if (!sucess && (running != null)) {
    return sucess;
Also used : RunningJob(org.apache.hadoop.mapred.RunningJob) JobClient(org.apache.hadoop.mapred.JobClient) JobID(org.apache.hadoop.mapred.JobID)

Example 42 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

the class DistCh method setup.

private boolean setup(List<FileOperation> ops, Path log) throws IOException {
    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient.getClusterHandle(), jobconf);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    Path jobdir = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(jClient.getFs(), jobdir, mapredSysPerms); + "=" + jobdir);
    if (log == null) {
        log = new Path(jobdir, "_logs");
    FileOutputFormat.setOutputPath(jobconf, log);"log=" + log);
    //create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    try (SequenceFile.Writer opWriter = SequenceFile.createWriter(fs, jobconf, opList, Text.class, FileOperation.class, SequenceFile.CompressionType.NONE)) {
        for (FileOperation op : ops) {
            FileStatus srcstat = fs.getFileStatus(op.src);
            if (srcstat.isDirectory() && op.isDifferent(srcstat)) {
                opWriter.append(new Text(op.src.toString()), op);
            Stack<Path> pathstack = new Stack<Path>();
            for (pathstack.push(op.src); !pathstack.empty(); ) {
                for (FileStatus stat : fs.listStatus(pathstack.pop())) {
                    if (stat.isDirectory()) {
                    if (op.isDifferent(stat)) {
                        if (++synCount > SYNC_FILE_MAX) {
                            synCount = 0;
                        Path f = stat.getPath();
                        opWriter.append(new Text(f.toString()), new FileOperation(f, op));
    checkDuplication(fs, opList, new Path(jobdir, "_sorted"), jobconf);
    jobconf.setInt(OP_COUNT_LABEL, opCount); + "=" + opCount);
    jobconf.setNumMapTasks(getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
    return opCount != 0;
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) JobClient(org.apache.hadoop.mapred.JobClient) Stack(java.util.Stack) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) FsPermission(org.apache.hadoop.fs.permission.FsPermission)

Example 43 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

the class TestGridmixSummary method testClusterSummarizer.

   * Test {@link ClusterSummarizer}.
public void testClusterSummarizer() throws IOException {
    ClusterSummarizer cs = new ClusterSummarizer();
    Configuration conf = new Configuration();
    String jt = "test-jt:1234";
    String nn = "test-nn:5678";
    conf.set(JTConfig.JT_IPC_ADDRESS, jt);
    conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, nn);
    assertEquals("JT name mismatch", jt, cs.getJobTrackerInfo());
    assertEquals("NN name mismatch", nn, cs.getNamenodeInfo());
    ClusterStats cStats = ClusterStats.getClusterStats();
    conf.set(JTConfig.JT_IPC_ADDRESS, "local");
    conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "local");
    JobClient jc = new JobClient(conf);
    // test
    assertEquals("Cluster summary test failed!", 1, cs.getMaxMapTasks());
    assertEquals("Cluster summary test failed!", 1, cs.getMaxReduceTasks());
    assertEquals("Cluster summary test failed!", 1, cs.getNumActiveTrackers());
    assertEquals("Cluster summary test failed!", 0, cs.getNumBlacklistedTrackers());
Also used : ClusterStats(org.apache.hadoop.mapred.gridmix.Statistics.ClusterStats) Configuration(org.apache.hadoop.conf.Configuration) JobClient(org.apache.hadoop.mapred.JobClient) Test(org.junit.Test)

Example 44 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

the class StreamJob method submitAndMonitorJob.

// Based on JobClient
public int submitAndMonitorJob() throws IOException {
    if (jar_ != null && isLocalHadoop()) {
        // getAbs became required when shell and subvm have different working dirs...
        File wd = new File(".").getAbsoluteFile();
        RunJar.unJar(new File(jar_), wd);
    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    running_ = null;
    try {
        running_ = jc_.submitJob(jobConf_);
        jobId_ = running_.getID();
        if (background_) {
  "Job is running in background.");
        } else if (!jc_.monitorAndPrintJob(jobConf_, running_)) {
            LOG.error("Job not successful!");
            return 1;
        }"Output directory: " + output_);
    } catch (FileNotFoundException fe) {
        LOG.error("Error launching job , bad input path : " + fe.getMessage());
        return 2;
    } catch (InvalidJobConfException je) {
        LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
        return 3;
    } catch (FileAlreadyExistsException fae) {
        LOG.error("Error launching job , Output path already exists : " + fae.getMessage());
        return 4;
    } catch (IOException ioe) {
        LOG.error("Error Launching job : " + ioe.getMessage());
        return 5;
    } catch (InterruptedException ie) {
        LOG.error("Error monitoring job : " + ie.getMessage());
        return 6;
    } finally {
    return 0;
Also used : FileAlreadyExistsException(org.apache.hadoop.fs.FileAlreadyExistsException) FileNotFoundException(java.io.FileNotFoundException) InvalidJobConfException(org.apache.hadoop.mapred.InvalidJobConfException) IOException(java.io.IOException) File(java.io.File) JobClient(org.apache.hadoop.mapred.JobClient)

Example 45 with JobClient

use of org.apache.hadoop.mapred.JobClient in project Cloud9 by lintool.

the class ClueWebAnchorTextForwardIndexHttpServer method main.

/**
 * Launches the forward-index HTTP server as a Hadoop job and waits for it to
 * publish its host name.
 *
 * <p>Expects three arguments after generic Hadoop options: the index file,
 * the docno mapping data file(s), and the ClueWeb forward index root. The
 * job receives these through configuration keys, and a randomly named path
 * under {@code /tmp/} is used as a rendezvous point.
 *
 * <p>NOTE(review): this snippet is damaged by extraction -- several closing
 * braces and statement fragments are missing (marked below); it does not
 * compile as shown.
 *
 * @param args command-line arguments (generic options plus the three above)
 * @throws Exception on any failure
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Strip generic Hadoop options; what remains are the tool's own arguments.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.out.println("usage: [index-file] [docno-mapping-data-files] [clue-forward-index-root]");
        //[clue-forward-index-root: /shared/ClueWeb09/collection.compressed.block/
    // NOTE(review): the end of the usage branch (presumably an exit and the
    // closing brace) is missing here -- confirm against the original file.
    String indexFile = otherArgs[0];
    String mappingFile = otherArgs[1];
    // Normalize the root so that it always ends with a slash.
    String clueIndexRoot = otherArgs[2].endsWith("/") ? otherArgs[2] : otherArgs[2] + "/";
    // Build the list of the ten forward index files findex.en.01.dat ..
    // findex.en.10.dat; the first nine are each followed by SEPARATOR + " ".
    String cluewebForwardIndex = "";
    for (int i = 1; i < 10; i++) cluewebForwardIndex += clueIndexRoot + "findex.en.0" + i + ".dat" + SEPARATOR + " ";
    // NOTE(review): the string fragments trailing the next statement look like
    // arguments of stripped logging calls; the logger is not visible -- confirm.
    cluewebForwardIndex += clueIndexRoot + "findex.en.10.dat";"Launching DocumentForwardIndexHttpServer");" - index file: " + indexFile);" - docno mapping data file: " + mappingFile);" - ClueWeb09 index root:" + clueIndexRoot);
    FileSystem fs = FileSystem.get(conf);
    Random rand = new Random();
    int r = rand.nextInt();
    // this tmp file as a rendezvous point
    Path tmpPath = new Path("/tmp/" + r);
    // Remove a stale rendezvous path left at the same location.
    if (fs.exists(tmpPath)) {
        fs.delete(tmpPath, true);
    // NOTE(review): closing brace of the if-block above is missing.
    JobConf job = new JobConf(conf, ClueWebAnchorTextForwardIndexHttpServer.class);
    job.setJobName("ForwardIndexServer:" + indexFile);
    // NOTE(review): the configuration key is empty here; given the "-Xmx2048m"
    // value it is presumably the child JVM options key
    // ("mapred.child.java.opts") -- confirm.
    job.set("", "-Xmx2048m");
    // Hand the tool's parameters to the job through its configuration.
    job.set("IndexFile", indexFile);
    job.set("DocnoMappingDataFile", mappingFile);
    job.set("TmpPath", tmpPath.toString());
    job.set("ClueWebIndexFiles", cluewebForwardIndex);
    JobClient client = new JobClient(job);
    // Submit without waiting for completion.
    // NOTE(review): trailing string is a stripped log-call fragment.
    client.submitJob(job);"Waiting for server to start up...");
    // Wait until the rendezvous path appears.
    // NOTE(review): the loop body and closing brace are missing; presumably
    // it pauses between checks -- confirm.
    while (!fs.exists(tmpPath)) {
    // NOTE(review): the initializer is missing; presumably the stream is
    // opened on tmpPath via fs -- confirm.
    FSDataInputStream in =;
    // The first UTF string read from the stream is treated as the host name.
    String host = in.readUTF();
    // NOTE(review): trailing strings are stripped log-call fragments; the
    // method's closing brace is also missing after this line.
    in.close();"host: " + host);"port: 8888");
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Random(java.util.Random) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) JobConf(org.apache.hadoop.mapred.JobConf) JobClient(org.apache.hadoop.mapred.JobClient) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)


