Example 41 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

From the class DataJoinJob, method runJob.

/**
   * Submit/run a map/reduce job.
   * 
   * @param job the configured map/reduce job to submit
   * @return true for success
   * @throws IOException
   */
public static boolean runJob(JobConf job) throws IOException {
    JobClient jc = new JobClient(job);
    boolean success = true;
    RunningJob running = null;
    try {
        running = jc.submitJob(job);
        JobID jobId = running.getID();
        System.out.println("Job " + jobId + " is submitted");
        while (!running.isComplete()) {
            System.out.println("Job " + jobId + " is still running.");
            try {
                Thread.sleep(60000);
            } catch (InterruptedException e) {
                // ignore the interrupt; the loop re-checks job status below
            }
            running = jc.getJob(jobId);
        }
        success = running.isSuccessful();
    } finally {
        if (!success && (running != null)) {
            running.killJob();
        }
        jc.close();
    }
    return success;
}
Also used: RunningJob (org.apache.hadoop.mapred.RunningJob), JobClient (org.apache.hadoop.mapred.JobClient), JobID (org.apache.hadoop.mapred.JobID)
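
A minimal driver sketch for the helper above. Only runJob() and its once-a-minute polling come from the excerpt; the driver class, job name, and the assumption that DataJoinJob is the datajoin contrib class (org.apache.hadoop.contrib.utils.join.DataJoinJob) are ours.

import org.apache.hadoop.contrib.utils.join.DataJoinJob;
import org.apache.hadoop.mapred.JobConf;

public class DataJoinDriver {
    public static void main(String[] args) throws Exception {
        // Illustrative configuration only; a real job would also set
        // input/output paths, mapper/reducer classes, and formats.
        JobConf job = new JobConf(DataJoinDriver.class);
        job.setJobName("data-join-demo");
        // Blocks until the job finishes, printing a status line roughly
        // once a minute; on failure the job is killed before returning.
        boolean success = DataJoinJob.runJob(job);
        System.exit(success ? 0 : 1);
    }
}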

Example 42 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

From the class DistCh, method setup.

private boolean setup(List<FileOperation> ops, Path log) throws IOException {
    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient.getClusterHandle(), jobconf);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }
    Path jobdir = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(jClient.getFs(), jobdir, mapredSysPerms);
    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    if (log == null) {
        log = new Path(jobdir, "_logs");
    }
    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);
    // create the operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    try (SequenceFile.Writer opWriter = SequenceFile.createWriter(fs, jobconf, opList, Text.class, FileOperation.class, SequenceFile.CompressionType.NONE)) {
        for (FileOperation op : ops) {
            FileStatus srcstat = fs.getFileStatus(op.src);
            if (srcstat.isDirectory() && op.isDifferent(srcstat)) {
                ++opCount;
                opWriter.append(new Text(op.src.toString()), op);
            }
            Stack<Path> pathstack = new Stack<Path>();
            for (pathstack.push(op.src); !pathstack.empty(); ) {
                for (FileStatus stat : fs.listStatus(pathstack.pop())) {
                    if (stat.isDirectory()) {
                        pathstack.push(stat.getPath());
                    }
                    if (op.isDifferent(stat)) {
                        ++opCount;
                        if (++synCount > SYNC_FILE_MAX) {
                            opWriter.sync();
                            synCount = 0;
                        }
                        Path f = stat.getPath();
                        opWriter.append(new Text(f.toString()), new FileOperation(f, op));
                    }
                }
            }
        }
    }
    checkDuplication(fs, opList, new Path(jobdir, "_sorted"), jobconf);
    jobconf.setInt(OP_COUNT_LABEL, opCount);
    LOG.info(OP_COUNT_LABEL + "=" + opCount);
    jobconf.setNumMapTasks(getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
    return opCount != 0;
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), Text (org.apache.hadoop.io.Text), IOException (java.io.IOException), JobClient (org.apache.hadoop.mapred.JobClient), Stack (java.util.Stack), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), FsPermission (org.apache.hadoop.fs.permission.FsPermission)
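
The idiom worth noting above is the periodic opWriter.sync(): a sync marker goes into the SequenceFile every SYNC_FILE_MAX appends so that map tasks can later split the operation list and resynchronize mid-file. A self-contained sketch of that pattern, with an assumed SYNC_FILE_MAX value and an illustrative path and value type (DistCh uses its own constant and FileOperation values):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class OpListWriterSketch {
    // Assumed value; the real constant in DistCh is not shown here.
    static final int SYNC_FILE_MAX = 32;

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path opList = new Path("/tmp/op_list.seq"); // illustrative path
        int synCount = 0;
        try (SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, opList,
                Text.class, LongWritable.class, SequenceFile.CompressionType.NONE)) {
            for (long i = 0; i < 1000; i++) {
                if (++synCount > SYNC_FILE_MAX) {
                    w.sync(); // drop a marker so readers can resynchronize here
                    synCount = 0;
                }
                w.append(new Text("op-" + i), new LongWritable(i));
            }
        }
    }
}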

Example 43 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

From the class TestGridmixSummary, method testClusterSummarizer.

/**
   * Test {@link ClusterSummarizer}.
   */
@Test
public void testClusterSummarizer() throws IOException {
    ClusterSummarizer cs = new ClusterSummarizer();
    Configuration conf = new Configuration();
    String jt = "test-jt:1234";
    String nn = "test-nn:5678";
    conf.set(JTConfig.JT_IPC_ADDRESS, jt);
    conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, nn);
    cs.start(conf);
    assertEquals("JT name mismatch", jt, cs.getJobTrackerInfo());
    assertEquals("NN name mismatch", nn, cs.getNamenodeInfo());
    ClusterStats cStats = ClusterStats.getClusterStats();
    conf.set(JTConfig.JT_IPC_ADDRESS, "local");
    conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "local");
    JobClient jc = new JobClient(conf);
    cStats.setClusterMetric(jc.getClusterStatus());
    cs.update(cStats);
    // test
    assertEquals("Cluster summary test failed!", 1, cs.getMaxMapTasks());
    assertEquals("Cluster summary test failed!", 1, cs.getMaxReduceTasks());
    assertEquals("Cluster summary test failed!", 1, cs.getNumActiveTrackers());
    assertEquals("Cluster summary test failed!", 0, cs.getNumBlacklistedTrackers());
}
Also used: ClusterStats (org.apache.hadoop.mapred.gridmix.Statistics.ClusterStats), Configuration (org.apache.hadoop.conf.Configuration), JobClient (org.apache.hadoop.mapred.JobClient), Test (org.junit.Test)
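
The same local-runner trick works outside a test harness for poking at cluster metrics. A minimal sketch, assuming the Hadoop 2.x key strings behind the JTConfig and CommonConfigurationKeys constants used above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;

public class LocalClusterStatus {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumed to match JTConfig.JT_IPC_ADDRESS and
        // CommonConfigurationKeys.FS_DEFAULT_NAME_KEY in Hadoop 2.x.
        conf.set("mapreduce.jobtracker.address", "local");
        conf.set("fs.defaultFS", "local");
        JobClient jc = new JobClient(conf);
        ClusterStatus status = jc.getClusterStatus();
        System.out.println("max maps:    " + status.getMaxMapTasks());
        System.out.println("max reduces: " + status.getMaxReduceTasks());
        System.out.println("trackers:    " + status.getTaskTrackers());
        jc.close();
    }
}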

Example 44 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

From the class StreamJob, method submitAndMonitorJob.

// Based on JobClient
public int submitAndMonitorJob() throws IOException {
    if (jar_ != null && isLocalHadoop()) {
        // getAbs became required when shell and subvm have different working dirs...
        File wd = new File(".").getAbsoluteFile();
        RunJar.unJar(new File(jar_), wd);
    }
    // if jobConf_ changes, the JobClient must be recreated
    jc_ = new JobClient(jobConf_);
    running_ = null;
    try {
        running_ = jc_.submitJob(jobConf_);
        jobId_ = running_.getID();
        if (background_) {
            LOG.info("Job is running in background.");
        } else if (!jc_.monitorAndPrintJob(jobConf_, running_)) {
            LOG.error("Job not successful!");
            return 1;
        }
        LOG.info("Output directory: " + output_);
    } catch (FileNotFoundException fe) {
        LOG.error("Error launching job , bad input path : " + fe.getMessage());
        return 2;
    } catch (InvalidJobConfException je) {
        LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
        return 3;
    } catch (FileAlreadyExistsException fae) {
        LOG.error("Error launching job , Output path already exists : " + fae.getMessage());
        return 4;
    } catch (IOException ioe) {
        LOG.error("Error Launching job : " + ioe.getMessage());
        return 5;
    } catch (InterruptedException ie) {
        LOG.error("Error monitoring job : " + ie.getMessage());
        return 6;
    } finally {
        jc_.close();
    }
    return 0;
}
Also used: FileAlreadyExistsException (org.apache.hadoop.fs.FileAlreadyExistsException), FileNotFoundException (java.io.FileNotFoundException), InvalidJobConfException (org.apache.hadoop.mapred.InvalidJobConfException), IOException (java.io.IOException), File (java.io.File), JobClient (org.apache.hadoop.mapred.JobClient)
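
Stripped of the streaming-specific unjarring and fine-grained exit codes, the core idiom is: submit, then hand all progress printing to JobClient.monitorAndPrintJob. A condensed sketch (the class name and the simplified 0/1 exit mapping are ours, not StreamJob's):

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitAndMonitor {
    public static int run(JobConf jobConf) throws Exception {
        JobClient jc = new JobClient(jobConf);
        try {
            RunningJob running = jc.submitJob(jobConf);
            System.out.println("Submitted job " + running.getID());
            // Blocks until completion, printing progress and counters.
            return jc.monitorAndPrintJob(jobConf, running) ? 0 : 1;
        } finally {
            jc.close();
        }
    }
}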

Example 45 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project Cloud9 by lintool.

From the class ClueWebAnchorTextForwardIndexHttpServer, method main.

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.out.println("usage: [index-file] [docno-mapping-data-files] [clue-forward-index-root]");
        // [clue-forward-index-root] example: /shared/ClueWeb09/collection.compressed.block/
        System.exit(-1);
    }
    String indexFile = otherArgs[0];
    String mappingFile = otherArgs[1];
    String clueIndexRoot = otherArgs[2].endsWith("/") ? otherArgs[2] : otherArgs[2] + "/";
    String cluewebForwardIndex = "";
    for (int i = 1; i < 10; i++) {
        cluewebForwardIndex += clueIndexRoot + "findex.en.0" + i + ".dat" + SEPARATOR + " ";
    }
    cluewebForwardIndex += clueIndexRoot + "findex.en.10.dat";
    LOG.info("Launching DocumentForwardIndexHttpServer");
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - docno mapping data file: " + mappingFile);
    LOG.info(" - ClueWeb09 index root:" + clueIndexRoot);
    FileSystem fs = FileSystem.get(conf);
    Random rand = new Random();
    int r = rand.nextInt();
    // this tmp file serves as a rendezvous point
    Path tmpPath = new Path("/tmp/" + r);
    if (fs.exists(tmpPath)) {
        fs.delete(tmpPath, true);
    }
    JobConf job = new JobConf(conf, ClueWebAnchorTextForwardIndexHttpServer.class);
    job.setJobName("ForwardIndexServer:" + indexFile);
    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setNumMapTasks(1);
    job.setNumReduceTasks(0);
    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(ServerMapper.class);
    job.set("IndexFile", indexFile);
    job.set("DocnoMappingDataFile", mappingFile);
    job.set("TmpPath", tmpPath.toString());
    job.set("ClueWebIndexFiles", cluewebForwardIndex);
    JobClient client = new JobClient(job);
    client.submitJob(job);
    LOG.info("Waiting for server to start up...");
    while (!fs.exists(tmpPath)) {
        Thread.sleep(50000);
        LOG.info("...");
    }
    FSDataInputStream in = fs.open(tmpPath);
    String host = in.readUTF();
    in.close();
    LOG.info("host: " + host);
    LOG.info("port: 8888");
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Random (java.util.Random), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), JobConf (org.apache.hadoop.mapred.JobConf), JobClient (org.apache.hadoop.mapred.JobClient), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
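
The driver above blocks on fs.exists(tmpPath), which only works if the mapper-side server eventually creates that file and writes its hostname with writeUTF. ServerMapper is not shown in the excerpt, so the following rendezvous write is a hypothetical sketch of what it presumably does (class and method names are assumptions):

import java.net.InetAddress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RendezvousWriter {
    public static void announce(Configuration conf, String tmpPathStr) throws Exception {
        FileSystem fs = FileSystem.get(conf);
        String host = InetAddress.getLocalHost().getHostName();
        // Overwrite the agreed-upon tmp file with this server's hostname.
        try (FSDataOutputStream out = fs.create(new Path(tmpPathStr), true)) {
            out.writeUTF(host); // matched by in.readUTF() in the driver
        }
    }
}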

Aggregations

JobClient (org.apache.hadoop.mapred.JobClient): 47 usages
Path (org.apache.hadoop.fs.Path): 25 usages
RunningJob (org.apache.hadoop.mapred.RunningJob): 20 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 18 usages
JobConf (org.apache.hadoop.mapred.JobConf): 18 usages
IOException (java.io.IOException): 16 usages
Configuration (org.apache.hadoop.conf.Configuration): 16 usages
ClusterStatus (org.apache.hadoop.mapred.ClusterStatus): 11 usages
Date (java.util.Date): 7 usages
Text (org.apache.hadoop.io.Text): 6 usages
Counters (org.apache.hadoop.mapred.Counters): 6 usages
Test (org.junit.Test): 6 usages
DataOutputStream (java.io.DataOutputStream): 5 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 5 usages
BufferedReader (java.io.BufferedReader): 4 usages
InputStreamReader (java.io.InputStreamReader): 4 usages
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 4 usages
Context (org.apache.hadoop.hive.ql.Context): 4 usages
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 4 usages
FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat): 4 usages