Examples with RandomTextWriterJob - org.apache.hadoop.RandomTextWriterJob

Example 1 with RandomTextWriterJob

use of org.apache.hadoop.RandomTextWriterJob in project hadoop by apache.

the class TestMRJobs method testRandomWriter.

@Test(timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting testRandomWriter().");
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
    mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
    mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
    Job job = randomWriterJob.createJob(mrCluster.getConfig());
    Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setSpeculativeExecution(false);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(RandomTextWriterJob.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
    // Make sure there are three files in the output-dir
    RemoteIterator<FileStatus> iterator = FileContext.getFileContext(mrCluster.getConfig()).listStatus(outputDir);
    int count = 0;
    while (iterator.hasNext()) {
        FileStatus file = iterator.next();
        if (!file.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
            count++;
        }
    }
    Assert.assertEquals("Number of part files is wrong!", 3, count);
    verifyRandomWriterCounters(job);
// TODO later:  add explicit "isUber()" checks of some sort
}

Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) SleepJob(org.apache.hadoop.mapreduce.SleepJob) File(java.io.File) Test(org.junit.Test)

Example 2 with RandomTextWriterJob

use of org.apache.hadoop.RandomTextWriterJob in project tez by apache.

the class TestMRRJobs method testRandomWriter.

@Test(timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting testRandomWriter().");
    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }
    RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
    mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
    mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
    Job job = randomWriterJob.createJob(mrrTezCluster.getConfig());
    Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setSpeculativeExecution(false);
    job.setJarByClass(RandomTextWriterJob.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.contains(jobId.substring(jobId.indexOf("_"))));
    // Make sure there are three files in the output-dir
    RemoteIterator<FileStatus> iterator = FileContext.getFileContext(mrrTezCluster.getConfig()).listStatus(outputDir);
    int count = 0;
    while (iterator.hasNext()) {
        FileStatus file = iterator.next();
        if (!file.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
            count++;
        }
    }
    Assert.assertEquals("Number of part files is wrong!", 3, count);
}

Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) MRRSleepJob(org.apache.tez.mapreduce.examples.MRRSleepJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) File(java.io.File) Test(org.junit.Test)

Aggregations

File (java.io.File)2 RandomTextWriterJob (org.apache.hadoop.RandomTextWriterJob)2 FileStatus (org.apache.hadoop.fs.FileStatus)2 Path (org.apache.hadoop.fs.Path)2 Job (org.apache.hadoop.mapreduce.Job)2 Test (org.junit.Test)2 RunningJob (org.apache.hadoop.mapred.RunningJob)1 SleepJob (org.apache.hadoop.mapreduce.SleepJob)1 MRRSleepJob (org.apache.tez.mapreduce.examples.MRRSleepJob)1