
Example 86 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

The class TestCombineFileInputFormat, method testReinit.

@Test
public void testReinit() throws Exception {
    // Test that a split containing multiple files works correctly,
    // with the child RecordReader getting its initialize() method
    // called a second time.
    TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);
    // This will create a CombineFileRecordReader that itself contains a
    // DummyRecordReader.
    InputFormat inputFormat = new ChildRRInputFormat();
    Path[] files = { new Path("file1"), new Path("file2") };
    long[] lengths = { 1, 1 };
    CombineFileSplit split = new CombineFileSplit(files, lengths);
    RecordReader rr = inputFormat.createRecordReader(split, context);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);
    // first initialize() call comes from MapTask. We'll do it here.
    rr.initialize(split, context);
    // First value is first filename.
    assertTrue(rr.nextKeyValue());
    assertEquals("file1", rr.getCurrentValue().toString());
    // The inner RR will return false, because it only emits one (k, v) pair.
    // But there's another sub-split to process. This returns true to us.
    assertTrue(rr.nextKeyValue());
    // And the 2nd rr will have its initialize method called correctly.
    assertEquals("file2", rr.getCurrentValue().toString());
    // But after both child RR's have returned their singleton (k, v), this
    // should also return false.
    assertFalse(rr.nextKeyValue());
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), InputFormat (org.apache.hadoop.mapreduce.InputFormat), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), RecordReader (org.apache.hadoop.mapreduce.RecordReader), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Test (org.junit.Test)
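
Neither ChildRRInputFormat nor the child record reader it wraps is shown on this page. The sketch below is a minimal stand-in, assuming Text keys and values and a child reader class named DummyRecordReader (both assumptions drawn from the assertions in the tests, not from the actual Hadoop test source):

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

// Hypothetical stand-in for the ChildRRInputFormat used in the tests above.
public class ChildRRInputFormat extends CombineFileInputFormat<Text, Text> {
    @Override
    public RecordReader<Text, Text> createRecordReader(
            InputSplit split, TaskAttemptContext context) throws IOException {
        // CombineFileRecordReader instantiates one DummyRecordReader per file
        // in the CombineFileSplit and calls initialize() on each child as it
        // advances, which is the reinitialization path testReinit exercises.
        return new CombineFileRecordReader<Text, Text>(
                (CombineFileSplit) split, context, DummyRecordReader.class);
    }
}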

Example 87 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

The class TestCombineFileInputFormat, method testRecordReaderInit.

@Test
public void testRecordReaderInit() throws InterruptedException, IOException {
    // Test that we properly initialize the child recordreader when
    // CombineFileInputFormat and CombineFileRecordReader are used.
    TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    Configuration conf1 = new Configuration();
    conf1.set(DUMMY_KEY, "STATE1");
    TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);
    // This will create a CombineFileRecordReader that itself contains a
    // DummyRecordReader.
    InputFormat inputFormat = new ChildRRInputFormat();
    Path[] files = { new Path("file1") };
    long[] lengths = { 1 };
    CombineFileSplit split = new CombineFileSplit(files, lengths);
    RecordReader rr = inputFormat.createRecordReader(split, context1);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);
    // Verify that the initial configuration is the one being used.
    // Right after construction the dummy key should have value "STATE1"
    assertEquals("Invalid initial dummy key value", "STATE1", rr.getCurrentKey().toString());
    // Switch the active context for the RecordReader...
    Configuration conf2 = new Configuration();
    conf2.set(DUMMY_KEY, "STATE2");
    TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
    rr.initialize(split, context2);
    // And verify that the new context is updated into the child record reader.
    assertEquals("Invalid secondary dummy key value", "STATE2", rr.getCurrentKey().toString());
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), InputFormat (org.apache.hadoop.mapreduce.InputFormat), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), RecordReader (org.apache.hadoop.mapreduce.RecordReader), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Test (org.junit.Test)
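
The DummyRecordReader referenced above is not shown either. A sketch consistent with the assertions in both tests, written as if nested inside the test class so the DUMMY_KEY constant (whose literal value does not appear on this page) is in scope; the field names and exact behavior are assumptions:

// Assumed imports: org.apache.hadoop.io.Text, org.apache.hadoop.mapreduce.*,
// org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.
public static class DummyRecordReader extends RecordReader<Text, Text> {
    private Text key = new Text();
    private Text value = new Text();
    private boolean used = false;

    // CombineFileRecordReader constructs the child reflectively through this
    // (split, context, index) constructor, once per file in the split.
    public DummyRecordReader(CombineFileSplit split, TaskAttemptContext context,
            Integer index) {
        value.set(split.getPath(index).toString());
        // Right after construction the key already reflects the context that
        // was passed to createRecordReader (checked by testRecordReaderInit).
        key.set(context.getConfiguration().get(DUMMY_KEY, ""));
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) {
        // A later initialize() with a new context must refresh the key;
        // that is what the "STATE2" assertion above verifies.
        key.set(context.getConfiguration().get(DUMMY_KEY, ""));
        used = false;
    }

    @Override
    public boolean nextKeyValue() {
        if (used) {
            return false;
        }
        // Emit exactly one (key, value) pair per sub-split.
        used = true;
        return true;
    }

    @Override
    public Text getCurrentKey() {
        return key;
    }

    @Override
    public Text getCurrentValue() {
        return value;
    }

    @Override
    public float getProgress() {
        return used ? 1.0f : 0.0f;
    }

    @Override
    public void close() {
    }
}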

Example 88 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

The class TestMRCJCFileOutputCommitter, method testEmptyOutput.

public void testEmptyOutput() throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // Do not write any output
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job)
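
The test only exercises the commit path for a task that wrote nothing. As a hypothetical follow-up check (not part of the test above), one could list the committed output directory just before the final cleanup and assert that no part files were produced; this assumes the outDir and conf variables from the test, plus imports for org.apache.hadoop.fs.FileSystem and org.apache.hadoop.fs.FileStatus:

    FileSystem fs = outDir.getFileSystem(conf);
    // After committing an empty task, the final output directory should
    // contain no part-* files (at most job bookkeeping such as _SUCCESS).
    for (FileStatus stat : fs.listStatus(outDir)) {
        assertFalse("Unexpected part file for an empty task",
            stat.getPath().getName().startsWith("part-"));
    }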

Example 89 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

The class TestMRCJCFileOutputCommitter, method testFailAbort.

@SuppressWarnings("unchecked")
public void testFailAbort() throws IOException, InterruptedException {
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
    conf.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    FileOutputFormat.setOutputPath(job, outDir);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
    RecordWriter<?, ?> theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do abort
    Throwable th = null;
    try {
        committer.abortTask(tContext);
    } catch (IOException ie) {
        th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    //Path taskBaseDirName = committer.getTaskAttemptBaseDirName(tContext);
    File jobTmpDir = new File(committer.getJobAttemptPath(jContext).toUri().getPath());
    File taskTmpDir = new File(committer.getTaskAttemptPath(tContext).toUri().getPath());
    File expectedFile = new File(taskTmpDir, partFile);
    assertTrue(expectedFile + " does not exists", expectedFile.exists());
    th = null;
    try {
        committer.abortJob(jContext, JobStatus.State.FAILED);
    } catch (IOException ie) {
        th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    assertTrue("job temp dir does not exists", jobTmpDir.exists());
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job)
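
FakeFileSystem, which the test registers for the faildel:/// scheme, is also not shown. A plausible sketch (an assumption, not the actual test source) is a local filesystem whose delete always fails with the message the assertions look for:

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;

// Hypothetical stand-in: behaves like the local filesystem except that every
// delete throws, which forces abortTask() and abortJob() to fail as asserted.
public class FakeFileSystem extends RawLocalFileSystem {
    public FakeFileSystem() {
        super();
    }

    @Override
    public URI getUri() {
        // Matches the fs.faildel.impl registration in the test.
        return URI.create("faildel:///");
    }

    @Override
    public boolean delete(Path p, boolean recursive) throws IOException {
        throw new IOException("fake delete failed");
    }
}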

Example 90 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

The class TestFixedLengthInputFormat, method runRandomTests.

private void runRandomTests(CompressionCodec codec) throws Exception {
    StringBuilder fileName = new StringBuilder("testFormat.txt");
    if (codec != null) {
        fileName.append(".gz");
    }
    localFs.delete(workDir, true);
    Path file = new Path(workDir, fileName.toString());
    int seed = new Random().nextInt();
    LOG.info("Seed = " + seed);
    Random random = new Random(seed);
    int MAX_TESTS = 20;
    LongWritable key;
    BytesWritable value;
    for (int i = 0; i < MAX_TESTS; i++) {
        LOG.info("----------------------------------------------------------");
        // Maximum total records of 999
        int totalRecords = random.nextInt(999) + 1;
        // Test an empty file
        if (i == 8) {
            totalRecords = 0;
        }
        // Maximum bytes in a record of 100K
        int recordLength = random.nextInt(1024 * 100) + 1;
        // For the 11th test, force a record length of 1
        if (i == 10) {
            recordLength = 1;
        }
        // The total bytes in the test file
        int fileSize = (totalRecords * recordLength);
        LOG.info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
        // Create the job 
        Job job = Job.getInstance(defaultConf);
        if (codec != null) {
            ReflectionUtils.setConf(codec, job.getConfiguration());
        }
        // Create the test file
        ArrayList<String> recordList = createFile(file, codec, recordLength, totalRecords);
        assertTrue(localFs.exists(file));
        //set the fixed length record length config property for the job
        FixedLengthInputFormat.setRecordLength(job.getConfiguration(), recordLength);
        int numSplits = 1;
        // Arbitrarily set number of splits.
        if (i > 0) {
            if (i == (MAX_TESTS - 1)) {
                // Test a split size that is less than record len
                numSplits = (int) (fileSize / Math.floor(recordLength / 2));
            } else {
                if (MAX_TESTS % i == 0) {
                    // Let us create a split size that is forced to be 
                    // smaller than the end file itself, (ensures 1+ splits)
                    numSplits = fileSize / (fileSize - random.nextInt(fileSize));
                } else {
                    // Just pick a random split size with no upper bound 
                    numSplits = Math.max(1, fileSize / random.nextInt(Integer.MAX_VALUE));
                }
            }
            LOG.info("Number of splits set to: " + numSplits);
        }
        job.getConfiguration().setLong("mapreduce.input.fileinputformat.split.maxsize", (long) (fileSize / numSplits));
        // setup the input path
        FileInputFormat.setInputPaths(job, workDir);
        // Try splitting the file in a variety of sizes
        FixedLengthInputFormat format = new FixedLengthInputFormat();
        List<InputSplit> splits = format.getSplits(job);
        LOG.info("Actual number of splits = " + splits.size());
        // Test combined split lengths = total file size
        long recordOffset = 0;
        int recordNumber = 0;
        for (InputSplit split : splits) {
            TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
            RecordReader<LongWritable, BytesWritable> reader = format.createRecordReader(split, context);
            MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
            reader.initialize(split, mcontext);
            Class<?> clazz = reader.getClass();
            assertEquals("RecordReader class should be FixedLengthRecordReader:", FixedLengthRecordReader.class, clazz);
            // Plow through the records in this split
            while (reader.nextKeyValue()) {
                key = reader.getCurrentKey();
                value = reader.getCurrentValue();
                assertEquals("Checking key", (long) (recordNumber * recordLength), key.get());
                String valueString = new String(value.getBytes(), 0, value.getLength());
                assertEquals("Checking record length:", recordLength, value.getLength());
                assertTrue("Checking for more records than expected:", recordNumber < totalRecords);
                String origRecord = recordList.get(recordNumber);
                assertEquals("Checking record content:", origRecord, valueString);
                recordNumber++;
            }
            reader.close();
        }
        assertEquals("Total original records should be total read records:", recordList.size(), recordNumber);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), BytesWritable (org.apache.hadoop.io.BytesWritable), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Random (java.util.Random), LongWritable (org.apache.hadoop.io.LongWritable), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit)
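
The createFile helper used above is not included on this page. A rough sketch of what it might do, assuming the test class's localFs field and a deterministic record fill for brevity (the actual helper presumably randomizes record content):

// Assumed imports: java.io.*, java.util.ArrayList, org.apache.hadoop.fs.Path,
// org.apache.hadoop.io.compress.CompressionCodec.
private static ArrayList<String> createFile(Path targetFile,
        CompressionCodec codec, int recordLength, int totalRecords)
        throws IOException {
    ArrayList<String> recordList = new ArrayList<String>(totalRecords);
    OutputStream ostream = localFs.create(targetFile);
    if (codec != null) {
        // Wrap the stream so the file is written compressed (e.g. .gz).
        ostream = codec.createOutputStream(ostream);
    }
    Writer writer = new OutputStreamWriter(ostream);
    try {
        StringBuilder sb = new StringBuilder(recordLength);
        for (int i = 0; i < totalRecords; i++) {
            sb.setLength(0);
            // Fill each record with exactly recordLength characters.
            for (int j = 0; j < recordLength; j++) {
                sb.append((char) ('a' + (j % 26)));
            }
            String rec = sb.toString();
            recordList.add(rec);
            writer.write(rec);
        }
    } finally {
        writer.close();
    }
    return recordList;
}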

Aggregations

TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 110
Configuration (org.apache.hadoop.conf.Configuration): 58
Job (org.apache.hadoop.mapreduce.Job): 44
Path (org.apache.hadoop.fs.Path): 39
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 38
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 36
Test (org.junit.Test): 35
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 33
JobContext (org.apache.hadoop.mapreduce.JobContext): 28
IOException (java.io.IOException): 27
File (java.io.File): 22
LongWritable (org.apache.hadoop.io.LongWritable): 22
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 21
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 19
MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl): 17
FileSystem (org.apache.hadoop.fs.FileSystem): 16
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 12
ArrayList (java.util.ArrayList): 11
BytesWritable (org.apache.hadoop.io.BytesWritable): 10
MapFile (org.apache.hadoop.io.MapFile): 10