Use of org.apache.hadoop.mapred.MapOutputFile in project hadoop by apache.
From the class TestShuffleScheduler, method TestSucceedAndFailedCopyMap:
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
JobConf job = new JobConf();
job.setNumMapTasks(2);
//mock creation
TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
Reporter mockReporter = mock(Reporter.class);
FileSystem mockFileSystem = mock(FileSystem.class);
Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
// needed for mock with generic
@SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
Counter mockCounter = mock(Counter.class);
TaskStatus mockTaskStatus = mock(TaskStatus.class);
Progress mockProgress = mock(Progress.class);
MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
Task mockTask = mock(Task.class);
@SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
TaskStatus status = new TaskStatus() {
@Override
public boolean getIsMap() {
return false;
}
@Override
public void addFetchFailedMap(TaskAttemptID mapTaskId) {
}
};
Progress progress = new Progress();
ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
MapHost host1 = new MapHost("host1", null);
TaskAttemptID failedAttemptID = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
TaskAttemptID succeedAttemptID = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
// handle output fetch failure for failedAttemptID, part I
scheduler.hostFailed(host1.getHostName());
// handle output fetch succeed for succeedAttemptID
long bytes = (long) 500 * 1024 * 1024;
scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);
// handle output fetch failure for failedAttemptID, part II
// for MAPREDUCE-6361: verify no NPE exception get thrown out
scheduler.copyFailed(failedAttemptID, host1, true, false);
}
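For context, the part I / part II sequence reproduces the MAPREDUCE-6361 scenario: a successful copy can clear the scheduler's per-host failure record, so a later copyFailed for the same host must not dereference a missing entry. A hypothetical sketch of that kind of null guard (field and variable names are illustrative, not the verbatim ShuffleSchedulerImpl code):

  // Inside a copyFailed(...) handler; hostFailures maps hostname -> failure count.
  IntWritable hostFailureCount = hostFailures.get(host.getHostName());
  if (hostFailureCount == null) {
    // The record may have been removed by an intervening copySucceeded; start a
    // fresh count instead of calling get() on null (the NPE the test guards against).
    hostFailures.put(host.getHostName(), new IntWritable(1));
  } else {
    hostFailureCount.set(hostFailureCount.get() + 1);
  }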
Use of org.apache.hadoop.mapred.MapOutputFile in project hadoop by apache.
From the class TestShufflePlugin, method testConsumerApi:
/**
 * A testing method verifying availability and accessibility of the API that is
 * needed by sub-classes of ShuffleConsumerPlugin.
 */
@Test
public void testConsumerApi() {
  JobConf jobConf = new JobConf();
  ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();
  // mock creation
  ReduceTask mockReduceTask = mock(ReduceTask.class);
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = jobConf.getCombinerClass();
  // needed for mocking with generics
  @SuppressWarnings("unchecked")
  CombineOutputCollector<K, V> mockCombineOutputCollector =
      (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);
  try {
    String[] dirs = jobConf.getLocalDirs();
    // verify that these APIs are available through the super class handler
    ShuffleConsumerPlugin.Context<K, V> context =
        new ShuffleConsumerPlugin.Context<K, V>(
            mockTaskAttemptID, jobConf, mockFileSystem, mockUmbilical,
            mockLocalDirAllocator, mockReporter, mockCompressionCodec,
            combinerClass, mockCombineOutputCollector, mockCounter, mockCounter,
            mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus,
            mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    shuffleConsumerPlugin.init(context);
    shuffleConsumerPlugin.run();
    shuffleConsumerPlugin.close();
  } catch (Exception e) {
    assertTrue("Threw exception: " + e, false);
  }
  // verify that these APIs are available to 3rd party plugins
  mockReduceTask.getTaskID();
  mockReduceTask.getJobID();
  mockReduceTask.getNumMaps();
  mockReduceTask.getPartition();
  mockReporter.progress();
}
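TestShuffleConsumerPlugin itself is not shown in this excerpt. A no-op implementation is all this test needs; a minimal sketch, assuming the standard ShuffleConsumerPlugin interface (init/run/close, with run() returning a RawKeyValueIterator) — the real class in Hadoop may differ in detail:

static class TestShuffleConsumerPlugin<K, V> implements ShuffleConsumerPlugin<K, V> {

  @Override
  public void init(ShuffleConsumerPlugin.Context<K, V> context) {
    // no-op: the test only needs the Context to be constructible and accepted
  }

  @Override
  public RawKeyValueIterator run() throws IOException, InterruptedException {
    // no shuffle work is done; a real plugin would fetch and merge map outputs here
    return null;
  }

  @Override
  public void close() {
    // nothing to release
  }
}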
Use of org.apache.hadoop.mapred.MapOutputFile in project hadoop by apache.
From the class TestMergeManager, method testOnDiskMerger:
@SuppressWarnings({ "unchecked", "deprecation" })
@Test(timeout = 10000)
public void testOnDiskMerger() throws IOException, URISyntaxException, InterruptedException {
JobConf jobConf = new JobConf();
final int SORT_FACTOR = 5;
jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, SORT_FACTOR);
MapOutputFile mapOutputFile = new MROutputFiles();
FileSystem fs = FileSystem.getLocal(jobConf);
MergeManagerImpl<IntWritable, IntWritable> manager = new MergeManagerImpl<IntWritable, IntWritable>(null, jobConf, fs, null, null, null, null, null, null, null, null, null, null, mapOutputFile);
MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable> onDiskMerger = (MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable>) Whitebox.getInternalState(manager, "onDiskMerger");
int mergeFactor = (Integer) Whitebox.getInternalState(onDiskMerger, "mergeFactor");
// make sure the io.sort.factor is set properly
assertEquals(mergeFactor, SORT_FACTOR);
// Stop the onDiskMerger thread so that we can intercept the list of files
// waiting to be merged.
onDiskMerger.suspend();
//Send the list of fake files waiting to be merged
Random rand = new Random();
for (int i = 0; i < 2 * SORT_FACTOR; ++i) {
Path path = new Path("somePath");
CompressAwarePath cap = new CompressAwarePath(path, 1l, rand.nextInt());
manager.closeOnDiskFile(cap);
}
//Check that the files pending to be merged are in sorted order.
LinkedList<List<CompressAwarePath>> pendingToBeMerged = (LinkedList<List<CompressAwarePath>>) Whitebox.getInternalState(onDiskMerger, "pendingToBeMerged");
assertTrue("No inputs were added to list pending to merge", pendingToBeMerged.size() > 0);
for (int i = 0; i < pendingToBeMerged.size(); ++i) {
List<CompressAwarePath> inputs = pendingToBeMerged.get(i);
for (int j = 1; j < inputs.size(); ++j) {
assertTrue("Not enough / too many inputs were going to be merged", inputs.size() > 0 && inputs.size() <= SORT_FACTOR);
assertTrue("Inputs to be merged were not sorted according to size: ", inputs.get(j).getCompressedSize() >= inputs.get(j - 1).getCompressedSize());
}
}
}
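The final assertion relies on closeOnDiskFile keeping on-disk files ordered by compressed size, so that each batch handed to the merger comes out size-sorted. A self-contained illustration of that invariant (a sketch with hypothetical names, not the MergeManagerImpl internals):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.TreeSet;

// Hypothetical stand-in for CompressAwarePath; only the compressed size matters here.
class DiskSegment {
  final String path;
  final long compressedSize;

  DiskSegment(String path, long compressedSize) {
    this.path = path;
    this.compressedSize = compressedSize;
  }
}

class SizeSortedPending {
  // Segments ordered by compressed size; ties broken by path so equal sizes survive.
  private final TreeSet<DiskSegment> onDisk = new TreeSet<DiskSegment>(
      Comparator.comparingLong((DiskSegment s) -> s.compressedSize)
          .thenComparing((DiskSegment s) -> s.path));

  void closeOnDiskFile(DiskSegment seg) {
    onDisk.add(seg);
  }

  // A TreeSet iterates in ascending order, so a batch drained from the front
  // is already sorted by size, which is the property the test asserts.
  List<DiskSegment> takeBatch(int mergeFactor) {
    List<DiskSegment> batch = new ArrayList<DiskSegment>();
    while (batch.size() < mergeFactor && !onDisk.isEmpty()) {
      batch.add(onDisk.pollFirst());
    }
    return batch;
  }
}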
Use of org.apache.hadoop.mapred.MapOutputFile in project hadoop by apache.
From the class TestShuffleScheduler, method TestAggregatedTransferRate:
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestAggregatedTransferRate() throws Exception {
JobConf job = new JobConf();
job.setNumMapTasks(10);
//mock creation
TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
Reporter mockReporter = mock(Reporter.class);
FileSystem mockFileSystem = mock(FileSystem.class);
Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
// needed for mock with generic
@SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
Counter mockCounter = mock(Counter.class);
TaskStatus mockTaskStatus = mock(TaskStatus.class);
Progress mockProgress = mock(Progress.class);
MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
Task mockTask = mock(Task.class);
@SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
TaskStatus status = new TaskStatus() {
@Override
public boolean getIsMap() {
return false;
}
@Override
public void addFetchFailedMap(TaskAttemptID mapTaskId) {
}
};
Progress progress = new Progress();
ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
TaskAttemptID attemptID0 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
//adding the 1st interval, 40MB from 60s to 100s
long bytes = (long) 40 * 1024 * 1024;
scheduler.copySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
Assert.assertEquals(copyMessage(1, 1, 1), progress.toString());
TaskAttemptID attemptID1 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
//adding the 2nd interval before the 1st interval, 50MB from 0s to 50s
bytes = (long) 50 * 1024 * 1024;
scheduler.copySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
Assert.assertEquals(copyMessage(2, 1, 1), progress.toString());
TaskAttemptID attemptID2 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 2), 2);
//adding the 3rd interval overlapping with the 1st and the 2nd interval
//110MB from 25s to 80s
bytes = (long) 110 * 1024 * 1024;
scheduler.copySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
Assert.assertEquals(copyMessage(3, 2, 2), progress.toString());
TaskAttemptID attemptID3 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 3), 3);
//adding the 4th interval just after the 2nd interval, 100MB from 100s to 300s
bytes = (long) 100 * 1024 * 1024;
scheduler.copySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
Assert.assertEquals(copyMessage(4, 0.5, 1), progress.toString());
TaskAttemptID attemptID4 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 4), 4);
//adding the 5th interval between after 4th, 50MB from 350s to 400s
bytes = (long) 50 * 1024 * 1024;
scheduler.copySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
Assert.assertEquals(copyMessage(5, 1, 1), progress.toString());
TaskAttemptID attemptID5 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 5), 5);
//adding the 6th interval between after 5th, 50MB from 450s to 500s
bytes = (long) 50 * 1024 * 1024;
scheduler.copySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
Assert.assertEquals(copyMessage(6, 1, 1), progress.toString());
TaskAttemptID attemptID6 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 6), 6);
//adding the 7th interval between after 5th and 6th interval, 20MB from 320s to 340s
bytes = (long) 20 * 1024 * 1024;
scheduler.copySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
Assert.assertEquals(copyMessage(7, 1, 1), progress.toString());
TaskAttemptID attemptID7 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 7), 7);
//adding the 8th interval overlapping with 4th, 5th, and 7th 30MB from 290s to 350s
bytes = (long) 30 * 1024 * 1024;
scheduler.copySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
Assert.assertEquals(copyMessage(8, 0.5, 1), progress.toString());
TaskAttemptID attemptID8 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 8), 8);
//adding the 9th interval overlapping with 5th and 6th, 50MB from 400s to 450s
bytes = (long) 50 * 1024 * 1024;
scheduler.copySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
Assert.assertEquals(copyMessage(9, 1, 1), progress.toString());
TaskAttemptID attemptID9 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 9), 9);
//adding the 10th interval overlapping with all intervals, 500MB from 0s to 500s
bytes = (long) 500 * 1024 * 1024;
scheduler.copySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
Assert.assertEquals(copyMessage(10, 1, 2), progress.toString());
}
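Going by the comments above, the second argument to copyMessage is the transfer rate of the individual copy and the third is the aggregated rate over the union of all intervals so far, both in MB/s. A quick standalone check of the arithmetic behind the fourth assertion, copyMessage(4, 0.5, 1) (copyMessage itself is a private helper of this test class and is not shown in this excerpt):

  // 4th copy: 100MB transferred between t=100s and t=300s
  long bytes = 100L * 1024 * 1024;
  long startMillis = 100000, endMillis = 300000;
  double copyRate = (bytes / 1024.0 / 1024.0) / ((endMillis - startMillis) / 1000.0);
  // copyRate == 0.5 MB/s -> the second argument

  // Aggregated: 40 + 50 + 110 + 100 = 300MB over the merged interval [0s, 300s]
  // (the first four intervals overlap or touch, so their union spans 300s).
  double aggregatedRate = 300.0 / 300.0;
  // aggregatedRate == 1 MB/s -> the third argument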