Example 6 with Progress

use of org.apache.hadoop.util.Progress in project hadoop by apache.

From the class TestMerger, the helper method testMergeShouldReturnProperProgress (it takes the segment list as a parameter and is invoked by the individual tests, hence no @Test annotation):

@SuppressWarnings({ "unchecked" })
public void testMergeShouldReturnProperProgress(List<Segment<Text, Text>> segments) throws IOException {
    Path tmpDir = new Path("localpath");
    Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
    Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
    RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
    Counter readsCounter = new Counter();
    Counter writesCounter = new Counter();
    Progress mergePhase = new Progress();
    RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass, valueClass, segments, 2, tmpDir, comparator, getReporter(), readsCounter, writesCounter, mergePhase);
    final float epsilon = 0.00001f;
    // Reading 6 keys total, 3 each in 2 segments, so each key read moves the
    // progress forward 1/6th of the way. Initially the first keys from each
    // segment have been read as part of the merge setup, so progress = 2/6.
    Assert.assertEquals(2 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    // The first next() returns one of the keys already read during merge setup
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(2 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    // Subsequent next() calls should read one key and move progress
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(3 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(4 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    // At this point we've exhausted all of the keys in one segment
    // so getting the next key will return the already cached key from the
    // other segment
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(4 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    // Subsequent next() calls should read one key and move progress
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(5 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
    // Now there should be no more input
    Assert.assertFalse(mergeQueue.next());
    Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
    Assert.assertTrue(mergeQueue.getKey() == null);
    Assert.assertEquals(0, mergeQueue.getValue().getData().length);
}
Also used: Path (org.apache.hadoop.fs.Path), CompressAwarePath (org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath), Progress (org.apache.hadoop.util.Progress), Counter (org.apache.hadoop.mapred.Counters.Counter), Text (org.apache.hadoop.io.Text), RawKeyValueIterator (org.apache.hadoop.mapred.RawKeyValueIterator)
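
Because the helper is parameterized, each concrete test has to supply the segment list itself; the in-code comments assume two segments of three keys each. A minimal, hypothetical driver sketch inside the same test class (the segment factory newThreeKeySegment is illustrative only, not from the source; the real test prepares its segments with mocks):

@Test
public void testUncompressedProgress() throws IOException {
    // Two segments of three keys each, matching the 6-key arithmetic
    // (2 segments * 3 keys = 6 reads) asserted by the helper above.
    List<Segment<Text, Text>> segments = new ArrayList<Segment<Text, Text>>();
    segments.add(newThreeKeySegment()); // hypothetical factory for a 3-key segment
    segments.add(newThreeKeySegment());
    testMergeShouldReturnProperProgress(segments);
}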

Example 7 with Progress

use of org.apache.hadoop.util.Progress in project hadoop by apache.

From the class TestShuffleScheduler, method TestAggregatedTransferRate:

@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestAggregatedTransferRate() throws Exception {
    JobConf job = new JobConf();
    job.setNumMapTasks(10);
    //mock creation
    TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
    Reporter mockReporter = mock(Reporter.class);
    FileSystem mockFileSystem = mock(FileSystem.class);
    Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
    // needed for mock with generic
    @SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
    org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);
    @SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
            mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
            mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
            mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
            mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    TaskStatus status = new TaskStatus() {

        @Override
        public boolean getIsMap() {
            return false;
        }

        @Override
        public void addFetchFailedMap(TaskAttemptID mapTaskId) {
        }
    };
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
    TaskAttemptID attemptID0 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
    //adding the 1st interval, 40MB from 60s to 100s
    long bytes = (long) 40 * 1024 * 1024;
    scheduler.copySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
    Assert.assertEquals(copyMessage(1, 1, 1), progress.toString());
    TaskAttemptID attemptID1 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
    //adding the 2nd interval before the 1st interval, 50MB from 0s to 50s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
    Assert.assertEquals(copyMessage(2, 1, 1), progress.toString());
    TaskAttemptID attemptID2 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 2), 2);
    //adding the 3rd interval overlapping with the 1st and the 2nd interval
    //110MB from 25s to 80s
    bytes = (long) 110 * 1024 * 1024;
    scheduler.copySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
    Assert.assertEquals(copyMessage(3, 2, 2), progress.toString());
    TaskAttemptID attemptID3 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 3), 3);
    //adding the 4th interval just after the 1st interval (which ends at 100s), 100MB from 100s to 300s
    bytes = (long) 100 * 1024 * 1024;
    scheduler.copySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
    Assert.assertEquals(copyMessage(4, 0.5, 1), progress.toString());
    TaskAttemptID attemptID4 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 4), 4);
    //adding the 5th interval after the 4th, 50MB from 350s to 400s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
    Assert.assertEquals(copyMessage(5, 1, 1), progress.toString());
    TaskAttemptID attemptID5 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 5), 5);
    //adding the 6th interval after the 5th, 50MB from 450s to 500s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
    Assert.assertEquals(copyMessage(6, 1, 1), progress.toString());
    TaskAttemptID attemptID6 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 6), 6);
    //adding the 7th interval between the 4th and the 5th intervals, 20MB from 320s to 340s
    bytes = (long) 20 * 1024 * 1024;
    scheduler.copySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
    Assert.assertEquals(copyMessage(7, 1, 1), progress.toString());
    TaskAttemptID attemptID7 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 7), 7);
    //adding the 8th interval overlapping with the 4th, the 5th, and the 7th, 30MB from 290s to 350s
    bytes = (long) 30 * 1024 * 1024;
    scheduler.copySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
    Assert.assertEquals(copyMessage(8, 0.5, 1), progress.toString());
    TaskAttemptID attemptID8 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 8), 8);
    //adding the 9th interval between the 5th and the 6th (filling the 400s..450s gap), 50MB from 400s to 450s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
    Assert.assertEquals(copyMessage(9, 1, 1), progress.toString());
    TaskAttemptID attemptID9 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 9), 9);
    //adding the 10th interval overlapping with all intervals, 500MB from 0s to 500s
    bytes = (long) 500 * 1024 * 1024;
    scheduler.copySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
    Assert.assertEquals(copyMessage(10, 1, 2), progress.toString());
}
Also used: Task (org.apache.hadoop.mapred.Task), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin), Counter (org.apache.hadoop.mapred.Counters.Counter), FileSystem (org.apache.hadoop.fs.FileSystem), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), JobConf (org.apache.hadoop.mapred.JobConf), MapOutputFile (org.apache.hadoop.mapred.MapOutputFile), Progress (org.apache.hadoop.util.Progress), Reporter (org.apache.hadoop.mapred.Reporter), TaskStatus (org.apache.hadoop.mapred.TaskStatus), CombineOutputCollector (org.apache.hadoop.mapred.Task.CombineOutputCollector), TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol), LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator), JobID (org.apache.hadoop.mapreduce.JobID), Test (org.junit.Test)
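
The asserted copyMessage(n, rate1, rate2) values follow from simple arithmetic, assuming (from the asserted numbers, not from the helper's source) that rate1 is the transfer rate of the n-th copy alone and rate2 is the aggregated rate, i.e. total bytes copied divided by the length of the union of all copy intervals. A standalone worked check for the 4th copy:

public class AggregatedRateCheck {
    public static void main(String[] args) {
        // The 4th copy alone: 100MB over (300s - 100s) = 200s.
        double rate4 = 100.0 / 200.0; // 0.5 MB/s
        // The first four copies total 40 + 50 + 110 + 100 = 300MB, and the union
        // of their intervals [60,100], [0,50], [25,80], [100,300] covers 0s..300s.
        double aggregated = (40.0 + 50.0 + 110.0 + 100.0) / 300.0; // 1.0 MB/s
        // Matches the expected copyMessage(4, 0.5, 1).
        System.out.printf("%.2f MB/s, %.2f MB/s%n", rate4, aggregated);
    }
}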

Example 8 with Progress

use of org.apache.hadoop.util.Progress in project hadoop by apache.

From the class TestShuffleScheduler, method testTipFailed:

@SuppressWarnings("rawtypes")
@Test
public void testTipFailed() throws Exception {
    JobConf job = new JobConf();
    job.setNumMapTasks(2);
    TaskStatus status = new TaskStatus() {

        @Override
        public boolean getIsMap() {
            return false;
        }

        @Override
        public void addFetchFailedMap(TaskAttemptID mapTaskId) {
        }
    };
    Progress progress = new Progress();
    TaskAttemptID reduceId = new TaskAttemptID("314159", 0, TaskType.REDUCE, 0, 0);
    ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status, reduceId, null, progress, null, null, null);
    JobID jobId = new JobID();
    TaskID taskId1 = new TaskID(jobId, TaskType.REDUCE, 1);
    scheduler.tipFailed(taskId1);
    Assert.assertEquals("Progress should be 0.5", 0.5f, progress.getProgress(), 0.0f);
    Assert.assertFalse(scheduler.waitUntilDone(1));
    TaskID taskId0 = new TaskID(jobId, TaskType.REDUCE, 0);
    scheduler.tipFailed(taskId0);
    Assert.assertEquals("Progress should be 1.0", 1.0f, progress.getProgress(), 0.0f);
    Assert.assertTrue(scheduler.waitUntilDone(1));
}
Also used: Progress (org.apache.hadoop.util.Progress), TaskID (org.apache.hadoop.mapreduce.TaskID), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), TaskStatus (org.apache.hadoop.mapred.TaskStatus), JobConf (org.apache.hadoop.mapred.JobConf), JobID (org.apache.hadoop.mapreduce.JobID), Test (org.junit.Test)
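
testTipFailed reads the scheduler's progress through Progress.getProgress(), which returns a node's own value (unlike get(), which reports overall progress from the root of a phase tree). A minimal standalone sketch of the calls involved, with set(float) standing in for the updates the scheduler makes internally as map outputs are accounted for:

import org.apache.hadoop.util.Progress;

public class ProgressSketch {
    public static void main(String[] args) {
        Progress copyPhase = new Progress();          // a leaf node, progress in [0, 1]
        copyPhase.set(0.5f);                          // half of the inputs accounted for
        System.out.println(copyPhase.getProgress());  // 0.5: this node's own value
        copyPhase.complete();                         // marks the node fully done
        System.out.println(copyPhase.getProgress());  // 1.0
    }
}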

Aggregations

Progress (org.apache.hadoop.util.Progress): 8
Test (org.junit.Test): 5
LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator): 4
Counter (org.apache.hadoop.mapred.Counters.Counter): 4
JobConf (org.apache.hadoop.mapred.JobConf): 4
TaskStatus (org.apache.hadoop.mapred.TaskStatus): 4
JobID (org.apache.hadoop.mapreduce.JobID): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 3
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3
MapOutputFile (org.apache.hadoop.mapred.MapOutputFile): 3
Reporter (org.apache.hadoop.mapred.Reporter): 3
ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin): 3
Task (org.apache.hadoop.mapred.Task): 3
CombineOutputCollector (org.apache.hadoop.mapred.Task.CombineOutputCollector): 3
TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID): 3
TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol): 3
TaskID (org.apache.hadoop.mapreduce.TaskID): 3
IOException (java.io.IOException): 2
ArrayList (java.util.ArrayList): 2
Path (org.apache.hadoop.fs.Path): 2