
Example 11 with Counter

Use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.

From the class TestShufflePlugin, the method testConsumerApi:

/**
 * A testing method verifying availability and accessibility of the API
 * needed by sub-classes of ShuffleConsumerPlugin.
 */
@Test
public <K, V> void testConsumerApi() {
    JobConf jobConf = new JobConf();
    ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();
    // mock creation
    ReduceTask mockReduceTask = mock(ReduceTask.class);
    TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
    Reporter mockReporter = mock(Reporter.class);
    FileSystem mockFileSystem = mock(FileSystem.class);
    Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = jobConf.getCombinerClass();
    // unchecked cast needed to mock a generic type
    @SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
    org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);
    try {
        String[] dirs = jobConf.getLocalDirs();
        // verify that these APIs are available through super class handler
        ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, jobConf, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
        shuffleConsumerPlugin.init(context);
        shuffleConsumerPlugin.run();
        shuffleConsumerPlugin.close();
    } catch (Exception e) {
        assertTrue("Threw exception:" + e, false);
    }
    // verify that these APIs are available for 3rd party plugins
    mockReduceTask.getTaskID();
    mockReduceTask.getJobID();
    mockReduceTask.getNumMaps();
    mockReduceTask.getPartition();
    mockReporter.progress();
}
Also used: ReduceTask (org.apache.hadoop.mapred.ReduceTask), Task (org.apache.hadoop.mapred.Task), ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin), Counter (org.apache.hadoop.mapred.Counters.Counter), FileSystem (org.apache.hadoop.fs.FileSystem), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), JobConf (org.apache.hadoop.mapred.JobConf), MapOutputFile (org.apache.hadoop.mapred.MapOutputFile), Progress (org.apache.hadoop.util.Progress), Reporter (org.apache.hadoop.mapred.Reporter), TaskStatus (org.apache.hadoop.mapred.TaskStatus), CombineOutputCollector (org.apache.hadoop.mapred.Task.CombineOutputCollector), TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol), LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator), Test (org.junit.Test)
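
TestShuffleConsumerPlugin itself is not shown on this page. As a hedged sketch of what such a fixture minimally needs (the class below is an assumption, not the actual Hadoop test fixture), the full ShuffleConsumerPlugin contract is just the three methods the test drives:

// Hypothetical no-op plugin covering the ShuffleConsumerPlugin<K, V>
// contract that testConsumerApi exercises through init()/run()/close().
static class NoOpShuffleConsumerPlugin<K, V> implements ShuffleConsumerPlugin<K, V> {

    @Override
    public void init(ShuffleConsumerPlugin.Context<K, V> context) {
        // a real plugin would keep the JobConf, counters, and allocator here
    }

    @Override
    public RawKeyValueIterator run() throws IOException, InterruptedException {
        // a real plugin fetches and merges map outputs, then returns an
        // iterator over the merged stream; null suffices for this API check
        return null;
    }

    @Override
    public void close() {
        // release fetcher threads, sockets, and local files
    }
}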

Example 12 with Counter

Use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.

From the class TestMerger, the method testMergeShouldReturnProperProgress:

@SuppressWarnings({ "unchecked" })
public void testMergeShouldReturnProperProgress(List<Segment<Text, Text>> segments) throws IOException {
    Path tmpDir = new Path("localpath");
    Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
    Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
    RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
    Counter readsCounter = new Counter();
    Counter writesCounter = new Counter();
    Progress mergePhase = new Progress();
    RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass, valueClass, segments, 2, tmpDir, comparator, getReporter(), readsCounter, writesCounter, mergePhase);
    final float epsilon = 0.00001f;
    // Reading 6 keys total, 3 each in 2 segments, so each key read moves the
    // progress forward 1/6th of the way. Initially the first keys from each
    // segment have been read as part of the merge setup, so progress = 2/6.
    Assert.assertEquals(2 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    // The first next() returns one of the keys already read during merge setup
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(2 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    // Subsequent next() calls should read one key and move progress
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(3 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(4 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    // At this point we've exhausted all of the keys in one segment
    // so getting the next key will return the already cached key from the
    // other segment
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(4 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    // Subsequent next() calls should read one key and move progress
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(5 / 6.0f, mergeQueue.getProgress().get(), epsilon);
    Assert.assertTrue(mergeQueue.next());
    Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
    // Now there should be no more input
    Assert.assertFalse(mergeQueue.next());
    Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
    Assert.assertNull(mergeQueue.getKey());
    Assert.assertEquals(0, mergeQueue.getValue().getData().length);
}
Also used : CompressAwarePath(org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath) Path(org.apache.hadoop.fs.Path) Progress(org.apache.hadoop.util.Progress) Counter(org.apache.hadoop.mapred.Counters.Counter) Text(org.apache.hadoop.io.Text) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator)
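
The progress arithmetic works because the merge reads six keys in total across two segments; the head key of each segment is consumed during merge setup, so progress starts at 2/6 and only genuinely new key reads advance it by 1/6. The helper above takes the segment list as a parameter; a driving test might look like the following sketch (getUncompressedSegment is an assumed helper of the surrounding test class, not a verified Hadoop API):

// Hypothetical entry point for the helper above: two segments of three
// Text pairs each, matching the 6-key progress arithmetic in the asserts.
@Test
public void testUncompressedMergeProgress() throws IOException {
    List<Segment<Text, Text>> segments = new ArrayList<Segment<Text, Text>>();
    segments.add(getUncompressedSegment(0)); // assumed helper: 3-key segment
    segments.add(getUncompressedSegment(1)); // assumed helper: 3-key segment
    testMergeShouldReturnProperProgress(segments);
}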

Example 13 with Counter

Use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.

From the class TestShuffleScheduler, the method TestAggregatedTransferRate:

@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestAggregatedTransferRate() throws Exception {
    JobConf job = new JobConf();
    job.setNumMapTasks(10);
    // mock creation
    TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
    Reporter mockReporter = mock(Reporter.class);
    FileSystem mockFileSystem = mock(FileSystem.class);
    Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
    // unchecked cast needed to mock a generic type
    @SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
    org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);
    @SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    TaskStatus status = new TaskStatus() {

        @Override
        public boolean getIsMap() {
            return false;
        }

        @Override
        public void addFetchFailedMap(TaskAttemptID mapTaskId) {
        }
    };
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
    TaskAttemptID attemptID0 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
    // adding the 1st interval, 40MB from 60s to 100s
    long bytes = (long) 40 * 1024 * 1024;
    scheduler.copySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
    Assert.assertEquals(copyMessage(1, 1, 1), progress.toString());
    TaskAttemptID attemptID1 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
    // adding the 2nd interval before the 1st interval, 50MB from 0s to 50s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
    Assert.assertEquals(copyMessage(2, 1, 1), progress.toString());
    TaskAttemptID attemptID2 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 2), 2);
    // adding the 3rd interval, overlapping the 1st and the 2nd intervals,
    // 110MB from 25s to 80s
    bytes = (long) 110 * 1024 * 1024;
    scheduler.copySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
    Assert.assertEquals(copyMessage(3, 2, 2), progress.toString());
    TaskAttemptID attemptID3 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 3), 3);
    // adding the 4th interval right after the 1st interval, 100MB from 100s to 300s
    bytes = (long) 100 * 1024 * 1024;
    scheduler.copySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
    Assert.assertEquals(copyMessage(4, 0.5, 1), progress.toString());
    TaskAttemptID attemptID4 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 4), 4);
    // adding the 5th interval after the 4th, 50MB from 350s to 400s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
    Assert.assertEquals(copyMessage(5, 1, 1), progress.toString());
    TaskAttemptID attemptID5 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 5), 5);
    // adding the 6th interval after the 5th, 50MB from 450s to 500s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
    Assert.assertEquals(copyMessage(6, 1, 1), progress.toString());
    TaskAttemptID attemptID6 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 6), 6);
    // adding the 7th interval between the 4th and the 5th intervals, 20MB from 320s to 340s
    bytes = (long) 20 * 1024 * 1024;
    scheduler.copySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
    Assert.assertEquals(copyMessage(7, 1, 1), progress.toString());
    TaskAttemptID attemptID7 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 7), 7);
    // adding the 8th interval, overlapping the 4th, the 5th, and the 7th, 30MB from 290s to 350s
    bytes = (long) 30 * 1024 * 1024;
    scheduler.copySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
    Assert.assertEquals(copyMessage(8, 0.5, 1), progress.toString());
    TaskAttemptID attemptID8 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 8), 8);
    // adding the 9th interval, bridging the 5th and the 6th, 50MB from 400s to 450s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
    Assert.assertEquals(copyMessage(9, 1, 1), progress.toString());
    TaskAttemptID attemptID9 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 9), 9);
    // adding the 10th interval, overlapping all of the intervals, 500MB from 0s to 500s
    bytes = (long) 500 * 1024 * 1024;
    scheduler.copySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
    Assert.assertEquals(copyMessage(10, 1, 2), progress.toString());
}
Also used: Task (org.apache.hadoop.mapred.Task), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin), Counter (org.apache.hadoop.mapred.Counters.Counter), FileSystem (org.apache.hadoop.fs.FileSystem), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), JobConf (org.apache.hadoop.mapred.JobConf), MapOutputFile (org.apache.hadoop.mapred.MapOutputFile), Progress (org.apache.hadoop.util.Progress), Reporter (org.apache.hadoop.mapred.Reporter), TaskStatus (org.apache.hadoop.mapred.TaskStatus), CombineOutputCollector (org.apache.hadoop.mapred.Task.CombineOutputCollector), TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol), LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator), JobID (org.apache.hadoop.mapreduce.JobID), Test (org.junit.Test)
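
Each assertion checks two figures embedded in the progress string: the rate of the individual copy (its bytes over its own time window) and the aggregated rate (all bytes copied so far over the union of all time windows). For instance, the 4th copy moves 100MB in 200s (0.5 MB/s), while the aggregate at that point is 300MB over 300s of covered wall time (1 MB/s), hence copyMessage(4, 0.5, 1). The copyMessage helper is not shown on this page; a hypothetical reconstruction (the exact format string is an assumption):

// Hypothetical reconstruction of copyMessage; only the attempt number and
// the two rates matter for the assertions above.
private static String copyMessage(int attemptNo, double rate, double aggregatedRate) {
    int attemptZero = attemptNo - 1;
    return String.format(
        "copy task(attempt_test_0000_m_%06d_%d succeeded at %1.2f MB/s)"
            + " Aggregated copy rate(%d of 10 at %1.2f MB/s)",
        attemptZero, attemptZero, rate, attemptNo, aggregatedRate);
}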

Example 14 with Counter

Use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.

From the class TestPipeApplication, the method testApplication:

/**
 * Tests org.apache.hadoop.mapred.pipes.Application and its internal
 * message handling: MessageType.REGISTER_COUNTER, INCREMENT_COUNTER,
 * STATUS, PROGRESS, ...
 *
 * @throws Throwable
 */
@Test
public void testApplication() throws Throwable {
    JobConf conf = new JobConf();
    RecordReader<FloatWritable, NullWritable> rReader = new Reader();
    // client for test
    File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationStub");
    TestTaskReporter reporter = new TestTaskReporter();
    File[] psw = cleanTokenPasswordFile();
    try {
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        // token for authorization
        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(), "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, conf.getCredentials());
        FakeCollector output = new FakeCollector(new Counters.Counter(), new Progress());
        FileSystem fs = new RawLocalFileSystem();
        fs.initialize(FsConstants.LOCAL_FS_URI, conf);
        Writer<IntWritable, Text> wr = new Writer<IntWritable, Text>(conf, fs.create(new Path(workSpace.getAbsolutePath() + File.separator + "outfile")), IntWritable.class, Text.class, null, null, true);
        output.setWriter(wr);
        conf.set(Submitter.PRESERVE_COMMANDFILE, "true");
        initStdOut(conf);
        Application<WritableComparable<IntWritable>, Writable, IntWritable, Text> application = new Application<WritableComparable<IntWritable>, Writable, IntWritable, Text>(conf, rReader, output, reporter, IntWritable.class, Text.class);
        application.getDownlink().flush();
        application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));
        application.getDownlink().flush();
        application.waitForFinish();
        wr.close();
        // test getDownlink().mapItem();
        String stdOut = readStdOut(conf);
        assertTrue(stdOut.contains("key:3"));
        assertTrue(stdOut.contains("value:txt"));
        // the reporter's counter and status should have been sent
        // test MessageType.REGISTER_COUNTER and INCREMENT_COUNTER
        assertEquals(1.0, reporter.getProgress(), 0.01);
        assertNotNull(reporter.getCounter("group", "name"));
        // test status MessageType.STATUS
        assertEquals("PROGRESS", reporter.getStatus());
        stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile"));
        // check MessageType.PROGRESS
        assertEquals(0.55f, rReader.getProgress(), 0.001);
        application.getDownlink().close();
        // test MessageType.OUTPUT
        Entry<IntWritable, Text> entry = output.getCollect().entrySet().iterator().next();
        assertEquals(123, entry.getKey().get());
        assertEquals("value", entry.getValue().toString());
        try {
            // try to abort
            application.abort(new Throwable());
            fail();
        } catch (IOException e) {
            // abort is expected to fail with exactly this message
            assertEquals("pipe child exception", e.getMessage());
        }
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}
Also used: RawLocalFileSystem (org.apache.hadoop.fs.RawLocalFileSystem), RecordReader (org.apache.hadoop.mapred.RecordReader), NullWritable (org.apache.hadoop.io.NullWritable), Writable (org.apache.hadoop.io.Writable), IntWritable (org.apache.hadoop.io.IntWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), FloatWritable (org.apache.hadoop.io.FloatWritable), Token (org.apache.hadoop.security.token.Token), AMRMTokenIdentifier (org.apache.hadoop.yarn.security.AMRMTokenIdentifier), FileSystem (org.apache.hadoop.fs.FileSystem), JobConf (org.apache.hadoop.mapred.JobConf), Path (org.apache.hadoop.fs.Path), Text (org.apache.hadoop.io.Text), IOException (java.io.IOException), Counter (org.apache.hadoop.mapred.Counters.Counter), WritableComparable (org.apache.hadoop.io.WritableComparable), Counters (org.apache.hadoop.mapred.Counters), File (java.io.File), Writer (org.apache.hadoop.mapred.IFile.Writer), Test (org.junit.Test)
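
The PipeApplicationStub client drives these checks by sending protocol messages back up the pipe; the Java-side uplink handler turns them into plain Reporter calls. A hedged sketch of those Java-side effects (the method name and increment amount are illustrative assumptions, not the actual Application uplink code):

// Sketch of the Java-side effect of each pipes message type exercised above.
static void simulateStubMessages(Reporter reporter) {
    // MessageType.REGISTER_COUNTER followed by MessageType.INCREMENT_COUNTER
    Counters.Counter counter = reporter.getCounter("group", "name");
    counter.increment(2); // the assertion only checks that the counter exists
    // MessageType.STATUS
    reporter.setStatus("PROGRESS");
    // MessageType.PROGRESS
    reporter.progress();
}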

Example 15 with Counter

Use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.

From the class CombinerHandler, the method create:

public static <K, V> ICombineHandler create(TaskContext context) throws IOException, ClassNotFoundException {
    final JobConf conf = new JobConf(context.getConf());
    conf.set(Constants.SERIALIZATION_FRAMEWORK, String.valueOf(SerializationFramework.WRITABLE_SERIALIZATION.getType()));
    String combinerClazz = conf.get(Constants.MAPRED_COMBINER_CLASS);
    if (null == combinerClazz) {
        combinerClazz = conf.get(MRJobConfig.COMBINE_CLASS_ATTR);
    }
    if (null == combinerClazz) {
        return null;
    } else {
        LOG.info("NativeTask Combiner is enabled, class = " + combinerClazz);
    }
    final Counter combineInputCounter = context.getTaskReporter().getCounter(TaskCounter.COMBINE_INPUT_RECORDS);
    final CombinerRunner<K, V> combinerRunner = CombinerRunner.create(conf, context.getTaskAttemptId(), combineInputCounter, context.getTaskReporter(), null);
    final INativeHandler nativeHandler = NativeBatchProcessor.create(NAME, conf, DataChannel.INOUT);
    @SuppressWarnings("unchecked") final BufferPusher<K, V> pusher = new BufferPusher<K, V>((Class<K>) context.getInputKeyClass(), (Class<V>) context.getInputValueClass(), nativeHandler);
    final BufferPuller puller = new BufferPuller(nativeHandler);
    return new CombinerHandler<K, V>(nativeHandler, combinerRunner, puller, pusher);
}
Also used: TaskCounter (org.apache.hadoop.mapreduce.TaskCounter), Counter (org.apache.hadoop.mapred.Counters.Counter), INativeHandler (org.apache.hadoop.mapred.nativetask.INativeHandler), JobConf (org.apache.hadoop.mapred.JobConf)
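
A rough usage sketch of the factory above: taskContext stands for an already initialized TaskContext, and the combine()/close() calls reflect a reading of the ICombineHandler interface, so treat the exact signatures as assumptions.

// Hypothetical call site: create() returns null when no combiner is
// configured, so the handler must be null-checked before use.
ICombineHandler combinerHandler = CombinerHandler.create(taskContext);
if (combinerHandler != null) {
    combinerHandler.combine(); // pull map outputs through the combiner once
    combinerHandler.close();   // release the native handler and buffers
}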

Aggregations

Counter (org.apache.hadoop.mapred.Counters.Counter): 17
Test (org.junit.Test): 8
Counters (org.apache.hadoop.mapred.Counters): 6
JobConf (org.apache.hadoop.mapred.JobConf): 6
FileSystem (org.apache.hadoop.fs.FileSystem): 5
IOException (java.io.IOException): 4
Path (org.apache.hadoop.fs.Path): 4
Group (org.apache.hadoop.mapred.Counters.Group): 4
TaskCounter (org.apache.hadoop.mapreduce.TaskCounter): 4
LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator): 3
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3
MapOutputFile (org.apache.hadoop.mapred.MapOutputFile): 3
Reporter (org.apache.hadoop.mapred.Reporter): 3
ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin): 3
Task (org.apache.hadoop.mapred.Task): 3
CombineOutputCollector (org.apache.hadoop.mapred.Task.CombineOutputCollector): 3
TaskStatus (org.apache.hadoop.mapred.TaskStatus): 3
TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol): 3
Progress (org.apache.hadoop.util.Progress): 3
ArrayList (java.util.ArrayList): 2