
Example 21 with Reporter

Use of org.apache.hadoop.mapred.Reporter in project hadoop by apache.

From the class TestShuffleScheduler, method TestAggregatedTransferRate.

@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestAggregatedTransferRate() throws Exception {
    JobConf job = new JobConf();
    job.setNumMapTasks(10);
    //mock creation
    TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
    Reporter mockReporter = mock(Reporter.class);
    FileSystem mockFileSystem = mock(FileSystem.class);
    Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
    // needed to mock a class with generics
    @SuppressWarnings("unchecked")
    CombineOutputCollector<K, V> mockCombineOutputCollector =
            (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
    org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);
    @SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
            mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
            mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
            mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
            mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    TaskStatus status = new TaskStatus() {

        @Override
        public boolean getIsMap() {
            return false;
        }

        @Override
        public void addFetchFailedMap(TaskAttemptID mapTaskId) {
        }
    };
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null,
            progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(),
            context.getFailedShuffleCounter());
    TaskAttemptID attemptID0 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
    //adding the 1st interval, 40MB from 60s to 100s
    long bytes = (long) 40 * 1024 * 1024;
    scheduler.copySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
    Assert.assertEquals(copyMessage(1, 1, 1), progress.toString());
    TaskAttemptID attemptID1 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
    //adding the 2nd interval before the 1st interval, 50MB from 0s to 50s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
    Assert.assertEquals(copyMessage(2, 1, 1), progress.toString());
    TaskAttemptID attemptID2 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 2), 2);
    //adding the 3rd interval overlapping with the 1st and the 2nd interval
    //110MB from 25s to 80s
    bytes = (long) 110 * 1024 * 1024;
    scheduler.copySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
    Assert.assertEquals(copyMessage(3, 2, 2), progress.toString());
    TaskAttemptID attemptID3 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 3), 3);
    //adding the 4th interval just after the 1st interval, 100MB from 100s to 300s
    bytes = (long) 100 * 1024 * 1024;
    scheduler.copySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
    Assert.assertEquals(copyMessage(4, 0.5, 1), progress.toString());
    TaskAttemptID attemptID4 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 4), 4);
    //adding the 5th interval after the 4th, 50MB from 350s to 400s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
    Assert.assertEquals(copyMessage(5, 1, 1), progress.toString());
    TaskAttemptID attemptID5 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 5), 5);
    //adding the 6th interval after the 5th, 50MB from 450s to 500s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
    Assert.assertEquals(copyMessage(6, 1, 1), progress.toString());
    TaskAttemptID attemptID6 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 6), 6);
    //adding the 7th interval between the 4th and the 5th interval, 20MB from 320s to 340s
    bytes = (long) 20 * 1024 * 1024;
    scheduler.copySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
    Assert.assertEquals(copyMessage(7, 1, 1), progress.toString());
    TaskAttemptID attemptID7 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 7), 7);
    //adding the 8th interval overlapping with the 4th, the 5th, and the 7th, 30MB from 290s to 350s
    bytes = (long) 30 * 1024 * 1024;
    scheduler.copySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
    Assert.assertEquals(copyMessage(8, 0.5, 1), progress.toString());
    TaskAttemptID attemptID8 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 8), 8);
    //adding the 9th interval overlapping with 5th and 6th, 50MB from 400s to 450s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
    Assert.assertEquals(copyMessage(9, 1, 1), progress.toString());
    TaskAttemptID attemptID9 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 9), 9);
    //adding the 10th interval overlapping with all intervals, 500MB from 0s to 500s
    bytes = (long) 500 * 1024 * 1024;
    scheduler.copySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
    Assert.assertEquals(copyMessage(10, 1, 2), progress.toString());
}
Also used : Task(org.apache.hadoop.mapred.Task) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) ShuffleConsumerPlugin(org.apache.hadoop.mapred.ShuffleConsumerPlugin) Counter(org.apache.hadoop.mapred.Counters.Counter) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) JobConf(org.apache.hadoop.mapred.JobConf) MapOutputFile(org.apache.hadoop.mapred.MapOutputFile) Progress(org.apache.hadoop.util.Progress) Reporter(org.apache.hadoop.mapred.Reporter) TaskStatus(org.apache.hadoop.mapred.TaskStatus) CombineOutputCollector(org.apache.hadoop.mapred.Task.CombineOutputCollector) TaskUmbilicalProtocol(org.apache.hadoop.mapred.TaskUmbilicalProtocol) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) JobID(org.apache.hadoop.mapreduce.JobID) Test(org.junit.Test)
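The copyMessage helper from TestShuffleScheduler is not reproduced above; judging from the assertions, its second argument appears to be the MB/s rate of the individual copy and its third the aggregated rate over the union of all copy intervals seen so far. A minimal, self-contained sketch of that arithmetic for the first and the last assertion (class and variable names below are illustrative, not taken from the Hadoop test):

public class TransferRateSketch {
    public static void main(String[] args) {
        // 1st copy: 40 MB between 60s and 100s -> 1 MB/s; the intervals seen so far
        // cover 40s in total with 40 MB copied, so the aggregated rate is also 1 MB/s,
        // matching copyMessage(1, 1, 1).
        double firstCopyRate = 40.0 / ((100000 - 60000) / 1000.0);

        // 10th copy: 500 MB between 0s and 500s -> 1 MB/s for that copy alone, but the
        // union of all ten intervals now spans 0s..500s while
        // 40+50+110+100+50+50+20+30+50+500 = 1000 MB were moved in total, giving the
        // aggregated 2 MB/s asserted by copyMessage(10, 1, 2).
        double totalMB = 40 + 50 + 110 + 100 + 50 + 50 + 20 + 30 + 50 + 500;
        double aggregatedRate = totalMB / (500000 / 1000.0);

        System.out.printf("first copy: %.1f MB/s, aggregated: %.1f MB/s%n",
                firstCopyRate, aggregatedRate);
    }
}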

Example 22 with Reporter

Use of org.apache.hadoop.mapred.Reporter in project hadoop by apache.

From the class TestPipeApplication, method testPipesReduser.

/**
   * Tests org.apache.hadoop.mapred.pipes.PipesReducer:
   * verifies the transfer of key and value data.
   *
   * @throws Exception
   */
@Test
public void testPipesReduser() throws Exception {
    File[] psw = cleanTokenPasswordFile();
    JobConf conf = new JobConf();
    try {
        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(), "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, conf.getCredentials());
        File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer = new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.configure(conf);
        BooleanWritable bw = new BooleanWritable(true);
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);
        initStdOut(conf);
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
        CombineOutputCollector<IntWritable, Text> output = new CombineOutputCollector<IntWritable, Text>(new Counters.Counter(), new Progress());
        Reporter reporter = new TestTaskReporter();
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));
        reducer.reduce(bw, texts.iterator(), output, reporter);
        reducer.close();
        String stdOut = readStdOut(conf);
        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value  :first"));
        assertTrue(stdOut.contains("reduce value  :second"));
        assertTrue(stdOut.contains("reduce value  :third"));
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}
Also used : Reporter(org.apache.hadoop.mapred.Reporter) ArrayList(java.util.ArrayList) Token(org.apache.hadoop.security.token.Token) Text(org.apache.hadoop.io.Text) Counter(org.apache.hadoop.mapred.Counters.Counter) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) BooleanWritable(org.apache.hadoop.io.BooleanWritable) Counters(org.apache.hadoop.mapred.Counters) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
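TestTaskReporter above is a test-local Reporter implementation from TestPipeApplication and is not shown on this page. When no real task context is needed at all, the old mapred API also provides the no-op constant Reporter.NULL; below is a minimal sketch of driving a reducer by hand the same way the test does (the reducer and all other names are invented for illustration, assuming only stock Hadoop classes):

import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class ReporterNullSketch {

    /** Toy reducer that counts its values and talks to whatever Reporter it is given. */
    static class CountingReducer extends MapReduceBase
            implements Reducer<Text, Text, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int n = 0;
            while (values.hasNext()) {
                values.next();
                n++;
                reporter.progress();              // keeps a real task from timing out
            }
            reporter.setStatus("reduced " + key); // a no-op when Reporter.NULL is used
            output.collect(key, new IntWritable(n));
        }
    }

    public static void main(String[] args) throws IOException {
        Iterator<Text> values = Arrays.asList(new Text("a"), new Text("b")).iterator();
        // Reporter.NULL stands in for a real task reporter, just as the mocked and
        // test-local reporters do in the examples above.
        new CountingReducer().reduce(new Text("k"), values,
                (key, value) -> System.out.println(key + "\t" + value), Reporter.NULL);
    }
}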

Example 23 with Reporter

Use of org.apache.hadoop.mapred.Reporter in project ignite by apache.

From the class HadoopV1MapTask, method run.

/**
 * {@inheritDoc}
 */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();
    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;
    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();
    try {
        JobConf jobConf = taskCtx0.jobConf();
        InputFormat inFormat = jobConf.getInputFormat();
        HadoopInputSplit split = info().inputSplit();
        InputSplit nativeSplit;
        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock block = (HadoopFileBlock) split;
            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
        } else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);
        assert nativeSplit != null;
        Reporter reporter = new HadoopV1Reporter(taskCtx, nativeSplit);
        HadoopV1OutputCollector collector = null;
        try {
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), taskCtx0.attemptId());
            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);
            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);
            Object key = reader.createKey();
            Object val = reader.createValue();
            assert mapper != null;
            try {
                try {
                    while (reader.next(key, val)) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Map task cancelled.");
                        mapper.map(key, val, collector, reporter);
                    }
                    taskCtx.onMapperFinished();
                } finally {
                    mapper.close();
                }
            } finally {
                collector.closeWriter();
            }
            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();
            throw new IgniteCheckedException(e);
        }
    } finally {
        HadoopMapperUtils.clearMapperIndex();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Reporter(org.apache.hadoop.mapred.Reporter) RecordReader(org.apache.hadoop.mapred.RecordReader) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) HadoopFileBlock(org.apache.ignite.internal.processors.hadoop.HadoopFileBlock) FileSplit(org.apache.hadoop.mapred.FileSplit) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) HadoopTaskCancelledException(org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException) Mapper(org.apache.hadoop.mapred.Mapper) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) HadoopJobEx(org.apache.ignite.internal.processors.hadoop.HadoopJobEx) InputFormat(org.apache.hadoop.mapred.InputFormat) HadoopTaskCancelledException(org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException) JobConf(org.apache.hadoop.mapred.JobConf) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) InputSplit(org.apache.hadoop.mapred.InputSplit) HadoopV2TaskContext(org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext)
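HadoopV1Reporter, which adapts Ignite's task context to the old mapred Reporter interface, is not shown on this page. As a rough illustration of what such an adapter has to provide, here is a bare-bones Reporter that records status and progress calls and ignores counters (assuming the Reporter interface as it appears in Hadoop 2.x; every name below is illustrative and not taken from Ignite):

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.Reporter;

/** Bare-bones Reporter: remembers the last status, counts progress() calls, ignores counters. */
class SimpleReporter implements Reporter {
    private volatile String status = "";
    private volatile long progressCalls;

    @Override public void setStatus(String status) { this.status = status; }

    // A real adapter (such as HadoopV1Reporter) would map these onto its own counters.
    @Override public Counters.Counter getCounter(Enum<?> name) { return null; }
    @Override public Counters.Counter getCounter(String group, String name) { return null; }
    @Override public void incrCounter(Enum<?> key, long amount) { /* ignored */ }
    @Override public void incrCounter(String group, String counter, long amount) { /* ignored */ }

    @Override public InputSplit getInputSplit() throws UnsupportedOperationException {
        throw new UnsupportedOperationException("no input split available");
    }

    @Override public float getProgress() { return 0f; }

    @Override public void progress() { progressCalls++; }

    String lastStatus() { return status; }
    long progressCalls() { return progressCalls; }
}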

Aggregations

Reporter (org.apache.hadoop.mapred.Reporter): 23
JobConf (org.apache.hadoop.mapred.JobConf): 13
Test (org.junit.Test): 12
FileSystem (org.apache.hadoop.fs.FileSystem): 6
Result (org.apache.hadoop.hbase.client.Result): 5
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 5
InputSplit (org.apache.hadoop.mapred.InputSplit): 5
Path (org.apache.hadoop.fs.Path): 4
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 4
Counter (org.apache.hadoop.mapred.Counters.Counter): 4
File (java.io.File): 3
IOException (java.io.IOException): 3
Configuration (org.apache.hadoop.conf.Configuration): 3
LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator): 3
Cell (org.apache.hadoop.hbase.Cell): 3
KeyValue (org.apache.hadoop.hbase.KeyValue): 3
LongWritable (org.apache.hadoop.io.LongWritable): 3
Text (org.apache.hadoop.io.Text): 3
MapOutputFile (org.apache.hadoop.mapred.MapOutputFile): 3
ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin): 3