use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.
the class TestShufflePlugin method testConsumerApi.
@Test
public /**
* A testing method verifying availability and accessibility of API that is needed
* for sub-classes of ShuffleConsumerPlugin
*/
void testConsumerApi() {
JobConf jobConf = new JobConf();
ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();
//mock creation
ReduceTask mockReduceTask = mock(ReduceTask.class);
TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
Reporter mockReporter = mock(Reporter.class);
FileSystem mockFileSystem = mock(FileSystem.class);
Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = jobConf.getCombinerClass();
// needed for mock with generic
@SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
Counter mockCounter = mock(Counter.class);
TaskStatus mockTaskStatus = mock(TaskStatus.class);
Progress mockProgress = mock(Progress.class);
MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
Task mockTask = mock(Task.class);
try {
String[] dirs = jobConf.getLocalDirs();
// verify that these APIs are available through super class handler
ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, jobConf, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
shuffleConsumerPlugin.init(context);
shuffleConsumerPlugin.run();
shuffleConsumerPlugin.close();
} catch (Exception e) {
assertTrue("Threw exception:" + e, false);
}
// verify that these APIs are available for 3rd party plugins
mockReduceTask.getTaskID();
mockReduceTask.getJobID();
mockReduceTask.getNumMaps();
mockReduceTask.getPartition();
mockReporter.progress();
}
use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.
the class TestMerger method testMergeShouldReturnProperProgress.
@SuppressWarnings({ "unchecked" })
public void testMergeShouldReturnProperProgress(List<Segment<Text, Text>> segments) throws IOException {
Path tmpDir = new Path("localpath");
Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
Counter readsCounter = new Counter();
Counter writesCounter = new Counter();
Progress mergePhase = new Progress();
RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass, valueClass, segments, 2, tmpDir, comparator, getReporter(), readsCounter, writesCounter, mergePhase);
final float epsilon = 0.00001f;
// Reading 6 keys total, 3 each in 2 segments, so each key read moves the
// progress forward 1/6th of the way. Initially the first keys from each
// segment have been read as part of the merge setup, so progress = 2/6.
Assert.assertEquals(2 / 6.0f, mergeQueue.getProgress().get(), epsilon);
// The first next() returns one of the keys already read during merge setup
Assert.assertTrue(mergeQueue.next());
Assert.assertEquals(2 / 6.0f, mergeQueue.getProgress().get(), epsilon);
// Subsequent next() calls should read one key and move progress
Assert.assertTrue(mergeQueue.next());
Assert.assertEquals(3 / 6.0f, mergeQueue.getProgress().get(), epsilon);
Assert.assertTrue(mergeQueue.next());
Assert.assertEquals(4 / 6.0f, mergeQueue.getProgress().get(), epsilon);
// At this point we've exhausted all of the keys in one segment
// so getting the next key will return the already cached key from the
// other segment
Assert.assertTrue(mergeQueue.next());
Assert.assertEquals(4 / 6.0f, mergeQueue.getProgress().get(), epsilon);
// Subsequent next() calls should read one key and move progress
Assert.assertTrue(mergeQueue.next());
Assert.assertEquals(5 / 6.0f, mergeQueue.getProgress().get(), epsilon);
Assert.assertTrue(mergeQueue.next());
Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
// Now there should be no more input
Assert.assertFalse(mergeQueue.next());
Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
Assert.assertTrue(mergeQueue.getKey() == null);
Assert.assertEquals(0, mergeQueue.getValue().getData().length);
}
use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.
the class TestShuffleScheduler method TestAggregatedTransferRate.
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestAggregatedTransferRate() throws Exception {
JobConf job = new JobConf();
job.setNumMapTasks(10);
//mock creation
TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
Reporter mockReporter = mock(Reporter.class);
FileSystem mockFileSystem = mock(FileSystem.class);
Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
// needed for mock with generic
@SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
Counter mockCounter = mock(Counter.class);
TaskStatus mockTaskStatus = mock(TaskStatus.class);
Progress mockProgress = mock(Progress.class);
MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
Task mockTask = mock(Task.class);
@SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
TaskStatus status = new TaskStatus() {
@Override
public boolean getIsMap() {
return false;
}
@Override
public void addFetchFailedMap(TaskAttemptID mapTaskId) {
}
};
Progress progress = new Progress();
ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
TaskAttemptID attemptID0 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
//adding the 1st interval, 40MB from 60s to 100s
long bytes = (long) 40 * 1024 * 1024;
scheduler.copySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
Assert.assertEquals(copyMessage(1, 1, 1), progress.toString());
TaskAttemptID attemptID1 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
//adding the 2nd interval before the 1st interval, 50MB from 0s to 50s
bytes = (long) 50 * 1024 * 1024;
scheduler.copySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
Assert.assertEquals(copyMessage(2, 1, 1), progress.toString());
TaskAttemptID attemptID2 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 2), 2);
//adding the 3rd interval overlapping with the 1st and the 2nd interval
//110MB from 25s to 80s
bytes = (long) 110 * 1024 * 1024;
scheduler.copySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
Assert.assertEquals(copyMessage(3, 2, 2), progress.toString());
TaskAttemptID attemptID3 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 3), 3);
//adding the 4th interval just after the 2nd interval, 100MB from 100s to 300s
bytes = (long) 100 * 1024 * 1024;
scheduler.copySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
Assert.assertEquals(copyMessage(4, 0.5, 1), progress.toString());
TaskAttemptID attemptID4 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 4), 4);
//adding the 5th interval between after 4th, 50MB from 350s to 400s
bytes = (long) 50 * 1024 * 1024;
scheduler.copySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
Assert.assertEquals(copyMessage(5, 1, 1), progress.toString());
TaskAttemptID attemptID5 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 5), 5);
//adding the 6th interval between after 5th, 50MB from 450s to 500s
bytes = (long) 50 * 1024 * 1024;
scheduler.copySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
Assert.assertEquals(copyMessage(6, 1, 1), progress.toString());
TaskAttemptID attemptID6 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 6), 6);
//adding the 7th interval between after 5th and 6th interval, 20MB from 320s to 340s
bytes = (long) 20 * 1024 * 1024;
scheduler.copySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
Assert.assertEquals(copyMessage(7, 1, 1), progress.toString());
TaskAttemptID attemptID7 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 7), 7);
//adding the 8th interval overlapping with 4th, 5th, and 7th 30MB from 290s to 350s
bytes = (long) 30 * 1024 * 1024;
scheduler.copySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
Assert.assertEquals(copyMessage(8, 0.5, 1), progress.toString());
TaskAttemptID attemptID8 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 8), 8);
//adding the 9th interval overlapping with 5th and 6th, 50MB from 400s to 450s
bytes = (long) 50 * 1024 * 1024;
scheduler.copySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
Assert.assertEquals(copyMessage(9, 1, 1), progress.toString());
TaskAttemptID attemptID9 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 9), 9);
//adding the 10th interval overlapping with all intervals, 500MB from 0s to 500s
bytes = (long) 500 * 1024 * 1024;
scheduler.copySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
Assert.assertEquals(copyMessage(10, 1, 2), progress.toString());
}
use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.
the class TestPipeApplication method testApplication.
/**
* test org.apache.hadoop.mapred.pipes.Application
* test a internal functions: MessageType.REGISTER_COUNTER, INCREMENT_COUNTER, STATUS, PROGRESS...
*
* @throws Throwable
*/
@Test
public void testApplication() throws Throwable {
JobConf conf = new JobConf();
RecordReader<FloatWritable, NullWritable> rReader = new Reader();
// client for test
File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationStub");
TestTaskReporter reporter = new TestTaskReporter();
File[] psw = cleanTokenPasswordFile();
try {
conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);
conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
// token for authorization
Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(), "password".getBytes(), new Text("kind"), new Text("service"));
TokenCache.setJobToken(token, conf.getCredentials());
FakeCollector output = new FakeCollector(new Counters.Counter(), new Progress());
FileSystem fs = new RawLocalFileSystem();
fs.initialize(FsConstants.LOCAL_FS_URI, conf);
Writer<IntWritable, Text> wr = new Writer<IntWritable, Text>(conf, fs.create(new Path(workSpace.getAbsolutePath() + File.separator + "outfile")), IntWritable.class, Text.class, null, null, true);
output.setWriter(wr);
conf.set(Submitter.PRESERVE_COMMANDFILE, "true");
initStdOut(conf);
Application<WritableComparable<IntWritable>, Writable, IntWritable, Text> application = new Application<WritableComparable<IntWritable>, Writable, IntWritable, Text>(conf, rReader, output, reporter, IntWritable.class, Text.class);
application.getDownlink().flush();
application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));
application.getDownlink().flush();
application.waitForFinish();
wr.close();
// test getDownlink().mapItem();
String stdOut = readStdOut(conf);
assertTrue(stdOut.contains("key:3"));
assertTrue(stdOut.contains("value:txt"));
// reporter test counter, and status should be sended
// test MessageType.REGISTER_COUNTER and INCREMENT_COUNTER
assertEquals(1.0, reporter.getProgress(), 0.01);
assertNotNull(reporter.getCounter("group", "name"));
// test status MessageType.STATUS
assertEquals(reporter.getStatus(), "PROGRESS");
stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile"));
// check MessageType.PROGRESS
assertEquals(0.55f, rReader.getProgress(), 0.001);
application.getDownlink().close();
// test MessageType.OUTPUT
Entry<IntWritable, Text> entry = output.getCollect().entrySet().iterator().next();
assertEquals(123, entry.getKey().get());
assertEquals("value", entry.getValue().toString());
try {
// try to abort
application.abort(new Throwable());
fail();
} catch (IOException e) {
// abort works ?
assertEquals("pipe child exception", e.getMessage());
}
} finally {
if (psw != null) {
// remove password files
for (File file : psw) {
file.deleteOnExit();
}
}
}
}
use of org.apache.hadoop.mapred.Counters.Counter in project hadoop by apache.
the class CombinerHandler method create.
public static <K, V> ICombineHandler create(TaskContext context) throws IOException, ClassNotFoundException {
final JobConf conf = new JobConf(context.getConf());
conf.set(Constants.SERIALIZATION_FRAMEWORK, String.valueOf(SerializationFramework.WRITABLE_SERIALIZATION.getType()));
String combinerClazz = conf.get(Constants.MAPRED_COMBINER_CLASS);
if (null == combinerClazz) {
combinerClazz = conf.get(MRJobConfig.COMBINE_CLASS_ATTR);
}
if (null == combinerClazz) {
return null;
} else {
LOG.info("NativeTask Combiner is enabled, class = " + combinerClazz);
}
final Counter combineInputCounter = context.getTaskReporter().getCounter(TaskCounter.COMBINE_INPUT_RECORDS);
final CombinerRunner<K, V> combinerRunner = CombinerRunner.create(conf, context.getTaskAttemptId(), combineInputCounter, context.getTaskReporter(), null);
final INativeHandler nativeHandler = NativeBatchProcessor.create(NAME, conf, DataChannel.INOUT);
@SuppressWarnings("unchecked") final BufferPusher<K, V> pusher = new BufferPusher<K, V>((Class<K>) context.getInputKeyClass(), (Class<V>) context.getInputValueClass(), nativeHandler);
final BufferPuller puller = new BufferPuller(nativeHandler);
return new CombinerHandler<K, V>(nativeHandler, combinerRunner, puller, pusher);
}
Aggregations