Use of org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath in project hadoop by apache.
From the class TestMerger, method testInMemoryAndOnDiskMerger:
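This test walks CompressAwarePath through its whole lifecycle: two in-memory merges each spill a sorted file to disk (registering a CompressAwarePath with the merge manager), and a final on-disk merge combines those spills into a single sorted output.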
@Test
public void testInMemoryAndOnDiskMerger() throws Throwable {
JobID jobId = new JobID("a", 0);
TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 1), 0);
TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 2), 0);
LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
// write map outputs
Map<String, String> map1 = new TreeMap<String, String>();
map1.put("apple", "disgusting");
map1.put("carrot", "delicious");
Map<String, String> map2 = new TreeMap<String, String>();
map1.put("banana", "pretty good");
byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0, mapOutputBytes1.length);
System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0, mapOutputBytes2.length);
// create merger and run merge
MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger = mergeManager.createInMemoryMerger();
List<InMemoryMapOutput<Text, Text>> mapOutputs1 = new ArrayList<InMemoryMapOutput<Text, Text>>();
mapOutputs1.add(mapOutput1);
mapOutputs1.add(mapOutput2);
inMemoryMerger.merge(mapOutputs1);
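// the in-memory merge should spill exactly one sorted file to disk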
Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 3), 0);
TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 4), 0);
TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 5), 0);
// write map outputs
Map<String, String> map3 = new TreeMap<String, String>();
map3.put("apple", "awesome");
map3.put("carrot", "amazing");
Map<String, String> map4 = new TreeMap<String, String>();
map4.put("banana", "bla");
byte[] mapOutputBytes3 = writeMapOutput(conf, map3);
byte[] mapOutputBytes4 = writeMapOutput(conf, map4);
InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(conf, mapId3, mergeManager, mapOutputBytes3.length, null, true);
InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(conf, mapId4, mergeManager, mapOutputBytes4.length, null, true);
System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0, mapOutputBytes3.length);
System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0, mapOutputBytes4.length);
// create merger and run merge
MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 = mergeManager.createInMemoryMerger();
List<InMemoryMapOutput<Text, Text>> mapOutputs2 = new ArrayList<InMemoryMapOutput<Text, Text>>();
mapOutputs2.add(mapOutput3);
mapOutputs2.add(mapOutput4);
inMemoryMerger2.merge(mapOutputs2);
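// the second in-memory merge adds a second on-disk output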
Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size());
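// read both spill files back and collect their CompressAwarePaths for the on-disk merge below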
List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>();
Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator();
List<String> keys = new ArrayList<String>();
List<String> values = new ArrayList<String>();
while (iterator.hasNext()) {
CompressAwarePath next = iterator.next();
readOnDiskMapOutput(conf, fs, next, keys, values);
paths.add(next);
}
Assert.assertEquals(Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot"), keys);
Assert.assertEquals(Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"), values);
mergeManager.close();
mergeManager = new MergeManagerImpl<Text, Text>(reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
MergeThread<CompressAwarePath, Text, Text> onDiskMerger = mergeManager.createOnDiskMerger();
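// merging the two spill files should leave a single on-disk output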
onDiskMerger.merge(paths);
Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
keys = new ArrayList<String>();
values = new ArrayList<String>();
readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values);
Assert.assertEquals(Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot"), keys);
Assert.assertEquals(Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"), values);
mergeManager.close();
Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
}
Use of org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath in project hadoop by apache.
From the class OnDiskMapOutput, method commit:
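commit() is where a CompressAwarePath enters the merge manager during a real shuffle: the fetched map output file is renamed to its final location and handed to closeOnDiskFile() together with its raw and compressed sizes.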
@Override
public void commit() throws IOException {
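// move the temporary shuffle output to its final location before registering it with the merge manager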
fs.rename(tmpOutputPath, outputPath);
CompressAwarePath compressAwarePath = new CompressAwarePath(outputPath, getSize(), this.compressedSize);
getMerger().closeOnDiskFile(compressAwarePath);
}
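As these snippets show, a CompressAwarePath is a Path that also carries the raw and compressed sizes of the file it names. A minimal sketch of constructing one directly, using only the constructor and accessor seen above (the path and sizes are made-up values, and the constructor is assumed to be accessible as it is for the same-package callers above):
Path out = new Path("/tmp/hypothetical/attempt_0_m_000001_0.out.merged"); // made-up path
// raw (decompressed) length first, compressed size second, matching the calls above
CompressAwarePath cap = new CompressAwarePath(out, 1024L, 512L);
long compressed = cap.getCompressedSize(); // 512; the ordering key checked by TestMergeManager below
Ordering by compressed size rather than raw length reflects what the files actually occupy on disk, which is exactly what the TestMergeManager assertions below verify.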
Use of org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath in project hadoop by apache.
From the class TestMergeManager, method testOnDiskMerger:
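This test verifies that on-disk outputs registered through closeOnDiskFile() are queued for merging in batches bounded by mapreduce.task.io.sort.factor, and that the CompressAwarePath entries in each batch are ordered by compressed size.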
@SuppressWarnings({ "unchecked", "deprecation" })
@Test(timeout = 10000)
public void testOnDiskMerger() throws IOException, URISyntaxException, InterruptedException {
JobConf jobConf = new JobConf();
final int SORT_FACTOR = 5;
jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, SORT_FACTOR);
MapOutputFile mapOutputFile = new MROutputFiles();
FileSystem fs = FileSystem.getLocal(jobConf);
MergeManagerImpl<IntWritable, IntWritable> manager = new MergeManagerImpl<IntWritable, IntWritable>(null, jobConf, fs, null, null, null, null, null, null, null, null, null, null, mapOutputFile);
MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable> onDiskMerger = (MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable>) Whitebox.getInternalState(manager, "onDiskMerger");
int mergeFactor = (Integer) Whitebox.getInternalState(onDiskMerger, "mergeFactor");
// make sure the io.sort.factor is set properly
assertEquals(SORT_FACTOR, mergeFactor);
// Stop the onDiskMerger thread so that we can intercept the list of files
// waiting to be merged.
onDiskMerger.suspend();
// Send the list of fake files waiting to be merged
Random rand = new Random();
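// queue 2 * SORT_FACTOR fake on-disk outputs with random compressed sizes via closeOnDiskFile()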
for (int i = 0; i < 2 * SORT_FACTOR; ++i) {
Path path = new Path("somePath");
CompressAwarePath cap = new CompressAwarePath(path, 1L, rand.nextInt());
manager.closeOnDiskFile(cap);
}
// Check that the files pending to be merged are in sorted order.
LinkedList<List<CompressAwarePath>> pendingToBeMerged = (LinkedList<List<CompressAwarePath>>) Whitebox.getInternalState(onDiskMerger, "pendingToBeMerged");
assertTrue("No inputs were added to list pending to merge", pendingToBeMerged.size() > 0);
for (int i = 0; i < pendingToBeMerged.size(); ++i) {
List<CompressAwarePath> inputs = pendingToBeMerged.get(i);
for (int j = 1; j < inputs.size(); ++j) {
assertTrue("Not enough / too many inputs were going to be merged", inputs.size() > 0 && inputs.size() <= SORT_FACTOR);
assertTrue("Inputs to be merged were not sorted according to size: ", inputs.get(j).getCompressedSize() >= inputs.get(j - 1).getCompressedSize());
}
}
}