Example 1 with CompressAwarePath

use of org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath in project hadoop by apache.

From the class TestMerger, the method testInMemoryAndOnDiskMerger:

@Test
public void testInMemoryAndOnDiskMerger() throws Throwable {
    JobID jobId = new JobID("a", 0);
    TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
    TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 1), 0);
    TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 2), 0);
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
    // write map outputs
    Map<String, String> map1 = new TreeMap<String, String>();
    map1.put("apple", "disgusting");
    map1.put("carrot", "delicious");
    Map<String, String> map2 = new TreeMap<String, String>();
    map1.put("banana", "pretty good");
    byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
    byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
    System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0, mapOutputBytes1.length);
    System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0, mapOutputBytes2.length);
    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger = mergeManager.createInMemoryMerger();
    List<InMemoryMapOutput<Text, Text>> mapOutputs1 = new ArrayList<InMemoryMapOutput<Text, Text>>();
    mapOutputs1.add(mapOutput1);
    mapOutputs1.add(mapOutput2);
    inMemoryMerger.merge(mapOutputs1);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 3), 0);
    TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 4), 0);
    TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 5), 0);
    // write map outputs
    Map<String, String> map3 = new TreeMap<String, String>();
    map3.put("apple", "awesome");
    map3.put("carrot", "amazing");
    Map<String, String> map4 = new TreeMap<String, String>();
    map4.put("banana", "bla");
    byte[] mapOutputBytes3 = writeMapOutput(conf, map3);
    byte[] mapOutputBytes4 = writeMapOutput(conf, map4);
    InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(conf, mapId3, mergeManager, mapOutputBytes3.length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(conf, mapId4, mergeManager, mapOutputBytes4.length, null, true);
    System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0, mapOutputBytes3.length);
    System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0, mapOutputBytes4.length);
    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 = mergeManager.createInMemoryMerger();
    List<InMemoryMapOutput<Text, Text>> mapOutputs2 = new ArrayList<InMemoryMapOutput<Text, Text>>();
    mapOutputs2.add(mapOutput3);
    mapOutputs2.add(mapOutput4);
    inMemoryMerger2.merge(mapOutputs2);
    Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size());
    List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>();
    Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator();
    List<String> keys = new ArrayList<String>();
    List<String> values = new ArrayList<String>();
    while (iterator.hasNext()) {
        CompressAwarePath next = iterator.next();
        readOnDiskMapOutput(conf, fs, next, keys, values);
        paths.add(next);
    }
    Assert.assertEquals(Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot"), keys);
    Assert.assertEquals(Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"), values);
    mergeManager.close();
    mergeManager = new MergeManagerImpl<Text, Text>(reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
    MergeThread<CompressAwarePath, Text, Text> onDiskMerger = mergeManager.createOnDiskMerger();
    onDiskMerger.merge(paths);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    keys = new ArrayList<String>();
    values = new ArrayList<String>();
    readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values);
    Assert.assertEquals(Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot"), keys);
    Assert.assertEquals(Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"), values);
    mergeManager.close();
    Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
    Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
    Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
}
Also used: MROutputFiles (org.apache.hadoop.mapred.MROutputFiles), Progress (org.apache.hadoop.util.Progress), TaskID (org.apache.hadoop.mapreduce.TaskID), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), TreeMap (java.util.TreeMap), CompressAwarePath (org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath), LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator), JobID (org.apache.hadoop.mapreduce.JobID), Test (org.junit.Test)
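
The test above relies on two TestMerger helpers that the snippet does not show: writeMapOutput, which serializes a TreeMap into the byte layout an InMemoryMapOutput expects, and readOnDiskMapOutput, which reads keys and values back out of a merged file. Below is a minimal sketch of the write side, assuming map outputs are serialized in Hadoop's IFile record format; it is an illustrative reconstruction (requiring java.io.ByteArrayOutputStream, org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.FSDataOutputStream, and org.apache.hadoop.mapred.IFile), not the verbatim helper.

private byte[] writeMapOutput(Configuration conf, Map<String, String> keysToValues)
        throws IOException {
    // Serialize the sorted map into an in-memory IFile so the bytes can be
    // copied straight into an InMemoryMapOutput's backing array.
    ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
    FSDataOutputStream fsdos = new FSDataOutputStream(baos, null);
    IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(
            conf, fsdos, Text.class, Text.class, null, null);
    for (Map.Entry<String, String> entry : keysToValues.entrySet()) {
        writer.append(new Text(entry.getKey()), new Text(entry.getValue()));
    }
    writer.close();
    return baos.toByteArray();
}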

Example 2 with CompressAwarePath

use of org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath in project hadoop by apache.

From the class OnDiskMapOutput, the method commit:

@Override
public void commit() throws IOException {
    fs.rename(tmpOutputPath, outputPath);
    CompressAwarePath compressAwarePath = new CompressAwarePath(outputPath, getSize(), this.compressedSize);
    getMerger().closeOnDiskFile(compressAwarePath);
}
Also used: CompressAwarePath (org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath)
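
The rename alone is not enough for the merge manager: it also needs the file's uncompressed and on-disk sizes to schedule later merges, and that is the bookkeeping CompressAwarePath carries. From the constructor call above and the getCompressedSize() call in Example 3, the class's shape can be sketched roughly as follows (the field names are assumptions, not the verbatim source):

// Sketch: a Path that also remembers how large the file is raw and compressed.
static class CompressAwarePath extends Path {
    private final long rawDataLength;   // uncompressed payload size (assumed name)
    private final long compressedSize;  // bytes actually written to disk (assumed name)

    public CompressAwarePath(Path path, long rawDataLength, long compressedSize) {
        super(path.toUri());
        this.rawDataLength = rawDataLength;
        this.compressedSize = compressedSize;
    }

    public long getRawDataLength() {
        return rawDataLength;
    }

    public long getCompressedSize() {
        return compressedSize;
    }
}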

Example 3 with CompressAwarePath

use of org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath in project hadoop by apache.

From the class TestMergeManager, the method testOnDiskMerger:

@SuppressWarnings({ "unchecked", "deprecation" })
@Test(timeout = 10000)
public void testOnDiskMerger() throws IOException, URISyntaxException, InterruptedException {
    JobConf jobConf = new JobConf();
    final int SORT_FACTOR = 5;
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, SORT_FACTOR);
    MapOutputFile mapOutputFile = new MROutputFiles();
    FileSystem fs = FileSystem.getLocal(jobConf);
    MergeManagerImpl<IntWritable, IntWritable> manager = new MergeManagerImpl<IntWritable, IntWritable>(null, jobConf, fs, null, null, null, null, null, null, null, null, null, null, mapOutputFile);
    MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable> onDiskMerger = (MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable>) Whitebox.getInternalState(manager, "onDiskMerger");
    int mergeFactor = (Integer) Whitebox.getInternalState(onDiskMerger, "mergeFactor");
    // make sure the io.sort.factor is set properly
    assertEquals(SORT_FACTOR, mergeFactor);
    // Stop the onDiskMerger thread so that we can intercept the list of files
    // waiting to be merged.
    onDiskMerger.suspend();
    // Send the list of fake files waiting to be merged
    Random rand = new Random();
    for (int i = 0; i < 2 * SORT_FACTOR; ++i) {
        Path path = new Path("somePath");
        CompressAwarePath cap = new CompressAwarePath(path, 1L, rand.nextInt());
        manager.closeOnDiskFile(cap);
    }
    // Check that the files pending to be merged are in sorted order.
    LinkedList<List<CompressAwarePath>> pendingToBeMerged = (LinkedList<List<CompressAwarePath>>) Whitebox.getInternalState(onDiskMerger, "pendingToBeMerged");
    assertTrue("No inputs were added to list pending to merge", pendingToBeMerged.size() > 0);
    for (int i = 0; i < pendingToBeMerged.size(); ++i) {
        List<CompressAwarePath> inputs = pendingToBeMerged.get(i);
        for (int j = 1; j < inputs.size(); ++j) {
            assertTrue("Not enough / too many inputs were going to be merged", inputs.size() > 0 && inputs.size() <= SORT_FACTOR);
            assertTrue("Inputs to be merged were not sorted according to size: ", inputs.get(j).getCompressedSize() >= inputs.get(j - 1).getCompressedSize());
        }
    }
}
Also used: CompressAwarePath (org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath), Path (org.apache.hadoop.fs.Path), MROutputFiles (org.apache.hadoop.mapred.MROutputFiles), LinkedList (java.util.LinkedList), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Random (java.util.Random), FileSystem (org.apache.hadoop.fs.FileSystem), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), ArrayList (java.util.ArrayList), List (java.util.List), JobConf (org.apache.hadoop.mapred.JobConf), MapOutputFile (org.apache.hadoop.mapred.MapOutputFile), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
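
Taken together, the assertions pin down two invariants of the on-disk merge path: pending files are handed to the merger in batches of at most mergeFactor, and each batch is sorted by compressed size so the cheapest files are merged first. The following is a minimal sketch of how closeOnDiskFile could maintain that ordering with a size-ordered TreeSet; onDiskMerger.startMerge is assumed here as the hand-off that fills pendingToBeMerged, and none of this is the verbatim MergeManagerImpl code.

private final TreeSet<CompressAwarePath> onDiskMapOutputs =
        new TreeSet<CompressAwarePath>(new Comparator<CompressAwarePath>() {
            @Override
            public int compare(CompressAwarePath a, CompressAwarePath b) {
                if (a.getCompressedSize() != b.getCompressedSize()) {
                    return Long.compare(a.getCompressedSize(), b.getCompressedSize());
                }
                // Tie-break on the path itself so equal-sized files are kept.
                return a.compareTo(b);
            }
        });

public synchronized void closeOnDiskFile(CompressAwarePath file) {
    onDiskMapOutputs.add(file);
    // Once enough files accumulate, hand the mergeFactor smallest of them
    // to the on-disk merger as one already-sorted batch.
    if (onDiskMapOutputs.size() >= 2 * mergeFactor - 1) {
        Set<CompressAwarePath> batch =
                new TreeSet<CompressAwarePath>(onDiskMapOutputs.comparator());
        Iterator<CompressAwarePath> it = onDiskMapOutputs.iterator();
        while (batch.size() < mergeFactor && it.hasNext()) {
            batch.add(it.next());
            it.remove();
        }
        onDiskMerger.startMerge(batch);  // assumed hand-off to pendingToBeMerged
    }
}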

Aggregations

CompressAwarePath (org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath) 3
ArrayList (java.util.ArrayList) 2
MROutputFiles (org.apache.hadoop.mapred.MROutputFiles) 2
Test (org.junit.Test) 2
LinkedList (java.util.LinkedList) 1
List (java.util.List) 1
Random (java.util.Random) 1
TreeMap (java.util.TreeMap) 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 1
FileSystem (org.apache.hadoop.fs.FileSystem) 1
LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator) 1
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem) 1
Path (org.apache.hadoop.fs.Path) 1
IntWritable (org.apache.hadoop.io.IntWritable) 1
Text (org.apache.hadoop.io.Text) 1
JobConf (org.apache.hadoop.mapred.JobConf) 1
MapOutputFile (org.apache.hadoop.mapred.MapOutputFile) 1
JobID (org.apache.hadoop.mapreduce.JobID) 1
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID) 1
TaskID (org.apache.hadoop.mapreduce.TaskID) 1