Search in sources :

Example 21 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMergeManager method testIntermediateMemoryMerge.

/**
 * Exercises the in-memory to in-memory (mem-to-mem) merge of {@code MergeManager} in five
 * consecutive scenarios. Each scenario builds a fresh manager (constructed with 2000000,
 * presumably the available memory in bytes - confirm against the constructor), reserves four
 * in-memory outputs, fills and commits them, waits for the mem-to-mem merge and then checks
 * how many outputs ended up merged versus left untouched.
 *
 * @throws Throwable if any of the exercised calls fails
 */
@Test(timeout = 60000L)
public void testIntermediateMemoryMerge() throws Throwable {
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    // Tests #1-#3 run with a mem-to-mem threshold of 3 segments; it is raised to 4 before Test #4.
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 3);
    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    // NOTE: the local variable 'localFs' is only declared a few lines below, so these two calls
    // must resolve to a member of the same name declared outside this method.
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);
    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());
    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext inputContext = createMockInputContext(UUID.randomUUID().toString());
    ExceptionReporter exceptionReporter = mock(ExceptionReporter.class);
    MergeManager mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
    mergeManager.configureAndStart();
    assertEquals(0, mergeManager.getUsedMemory());
    assertEquals(0, mergeManager.getCommitMemory());
    /*
     * Test #1
     * - Have 4 segments where all of them can fit into memory.
     * - After 3 segment commits, it would trigger mem-to-mem merge.
     * - All of them can be merged in memory.
     */
    InputAttemptIdentifier inputAttemptIdentifier1 = new InputAttemptIdentifier(0, 0);
    InputAttemptIdentifier inputAttemptIdentifier2 = new InputAttemptIdentifier(1, 0);
    InputAttemptIdentifier inputAttemptIdentifier3 = new InputAttemptIdentifier(2, 0);
    InputAttemptIdentifier inputAttemptIdentifier4 = new InputAttemptIdentifier(3, 0);
    byte[] data1 = generateDataBySize(conf, 10, inputAttemptIdentifier1);
    byte[] data2 = generateDataBySize(conf, 20, inputAttemptIdentifier2);
    byte[] data3 = generateDataBySize(conf, 200, inputAttemptIdentifier3);
    byte[] data4 = generateDataBySize(conf, 20000, inputAttemptIdentifier4);
    // Reserve each output under the identifier its data was generated for, exactly as
    // Tests #2-#5 below do (previously all four reservations reused inputAttemptIdentifier1).
    MapOutput mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
    MapOutput mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
    MapOutput mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
    MapOutput mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    // Nothing has been committed yet; only the reservations count as used memory.
    assertEquals(0, mergeManager.getCommitMemory());
    // size should be ~20230.
    assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
    // Committing 3 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();
    // Wait for mem-to-mem to complete
    mergeManager.waitForMemToMemMerge();
    assertEquals(1, mergeManager.inMemoryMergedMapOutputs.size());
    assertEquals(1, mergeManager.inMemoryMapOutputs.size());
    mergeManager.close(true);
    /*
     * Test #2
     * - Have 4 segments where all of them can fit into memory, but one of
     * them would be big enough that it can not be fit in memory during
     * mem-to-mem merging.
     *
     * - After 3 segment commits, it would trigger mem-to-mem merge.
     * - Smaller segments which can be fit in additional memory allocated gets
     * merged.
     */
    mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
    mergeManager.configureAndStart();
    // Single shuffle limit is 25% of 2000000
    data1 = generateDataBySize(conf, 10, inputAttemptIdentifier1);
    data2 = generateDataBySize(conf, 400000, inputAttemptIdentifier2);
    data3 = generateDataBySize(conf, 400000, inputAttemptIdentifier3);
    data4 = generateDataBySize(conf, 400000, inputAttemptIdentifier4);
    mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
    mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
    mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
    mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());
    assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
    // Committing 3 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();
    // Wait for mem-to-mem to complete
    mergeManager.waitForMemToMemMerge();
    /*
     * Already all segments are in memory which is around 1200000
     * (10 + 3 * 400000 requested above). It would not be able to allocate
     * more than 800000 for mem-to-mem. So it would pick up only 2 small
     * segments which can be accommodated within 800000.
     */
    assertEquals(1, mergeManager.inMemoryMergedMapOutputs.size());
    assertEquals(2, mergeManager.inMemoryMapOutputs.size());
    mergeManager.close(true);
    /*
     * Test #3
     * - Have 4 in-memory segments of size 400000 each
     * - Committing them triggers mem-to-mem
     * - But none of them can be merged as there is not enough head room for
     * merging in memory.
     *
     * NOTE(review): the original description said the merge threshold is set
     * to 4 here, but the configuration still holds 3 at this point; it is
     * only changed to 4 before Test #4.
     */
    mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
    mergeManager.configureAndStart();
    // Single shuffle limit is 25% of 2000000
    data1 = generateDataBySize(conf, 400000, inputAttemptIdentifier1);
    data2 = generateDataBySize(conf, 400000, inputAttemptIdentifier2);
    data3 = generateDataBySize(conf, 400000, inputAttemptIdentifier3);
    data4 = generateDataBySize(conf, 400000, inputAttemptIdentifier4);
    mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
    mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
    mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
    mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());
    assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
    // Committing 3 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();
    // Wait for mem-to-mem to complete
    mergeManager.waitForMemToMemMerge();
    // None of them can be merged as new mem needed for mem-to-mem can't
    // accommodate any segments
    assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
    assertEquals(4, mergeManager.inMemoryMapOutputs.size());
    mergeManager.close(true);
    /*
     * Test #4
     * - Set number of segments for merging to 4.
     * - Have 4 in-memory segments of size {490000,490000,490000,230000}
     * - Committing 4 segments would trigger mem-to-mem
     * - But only 300000 can fit into memory. This should not be
     * merged as there is no point in merging single segment. It should be
     * added back to the inMemorySegments
     */
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 4);
    mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
    mergeManager.configureAndStart();
    // Single shuffle limit is 25% of 2000000
    data1 = generateDataBySize(conf, 490000, inputAttemptIdentifier1);
    data2 = generateDataBySize(conf, 490000, inputAttemptIdentifier2);
    data3 = generateDataBySize(conf, 490000, inputAttemptIdentifier3);
    data4 = generateDataBySize(conf, 230000, inputAttemptIdentifier4);
    mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
    mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
    mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
    mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
    // NOTE(review): 23000 looks like a typo for 230000 (the size requested for data4). As
    // written this lower bound is simply looser than the requested sizes; confirm that
    // generateDataBySize never returns fewer bytes than requested before tightening it.
    assertTrue(mergeManager.getUsedMemory() >= (490000 + 490000 + 490000 + 23000));
    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());
    assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
    // Committing 4 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();
    // 4 segments were there originally in inMemoryMapOutput.
    int numberOfMapOutputs = 4;
    // Wait for mem-to-mem to complete. Since only 1 segment (230000) can fit
    // into memory, it should return early
    mergeManager.waitForMemToMemMerge();
    // Check if inMemorySegment has got the MapOutput back for merging later
    assertEquals(numberOfMapOutputs, mergeManager.inMemoryMapOutputs.size());
    mergeManager.close(true);
    /*
     * Test #5
     * - Same as #4, but calling mergeManager.close(false) and confirming that the final merge doesn't occur.
     */
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 4);
    mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
    mergeManager.configureAndStart();
    // Single shuffle limit is 25% of 2000000
    data1 = generateDataBySize(conf, 490000, inputAttemptIdentifier1);
    data2 = generateDataBySize(conf, 490000, inputAttemptIdentifier2);
    data3 = generateDataBySize(conf, 490000, inputAttemptIdentifier3);
    data4 = generateDataBySize(conf, 230000, inputAttemptIdentifier4);
    mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
    mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
    mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
    mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
    // NOTE(review): same suspected 23000-vs-230000 typo as in Test #4; left unchanged pending
    // confirmation of what generateDataBySize guarantees.
    assertTrue(mergeManager.getUsedMemory() >= (490000 + 490000 + 490000 + 23000));
    assertEquals(MapOutput.Type.MEMORY, mo1.getType());
    assertEquals(MapOutput.Type.MEMORY, mo2.getType());
    assertEquals(MapOutput.Type.MEMORY, mo3.getType());
    assertEquals(MapOutput.Type.MEMORY, mo4.getType());
    assertEquals(0, mergeManager.getCommitMemory());
    assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
    System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
    System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
    System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
    System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
    // Committing 4 segments should trigger mem-to-mem merge
    mo1.commit();
    mo2.commit();
    mo3.commit();
    mo4.commit();
    // 4 segments were there originally in inMemoryMapOutput.
    numberOfMapOutputs = 4;
    // Wait for mem-to-mem to complete. Since only 1 segment (230000) can fit
    // into memory, it should return early
    mergeManager.waitForMemToMemMerge();
    // Check if inMemorySegment has got the MapOutput back for merging later
    assertEquals(numberOfMapOutputs, mergeManager.inMemoryMapOutputs.size());
    // close(false) is expected to return null and to leave the merge marked as not complete.
    Assert.assertNull(mergeManager.close(false));
    Assert.assertFalse(mergeManager.isMergeComplete());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) FileSystem(org.apache.hadoop.fs.FileSystem) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) IntWritable(org.apache.hadoop.io.IntWritable) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 22 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMergeManager method testIntermediateMemoryMergeAccounting.

/**
 * Checks the used/committed memory counters of {@code MergeManager} while two in-memory
 * outputs are reserved, committed in reverse order and then handed to the mem-to-mem merge
 * (threshold configured to 2 segments).
 *
 * @throws Exception if any of the exercised calls fails
 */
@Test(timeout = 20000)
public void testIntermediateMemoryMergeAccounting() throws Exception {
    final String intWritableName = IntWritable.class.getName();
    Configuration mergeConf = new TezConfiguration(defaultConf);
    mergeConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    mergeConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, intWritableName);
    mergeConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, intWritableName);
    mergeConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    mergeConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 2);

    // Working directories; 'localFs' here is the member declared outside this method.
    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);
    mergeConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());

    FileSystem fs = FileSystem.getLocal(mergeConf);
    LocalDirAllocator dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext context = createMockInputContext(UUID.randomUUID().toString());
    MergeManager manager = new MergeManager(conf(mergeConf), fs, dirAllocator, context, null, null, null, null,
        mock(ExceptionReporter.class), 2000000, null, false, -1);
    manager.configureAndStart();

    // A freshly started manager holds no memory at all.
    assertEquals(0, manager.getUsedMemory());
    assertEquals(0, manager.getCommitMemory());

    byte[] firstData = generateData(mergeConf, 10, null);
    byte[] secondData = generateData(mergeConf, 20, null);
    final int firstLen = firstData.length;
    final int secondLen = secondData.length;

    MapOutput outputA = manager.reserve(null, firstLen, firstLen, 0);
    MapOutput outputB = manager.reserve(null, secondLen, secondLen, 0);
    assertEquals(MapOutput.Type.MEMORY, outputA.getType());
    assertEquals(MapOutput.Type.MEMORY, outputB.getType());

    // Reserved but not yet committed: everything is "used", nothing is "committed".
    assertEquals(0, manager.getCommitMemory());
    assertEquals(firstLen + secondLen, manager.getUsedMemory());

    System.arraycopy(firstData, 0, outputA.getMemory(), 0, firstLen);
    System.arraycopy(secondData, 0, outputB.getMemory(), 0, secondLen);

    // Commit the second output first; only its bytes become committed.
    outputB.commit();
    assertEquals(secondLen, manager.getCommitMemory());
    assertEquals(firstLen + secondLen, manager.getUsedMemory());

    // Committing the first output reaches the configured 2-segment threshold.
    outputA.commit();
    manager.waitForMemToMemMerge();
    assertEquals(firstLen + secondLen, manager.getCommitMemory());
    assertEquals(firstLen + secondLen, manager.getUsedMemory());
}

/** Identity pass-through so the constructor call above reads uniformly; returns its argument. */
private static Configuration conf(Configuration c) {
    return c;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) InputContext(org.apache.tez.runtime.api.InputContext) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) IntWritable(org.apache.hadoop.io.IntWritable) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 23 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMergeManager method testReservationAccounting.

/**
 * Verifies that {@code MergeManager} keeps its used and committed memory counters consistent
 * across reserve, abort, closeInMemoryFile and releaseCommittedMemory.
 *
 * @throws IOException if obtaining the local file system or a manager call fails
 */
@Test(timeout = 10000)
public void testReservationAccounting() throws IOException {
    Configuration tezConf = new TezConfiguration(defaultConf);
    FileSystem fs = FileSystem.getLocal(tezConf);
    InputContext context = createMockInputContext(UUID.randomUUID().toString());
    ExceptionReporter reporter = mock(ExceptionReporter.class);
    MergeManager manager = new MergeManager(tezConf, fs, null, context, null, null, null, null, reporter, 2000000, null, false, -1);
    manager.configureAndStart();

    // Nothing reserved yet.
    assertEquals(0, manager.getUsedMemory());
    assertEquals(0, manager.getCommitMemory());

    // A 1-byte reservation counts as used but not committed ...
    MapOutput abortedOutput = manager.reserve(null, 1, 1, 0);
    assertEquals(1, manager.getUsedMemory());
    assertEquals(0, manager.getCommitMemory());

    // ... and aborting it gives the byte back.
    abortedOutput.abort();
    assertEquals(0, manager.getUsedMemory());
    assertEquals(0, manager.getCommitMemory());

    // A 2-byte reservation closed as an in-memory file is both used and committed ...
    MapOutput closedOutput = manager.reserve(null, 2, 2, 0);
    manager.closeInMemoryFile(closedOutput);
    assertEquals(2, manager.getUsedMemory());
    assertEquals(2, manager.getCommitMemory());

    // ... until the committed memory is released again.
    manager.releaseCommittedMemory(2);
    assertEquals(0, manager.getUsedMemory());
    assertEquals(0, manager.getCommitMemory());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) InputContext(org.apache.tez.runtime.api.InputContext) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 24 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMergeManager method testLocalDiskMergeMultipleTasks.

/**
 * Simulates two tasks (t0 and t1) that each fetch one partition of the same two source files
 * straight from local disk and then run an on-disk merge through their own
 * {@code MergeManager}.
 *
 * <p>With {@code interruptInMiddle == false} both tasks merge and the test checks that each
 * ends up with exactly one merged file, that the two merged paths differ, and that each path
 * contains the unique identifier of the owning task's input context.
 *
 * <p>With {@code interruptInMiddle == true} the t0 manager's {@code closeOnDiskFile} sleeps for
 * 2 seconds, an {@code InterruptingThread} is started against t0's on-disk merger, and the test
 * checks that t0 does not end up with exactly one on-disk output. Task t1 is not exercised in
 * this mode.
 *
 * @param interruptInMiddle whether t0's on-disk merge should be interrupted while running
 * @throws IOException if file-system or merge operations fail
 * @throws InterruptedException if the calling thread is interrupted while sleeping or waiting
 *     for the merge
 */
void testLocalDiskMergeMultipleTasks(final boolean interruptInMiddle) throws IOException, InterruptedException {
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    // NOTE: the local variable 'localFs' is only declared below, so these two calls must resolve
    // to a member of the same name declared outside this method.
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);
    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());
    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString());
    InputContext t1inputContext = createMockInputContext(UUID.randomUUID().toString());
    ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class);
    ExceptionReporter t1exceptionReporter = mock(ExceptionReporter.class);
    MergeManager t0mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, 2000000, null, false, -1) {

        // override for interruptInMiddle testing: delay the close so that an interrupt can
        // arrive while it is in progress; when interrupted, restore the flag and skip the close.
        @Override
        public synchronized void closeOnDiskFile(FileChunk file) {
            if (interruptInMiddle) {
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
            }
            super.closeOnDiskFile(file);
        }
    };
    MergeManager t0mergeManager = spy(t0mergeManagerReal);
    t0mergeManager.configureAndStart();
    // Unlike t0, the t1 manager is never started here; its on-disk merger is invoked directly.
    MergeManager t1mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, t1inputContext, null, null, null, null, t1exceptionReporter, 2000000, null, false, -1);
    MergeManager t1mergeManager = spy(t1mergeManagerReal);
    // Partition 0 Keys 0-2, Partition 1 Keys 3-5
    SrcFileInfo src1Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out"), 2, 3, 0);
    // Partition 0 Keys 6-8, Partition 1 Keys 9-11
    SrcFileInfo src2Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out"), 2, 3, 6);
    // Simulating Task 0 fetches partition 0. (targetIndex = 0,1)
    // Simulating Task 1 fetches partition 1. (targetIndex = 0,1)
    InputAttemptIdentifier t0Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName());
    InputAttemptIdentifier t0Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName());
    InputAttemptIdentifier t1Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName());
    InputAttemptIdentifier t1Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName());
    MapOutput t0MapOutput0 = getMapOutputForDirectDiskFetch(t0Identifier0, src1Info.path, src1Info.indexedRecords[0], t0mergeManager);
    MapOutput t0MapOutput1 = getMapOutputForDirectDiskFetch(t0Identifier1, src2Info.path, src2Info.indexedRecords[0], t0mergeManager);
    MapOutput t1MapOutput0 = getMapOutputForDirectDiskFetch(t1Identifier0, src1Info.path, src1Info.indexedRecords[1], t1mergeManager);
    MapOutput t1MapOutput1 = getMapOutputForDirectDiskFetch(t1Identifier1, src2Info.path, src2Info.indexedRecords[1], t1mergeManager);
    t0MapOutput0.commit();
    t0MapOutput1.commit();
    verify(t0mergeManager).closeOnDiskFile(t0MapOutput0.getOutputPath());
    verify(t0mergeManager).closeOnDiskFile(t0MapOutput1.getOutputPath());
    // Run the OnDiskMerge via MergeManager
    // Simulate the thread invocation - remove files, and invoke merge
    List<FileChunk> t0MergeFiles = new LinkedList<FileChunk>(t0mergeManager.onDiskMapOutputs);
    t0mergeManager.onDiskMapOutputs.clear();
    if (!interruptInMiddle) {
        t0mergeManager.onDiskMerger.merge(t0MergeFiles);
        Assert.assertEquals(1, t0mergeManager.onDiskMapOutputs.size());

        t1MapOutput0.commit();
        t1MapOutput1.commit();
        verify(t1mergeManager).closeOnDiskFile(t1MapOutput0.getOutputPath());
        verify(t1mergeManager).closeOnDiskFile(t1MapOutput1.getOutputPath());
        // Run the OnDiskMerge via MergeManager
        // Simulate the thread invocation - remove files, and invoke merge
        List<FileChunk> t1MergeFiles = new LinkedList<FileChunk>(t1mergeManager.onDiskMapOutputs);
        t1mergeManager.onDiskMapOutputs.clear();
        t1mergeManager.onDiskMerger.merge(t1MergeFiles);
        Assert.assertEquals(1, t1mergeManager.onDiskMapOutputs.size());
        // The two tasks must not have written their merged output to the same path, and each
        // path must carry the unique identifier of the task that produced it.
        Assert.assertNotEquals(t0mergeManager.onDiskMapOutputs.iterator().next().getPath(), t1mergeManager.onDiskMapOutputs.iterator().next().getPath());
        Assert.assertTrue(t0mergeManager.onDiskMapOutputs.iterator().next().getPath().toString().contains(t0inputContext.getUniqueIdentifier()));
        Assert.assertTrue(t1mergeManager.onDiskMapOutputs.iterator().next().getPath().toString().contains(t1inputContext.getUniqueIdentifier()));
    } else {
        // Start Interrupting thread
        Thread interruptingThread = new Thread(new InterruptingThread(t0mergeManager.onDiskMerger));
        interruptingThread.start();
        // An interrupt of the test thread itself is unexpected here; let it propagate (the
        // method declares InterruptedException) instead of printing it and carrying on.
        Thread.sleep(1000);
        // Will be interrupted in the middle by interruptingThread.
        t0mergeManager.onDiskMerger.startMerge(Sets.newHashSet(t0MergeFiles));
        t0mergeManager.onDiskMerger.waitForMerge();
        Assert.assertNotEquals(1, t0mergeManager.onDiskMapOutputs.size());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) LinkedList(java.util.LinkedList) FileSystem(org.apache.hadoop.fs.FileSystem) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) FileChunk(org.apache.hadoop.io.FileChunk) IntWritable(org.apache.hadoop.io.IntWritable) TezConfiguration(org.apache.tez.dag.api.TezConfiguration)

Example 25 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestOrderedGroupedKVInput method testInterruptWhileAwaitingInput.

/**
 * Expects {@code getReader()} on a started {@code OrderedGroupedKVInputForTest} to fail with
 * an {@code IOInterruptedException} rather than return.
 *
 * @throws IOException if initialization or start fails with an I/O error
 * @throws TezException if the input reports a Tez-level failure
 */
@Test(timeout = 5000)
public void testInterruptWhileAwaitingInput() throws IOException, TezException {
    InputContext mockContext = createMockInputContext();
    OrderedGroupedKVInput input = new OrderedGroupedKVInputForTest(mockContext, 10);
    input.initialize();
    input.start();

    // Capture the failure first, then judge it outside the try block.
    IOException thrown = null;
    try {
        input.getReader();
    } catch (IOException ioe) {
        thrown = ioe;
    }

    if (thrown == null) {
        Assert.fail("getReader should not return since underlying inputs are not ready");
    }
    Assert.assertTrue(thrown instanceof IOInterruptedException);
}
Also used : IOInterruptedException(org.apache.tez.runtime.library.api.IOInterruptedException) InputContext(org.apache.tez.runtime.api.InputContext) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

InputContext (org.apache.tez.runtime.api.InputContext)65 Test (org.junit.Test)47 Configuration (org.apache.hadoop.conf.Configuration)30 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)28 TezCounters (org.apache.tez.common.counters.TezCounters)19 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)18 CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)17 IOException (java.io.IOException)16 InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier)16 Event (org.apache.tez.runtime.api.Event)14 LinkedList (java.util.LinkedList)12 Path (org.apache.hadoop.fs.Path)12 InputDescriptor (org.apache.tez.dag.api.InputDescriptor)10 InvocationOnMock (org.mockito.invocation.InvocationOnMock)10 ExecutorService (java.util.concurrent.ExecutorService)9 OutputContext (org.apache.tez.runtime.api.OutputContext)9 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)8 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)8 FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator)8 Text (org.apache.hadoop.io.Text)7