Search in sources :

Example 51 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class TestFetcher method testSetupLocalDiskFetch.

/**
 * Exercises {@code Fetcher.call()} with local-disk fetch enabled for five source attempts.
 *
 * <p>{@code getTezIndexRecord} is stubbed to throw an {@link IOException} for the attempts whose
 * path component ends in 2 or 4, and to return a record for the others. The test then checks that
 * {@code setupLocalDiskFetch()} was invoked, that the callback saw a success for attempts 0, 1 and
 * 3 and a failure for attempts 2 and 4, and that the returned {@code FetchResult} reports the
 * two failed attempts as pending inputs.
 *
 * @throws Exception if the fetcher or any stubbed call fails unexpectedly
 */
@Test(timeout = 3000)
public void testSetupLocalDiskFetch() throws Exception {
    CompositeInputAttemptIdentifier[] srcAttempts = {
        new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1),
        new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1),
        new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1),
        new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1),
        new CompositeInputAttemptIdentifier(4, 5, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1)
    };
    final int FIRST_FAILED_ATTEMPT_IDX = 2;
    final int SECOND_FAILED_ATTEMPT_IDX = 4;
    final int[] successfulAttempts = { 0, 1, 3 };
    TezConfiguration conf = new TezConfiguration();
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "true");
    int partition = 42;
    FetcherCallback callback = mock(FetcherCallback.class);
    Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null, ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, true, HOST, PORT, false, true, true);
    ArrayList<InputAttemptIdentifier> list = new ArrayList<InputAttemptIdentifier>();
    list.addAll(Arrays.asList(srcAttempts));
    builder.assignWork(HOST, PORT, partition, 1, list);
    Fetcher fetcher = spy(builder.build());
    // Register every expanded attempt in the fetcher's path-to-attempt map, keyed by
    // (path component, partition + offset).
    for (CompositeInputAttemptIdentifier compositeInputAttemptIdentifier : srcAttempts) {
        for (int i = 0; i < compositeInputAttemptIdentifier.getInputIdentifierCount(); i++) {
            Fetcher.PathPartition pathPartition = new Fetcher.PathPartition(compositeInputAttemptIdentifier.getPathComponent(), partition + i);
            fetcher.getPathToAttemptMap().put(pathPartition, compositeInputAttemptIdentifier.expand(i));
        }
    }
    // Stub the shuffle file lookup: the returned path is the prefix plus the first argument.
    doAnswer(new Answer<Path>() {

        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]);
        }
    }).when(fetcher).getShuffleInputFileName(anyString(), anyString());
    // Stub the index lookup: the last character of the path component selects the outcome.
    doAnswer(new Answer<TezIndexRecord>() {

        @Override
        public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            InputAttemptIdentifier srcAttemptId = (InputAttemptIdentifier) args[0];
            String pathComponent = srcAttemptId.getPathComponent();
            int len = pathComponent.length();
            long p = Long.parseLong(pathComponent.substring(len - 1, len));
            // Fail the 3rd one and 5th one.
            if (p == FIRST_FAILED_ATTEMPT_IDX || p == SECOND_FAILED_ATTEMPT_IDX) {
                throw new IOException("failing on 3/5th input to simulate failure case");
            }
            // match with params for copySucceeded below.
            return new TezIndexRecord(p * 10, p * 1000, p * 100);
        }
    }).when(fetcher).getTezIndexRecord(any(InputAttemptIdentifier.class), anyInt());
    doNothing().when(fetcher).shutdown();
    doNothing().when(callback).fetchSucceeded(anyString(), any(InputAttemptIdentifier.class), any(FetchedInput.class), anyLong(), anyLong(), anyLong());
    doNothing().when(callback).fetchFailed(anyString(), any(InputAttemptIdentifier.class), eq(false));
    FetchResult fetchResult = fetcher.call();
    verify(fetcher).setupLocalDiskFetch();
    // expect 3 successes and 2 failures
    for (int i : successfulAttempts) {
        verifyFetchSucceeded(callback, srcAttempts[i], conf);
    }
    verify(callback).fetchFailed(eq(HOST), eq(srcAttempts[FIRST_FAILED_ATTEMPT_IDX]), eq(false));
    verify(callback).fetchFailed(eq(HOST), eq(srcAttempts[SECOND_FAILED_ATTEMPT_IDX]), eq(false));
    // JUnit's assertEquals takes (message, expected, actual); keep that order so that a failure
    // reports the expected and actual values the right way round.
    Assert.assertEquals("fetchResult host", HOST, fetchResult.getHost());
    Assert.assertEquals("fetchResult partition", partition, fetchResult.getPartition());
    Assert.assertEquals("fetchResult port", PORT, fetchResult.getPort());
    // 3rd and 5th attempt failed
    List<InputAttemptIdentifier> pendingInputs = Lists.newArrayList(fetchResult.getPendingInputs());
    Assert.assertEquals("fetchResult pendingInput size", 2, pendingInputs.size());
    Assert.assertEquals("fetchResult failed attempt", srcAttempts[FIRST_FAILED_ATTEMPT_IDX], pendingInputs.get(0));
    Assert.assertEquals("fetchResult failed attempt", srcAttempts[SECOND_FAILED_ATTEMPT_IDX], pendingInputs.get(1));
}
Also used : ArrayList(java.util.ArrayList) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) Matchers.anyString(org.mockito.Matchers.anyString) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) TezIndexRecord(org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 52 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class TestShuffleInputEventHandlerImpl method testCurrentPartitionEmpty.

/**
 * Sends a single data movement event, built with {@code createEmptyPartitionByteString(0)}, to a
 * {@code ShuffleInputEventHandlerImpl} backed by mocks and verifies that the shuffle manager is
 * told the corresponding input completed with no data.
 *
 * @throws IOException if building or handling the event fails
 */
@Test(timeout = 5000)
public void testCurrentPartitionEmpty() throws IOException {
    final int taskIndex = 1;

    // Collaborators are all mocks; only the interaction with the shuffle manager is checked.
    InputContext mockContext = mock(InputContext.class);
    ShuffleManager mockShuffleManager = mock(ShuffleManager.class);
    FetchedInputAllocator mockAllocator = mock(FetchedInputAllocator.class);
    ShuffleInputEventHandlerImpl eventHandler = new ShuffleInputEventHandlerImpl(mockContext, mockShuffleManager, mockAllocator, null, false, 0, false);

    List<Event> incoming = new LinkedList<Event>();
    incoming.add(createDataMovementEvent(0, taskIndex, createEmptyPartitionByteString(0)));
    eventHandler.handleEvents(incoming);

    verify(mockShuffleManager).addCompletedInputWithNoData(eq(new InputAttemptIdentifier(taskIndex, 0)));
}
Also used : FetchedInputAllocator(org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator) InputContext(org.apache.tez.runtime.api.InputContext) Event(org.apache.tez.runtime.api.Event) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 53 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class TestMergeManager method testOnDiskMergerFilenames.

/**
 * Runs the on-disk merger of a spied {@code MergeManager} three times and checks the names of the
 * merged output files.
 *
 * <p>Each round commits one or two direct-disk-fetch map outputs, copies
 * {@code onDiskMapOutputs} into a merge list, clears it, and calls {@code onDiskMerger.merge}.
 * After each round exactly one on-disk output is expected, and its path must end in
 * {@code merged0}, {@code merged1} and {@code merged2} respectively. The final assertions compare
 * the path lengths and the path prefixes (text before the first '.') across the three rounds.
 *
 * @throws IOException if file system or merge operations fail
 * @throws InterruptedException declared by the operations used here (e.g. the merge calls)
 */
@Test(timeout = 10000)
public void testOnDiskMergerFilenames() throws IOException, InterruptedException {
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    // NOTE(review): 'localFs' on the next two lines is used before the local variable of the same
    // name is declared below, so it must resolve to a declaration outside this method
    // (presumably a class-level field) - confirm the shadowing is intended.
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);
    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());
    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext inputContext = createMockInputContext(UUID.randomUUID().toString());
    ExceptionReporter exceptionReporter = mock(ExceptionReporter.class);
    MergeManager mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 1 * 1024l * 1024l, null, false, -1);
    // Spy on the real manager so the closeOnDiskFile calls can be verified.
    MergeManager mergeManager = spy(mergeManagerReal);
    // Partition 0 Keys 0-2, Partition 1 Keys 3-5
    SrcFileInfo file1Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out"), 2, 3, 6);
    SrcFileInfo file2Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out"), 2, 3, 0);
    InputAttemptIdentifier iIdentifier1 = new InputAttemptIdentifier(0, 0, file1Info.path.getName());
    InputAttemptIdentifier iIdentifier2 = new InputAttemptIdentifier(1, 0, file2Info.path.getName());
    MapOutput mapOutput1 = getMapOutputForDirectDiskFetch(iIdentifier1, file1Info.path, file1Info.indexedRecords[0], mergeManager);
    MapOutput mapOutput2 = getMapOutputForDirectDiskFetch(iIdentifier2, file2Info.path, file2Info.indexedRecords[0], mergeManager);
    mapOutput1.commit();
    mapOutput2.commit();
    verify(mergeManager).closeOnDiskFile(mapOutput1.getOutputPath());
    verify(mergeManager).closeOnDiskFile(mapOutput2.getOutputPath());
    // First merge round: hand the current on-disk outputs to the merger directly.
    List<FileChunk> mergeFiles = new LinkedList<FileChunk>();
    mergeFiles.addAll(mergeManager.onDiskMapOutputs);
    mergeManager.onDiskMapOutputs.clear();
    mergeManager.onDiskMerger.merge(mergeFiles);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    FileChunk fcMerged1 = mergeManager.onDiskMapOutputs.iterator().next();
    Path m1Path = fcMerged1.getPath();
    assertTrue(m1Path.toString().endsWith("merged0"));
    // Add another file. Make sure the filename is different, and does not get clobbered.
    SrcFileInfo file3Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src3.out"), 2, 22, 5);
    // NOTE(review): this identifier is built from file1Info's name while the map output below
    // uses file3Info's path - may be deliberate or a copy-paste slip; confirm.
    InputAttemptIdentifier iIdentifier3 = new InputAttemptIdentifier(2, 0, file1Info.path.getName());
    MapOutput mapOutput3 = getMapOutputForDirectDiskFetch(iIdentifier3, file3Info.path, file3Info.indexedRecords[0], mergeManager);
    mapOutput3.commit();
    verify(mergeManager).closeOnDiskFile(mapOutput3.getOutputPath());
    // Second merge round.
    mergeFiles = new LinkedList<FileChunk>();
    mergeFiles.addAll(mergeManager.onDiskMapOutputs);
    mergeManager.onDiskMapOutputs.clear();
    mergeManager.onDiskMerger.merge(mergeFiles);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    FileChunk fcMerged2 = mergeManager.onDiskMapOutputs.iterator().next();
    Path m2Path = fcMerged2.getPath();
    assertTrue(m2Path.toString().endsWith("merged1"));
    assertNotEquals(m1Path, m2Path);
    // Add another file. This time add it to the head of the list.
    SrcFileInfo file4Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src4.out"), 2, 45, 35);
    InputAttemptIdentifier iIdentifier4 = new InputAttemptIdentifier(3, 0, file4Info.path.getName());
    MapOutput mapOutput4 = getMapOutputForDirectDiskFetch(iIdentifier4, file4Info.path, file4Info.indexedRecords[0], mergeManager);
    mapOutput4.commit();
    verify(mergeManager).closeOnDiskFile(mapOutput4.getOutputPath());
    // Add in reverse order this time.
    List<FileChunk> tmpList = new LinkedList<>();
    mergeFiles = new LinkedList<>();
    assertEquals(2, mergeManager.onDiskMapOutputs.size());
    tmpList.addAll(mergeManager.onDiskMapOutputs);
    mergeFiles.add(tmpList.get(1));
    mergeFiles.add(tmpList.get(0));
    mergeManager.onDiskMapOutputs.clear();
    mergeManager.onDiskMerger.merge(mergeFiles);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    FileChunk fcMerged3 = mergeManager.onDiskMapOutputs.iterator().next();
    Path m3Path = fcMerged3.getPath();
    assertTrue(m3Path.toString().endsWith("merged2"));
    assertNotEquals(m2Path, m3Path);
    // Ensure the lengths are the same - since the source file names are the same. No append happening.
    assertEquals(m1Path.toString().length(), m2Path.toString().length());
    assertEquals(m2Path.toString().length(), m3Path.toString().length());
    // Ensure the filenames are used correctly - based on the first file given to the merger.
    // The prefix is everything before the first '.' in the full path string.
    String m1Prefix = m1Path.toString().substring(0, m1Path.toString().indexOf("."));
    String m2Prefix = m2Path.toString().substring(0, m2Path.toString().indexOf("."));
    String m3Prefix = m3Path.toString().substring(0, m3Path.toString().indexOf("."));
    assertEquals(m1Prefix, m2Prefix);
    assertNotEquals(m1Prefix, m3Prefix);
    assertNotEquals(m2Prefix, m3Prefix);
    verify(inputContext, atLeastOnce()).notifyProgress();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) LinkedList(java.util.LinkedList) FileSystem(org.apache.hadoop.fs.FileSystem) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) FileChunk(org.apache.hadoop.io.FileChunk) IntWritable(org.apache.hadoop.io.IntWritable) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 54 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class TestShuffleInputEventHandlerOrderedGrouped method testCurrentPartitionEmpty.

/**
 * Passes one data movement event, built with {@code createEmptyPartitionByteString}, to the
 * handler and verifies that the scheduler records a successful copy with all three long
 * arguments equal to zero and the final flag set to {@code true}.
 *
 * @throws IOException if building or handling the event fails
 */
@Test(timeout = 5000)
public void testCurrentPartitionEmpty() throws IOException {
    final int sourceIndex = 0;
    final int targetIndex = 1;

    List<Event> incoming = new LinkedList<Event>();
    incoming.add(createDataMovementEvent(sourceIndex, targetIndex, createEmptyPartitionByteString(sourceIndex), false));
    handler.handleEvents(incoming);

    InputAttemptIdentifier expected = new InputAttemptIdentifier(targetIndex, 0);
    verify(scheduler).copySucceeded(eq(expected), any(MapHost.class), eq(0L), eq(0L), eq(0L), any(MapOutput.class), eq(true));
}
Also used : InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) Event(org.apache.tez.runtime.api.Event) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) CompositeInputAttemptIdentifier(org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 55 with InputAttemptIdentifier

use of org.apache.tez.runtime.library.common.InputAttemptIdentifier in project tez by apache.

the class TestIFile method readUsingInMemoryReader.

/**
 * Wraps the whole of {@code bytes} in an {@code InMemoryReader} and hands it to
 * {@code verifyData} together with the expected key/value pairs.
 *
 * @param bytes buffer to read; the reader is given the range from offset 0 to {@code bytes.length}
 * @param originalData the data passed to {@code verifyData} alongside the reader
 * @throws IOException if constructing the reader or verifying the data fails
 */
private void readUsingInMemoryReader(byte[] bytes, List<KVPair> originalData) throws IOException {
    final InputAttemptIdentifier attemptId = new InputAttemptIdentifier(0, 0);
    final int length = bytes.length;
    InMemoryReader reader = new InMemoryReader(null, attemptId, bytes, 0, length);
    verifyData(reader, originalData);
}
Also used : InMemoryReader(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier)

Aggregations

InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier): 55; CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier): 41; Test (org.junit.Test): 31; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 17; Configuration (org.apache.hadoop.conf.Configuration): 16; InputContext (org.apache.tez.runtime.api.InputContext): 16; IOException (java.io.IOException): 15; TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration): 15; Path (org.apache.hadoop.fs.Path): 10; LinkedList (java.util.LinkedList): 8; Matchers.anyString (org.mockito.Matchers.anyString): 8; VisibleForTesting (com.google.common.annotations.VisibleForTesting): 7; TezCounters (org.apache.tez.common.counters.TezCounters): 7; Event (org.apache.tez.runtime.api.Event): 7; DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent): 7; TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord): 6; InvocationOnMock (org.mockito.invocation.InvocationOnMock): 6; FetcherReadTimeoutException (org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException): 5; URL (java.net.URL): 4; ArrayList (java.util.ArrayList): 4