Search in sources :

Example 56 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMergeManager method testConfigs.

/**
 * Exercises {@code MergeManager.getInitialMemoryRequirement} with several combinations of the
 * shuffle fetch buffer percent and the post merge buffer percent, checks that out-of-range
 * percentages are rejected with {@link IllegalArgumentException}, and finally checks
 * {@code postMergeMemLimit} on two {@code MergeManager} instances built with different amounts
 * of initially available memory.
 *
 * @throws IOException declared for the file system and merge manager setup calls
 */
@Test(timeout = 10000)
public void testConfigs() throws IOException {
    // 8 GiB. The trailing long literal makes the last multiplication a long one, so the
    // product does not wrap around the int range.
    long maxTaskMem = 8192 * 1024 * 1024L;
    // Test Shuffle fetch buffer and post merge buffer percentage
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.8f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f);
    // assertEquals (rather than assertTrue on ==) reports the actual value on failure.
    Assert.assertEquals(6871947776L, MergeManager.getInitialMemoryRequirement(conf, maxTaskMem));
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.5f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f);
    Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.9f);
    Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.1f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.1f);
    Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) < Integer.MAX_VALUE);

    // Out-of-range percentages must be rejected. Each check leaves its value set on conf.
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 2.4f, maxTaskMem,
        "Should have thrown wrong buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -2.4f, maxTaskMem,
        "Should have thrown wrong buffer percent configuration exception");
    // NOTE(review): the fetch buffer percent is still -2.4f while the two post merge checks
    // run, so the exception caught here may come from the fetch buffer validation rather than
    // from the post merge validation - confirm against MergeManager.getInitialMemoryRequirement.
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 1.4f, maxTaskMem,
        "Should have thrown wrong post merge buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, -1.4f, maxTaskMem,
        "Should have thrown wrong post merge buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 1.4f, maxTaskMem,
        "Should have thrown wrong shuffle fetch buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -1.4f, maxTaskMem,
        "Should have thrown wrong shuffle fetch buffer percent configuration exception");

    // test post merge mem limit
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.8f);
    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString(), maxTaskMem);
    ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class);
    long initialMemoryAvailable = (long) (maxTaskMem * 0.8);
    MergeManager mergeManager = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, initialMemoryAvailable, null, false, -1);
    Assert.assertTrue(mergeManager.postMergeMemLimit > Integer.MAX_VALUE);
    // initial mem < memlimit
    initialMemoryAvailable = 200 * 1024 * 1024L;
    mergeManager = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, initialMemoryAvailable, null, false, -1);
    Assert.assertEquals(initialMemoryAvailable, mergeManager.postMergeMemLimit);
}

/**
 * Sets {@code key} to {@code value} on {@code conf} and asserts that
 * {@code MergeManager.getInitialMemoryRequirement(conf, maxTaskMem)} then throws
 * {@link IllegalArgumentException}. The value remains set on {@code conf} afterwards.
 *
 * @param conf configuration to modify and pass to the call under test
 * @param key name of the percentage setting to overwrite
 * @param value percentage that is expected to be rejected
 * @param maxTaskMem task memory passed through to the call under test
 * @param failureMessage message used to fail the test when no exception is thrown
 * @throws IOException never thrown by this helper itself; declared so it stays compatible with
 *     the throws clause of the calling test
 */
private static void assertPercentRejected(Configuration conf, String key, float value, long maxTaskMem,
    String failureMessage) throws IOException {
    try {
        conf.setFloat(key, value);
        MergeManager.getInitialMemoryRequirement(conf, maxTaskMem);
        Assert.fail(failureMessage);
    } catch (IllegalArgumentException expected) {
        // Expected: the configuration is invalid, so rejecting it is the passing outcome.
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) InputContext(org.apache.tez.runtime.api.InputContext) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) Test(org.junit.Test)

Example 57 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMergeManager method testOnDiskMergerFilenames.

/**
 * Runs three successive on-disk merges through a spied {@code MergeManager} and checks the
 * paths of the merged outputs: they end in {@code merged0}, {@code merged1} and
 * {@code merged2}, all have the same string length, and the part of the path before the first
 * {@code '.'} is equal for the first two merges but different for the third, where the inputs
 * are handed to the merger in reverse order.
 *
 * @throws IOException declared for the file system and merge calls
 * @throws InterruptedException declared for the merge calls
 */
@Test(timeout = 10000)
public void testOnDiskMergerFilenames() throws IOException, InterruptedException {
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    // NOTE(review): at this point localFs cannot be the local variable declared further down
    // (it is not yet in scope), so these two calls use a member declared outside this method -
    // presumably a field of the test class; confirm. Do not move them below the declaration.
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);
    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());
    // From here on localFs refers to this local, obtained from the test configuration.
    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext inputContext = createMockInputContext(UUID.randomUUID().toString());
    ExceptionReporter exceptionReporter = mock(ExceptionReporter.class);
    MergeManager mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 1 * 1024l * 1024l, null, false, -1);
    // Spy on the real manager so calls to closeOnDiskFile can be verified below.
    MergeManager mergeManager = spy(mergeManagerReal);
    // Partition 0 Keys 0-2, Partition 1 Keys 3-5
    SrcFileInfo file1Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out"), 2, 3, 6);
    SrcFileInfo file2Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out"), 2, 3, 0);
    InputAttemptIdentifier iIdentifier1 = new InputAttemptIdentifier(0, 0, file1Info.path.getName());
    InputAttemptIdentifier iIdentifier2 = new InputAttemptIdentifier(1, 0, file2Info.path.getName());
    MapOutput mapOutput1 = getMapOutputForDirectDiskFetch(iIdentifier1, file1Info.path, file1Info.indexedRecords[0], mergeManager);
    MapOutput mapOutput2 = getMapOutputForDirectDiskFetch(iIdentifier2, file2Info.path, file2Info.indexedRecords[0], mergeManager);
    mapOutput1.commit();
    mapOutput2.commit();
    // closeOnDiskFile must have been invoked on the spy with each output's path.
    verify(mergeManager).closeOnDiskFile(mapOutput1.getOutputPath());
    verify(mergeManager).closeOnDiskFile(mapOutput2.getOutputPath());
    // First merge: take a copy of the current on-disk outputs, empty the manager's collection
    // and invoke the on-disk merger directly on the copy.
    List<FileChunk> mergeFiles = new LinkedList<FileChunk>();
    mergeFiles.addAll(mergeManager.onDiskMapOutputs);
    mergeManager.onDiskMapOutputs.clear();
    mergeManager.onDiskMerger.merge(mergeFiles);
    // The merge is expected to leave exactly one entry, whose path ends in "merged0".
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    FileChunk fcMerged1 = mergeManager.onDiskMapOutputs.iterator().next();
    Path m1Path = fcMerged1.getPath();
    assertTrue(m1Path.toString().endsWith("merged0"));
    // Add another file. Make sure the filename is different, and does not get clobbered.
    SrcFileInfo file3Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src3.out"), 2, 22, 5);
    // NOTE(review): this identifier is built from file1Info's file name although the data comes
    // from file3Info. It may be intentional or a copy-paste slip - confirm before changing.
    InputAttemptIdentifier iIdentifier3 = new InputAttemptIdentifier(2, 0, file1Info.path.getName());
    MapOutput mapOutput3 = getMapOutputForDirectDiskFetch(iIdentifier3, file3Info.path, file3Info.indexedRecords[0], mergeManager);
    mapOutput3.commit();
    verify(mergeManager).closeOnDiskFile(mapOutput3.getOutputPath());
    // Second merge: same copy / clear / merge sequence; the result must end in "merged1".
    mergeFiles = new LinkedList<FileChunk>();
    mergeFiles.addAll(mergeManager.onDiskMapOutputs);
    mergeManager.onDiskMapOutputs.clear();
    mergeManager.onDiskMerger.merge(mergeFiles);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    FileChunk fcMerged2 = mergeManager.onDiskMapOutputs.iterator().next();
    Path m2Path = fcMerged2.getPath();
    assertTrue(m2Path.toString().endsWith("merged1"));
    assertNotEquals(m1Path, m2Path);
    // Add another file. This time add it to the head of the list.
    SrcFileInfo file4Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src4.out"), 2, 45, 35);
    InputAttemptIdentifier iIdentifier4 = new InputAttemptIdentifier(3, 0, file4Info.path.getName());
    MapOutput mapOutput4 = getMapOutputForDirectDiskFetch(iIdentifier4, file4Info.path, file4Info.indexedRecords[0], mergeManager);
    mapOutput4.commit();
    verify(mergeManager).closeOnDiskFile(mapOutput4.getOutputPath());
    // Add in reverse order this time.
    List<FileChunk> tmpList = new LinkedList<>();
    mergeFiles = new LinkedList<>();
    // Two entries are expected at this point; they are passed to the merger swapped.
    assertEquals(2, mergeManager.onDiskMapOutputs.size());
    tmpList.addAll(mergeManager.onDiskMapOutputs);
    mergeFiles.add(tmpList.get(1));
    mergeFiles.add(tmpList.get(0));
    mergeManager.onDiskMapOutputs.clear();
    mergeManager.onDiskMerger.merge(mergeFiles);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    FileChunk fcMerged3 = mergeManager.onDiskMapOutputs.iterator().next();
    Path m3Path = fcMerged3.getPath();
    assertTrue(m3Path.toString().endsWith("merged2"));
    assertNotEquals(m2Path, m3Path);
    // Ensure the lengths are the same - since the source file names are the same. No append happening.
    assertEquals(m1Path.toString().length(), m2Path.toString().length());
    assertEquals(m2Path.toString().length(), m3Path.toString().length());
    // Ensure the filenames are used correctly - based on the first file given to the merger.
    // The prefix is everything before the first '.' in the full path string.
    String m1Prefix = m1Path.toString().substring(0, m1Path.toString().indexOf("."));
    String m2Prefix = m2Path.toString().substring(0, m2Path.toString().indexOf("."));
    String m3Prefix = m3Path.toString().substring(0, m3Path.toString().indexOf("."));
    assertEquals(m1Prefix, m2Prefix);
    assertNotEquals(m1Prefix, m3Prefix);
    assertNotEquals(m2Prefix, m3Prefix);
    // Progress must have been reported on the input context at least once during the test.
    verify(inputContext, atLeastOnce()).notifyProgress();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) LinkedList(java.util.LinkedList) FileSystem(org.apache.hadoop.fs.FileSystem) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) FileChunk(org.apache.hadoop.io.FileChunk) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 58 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestShuffleInputEventHandlerOrderedGrouped method createTezInputContext.

/**
 * Creates a Mockito mock of {@link InputContext} with the stubs used by the tests in this
 * class: application id, source vertex name, counters, execution context, service provider and
 * service consumer metadata, and an executor-service factory that delegates to
 * {@code sharedExecutor}.
 *
 * @return the stubbed mock
 * @throws IOException declared for the service data serialization call
 */
private InputContext createTezInputContext() throws IOException {
    InputContext mockContext = mock(InputContext.class);

    // Basic identity and bookkeeping stubs.
    ApplicationId appId = ApplicationId.newInstance(1, 1);
    doReturn(appId).when(mockContext).getApplicationId();
    doReturn("sourceVertex").when(mockContext).getSourceVertexName();
    when(mockContext.getCounters()).thenReturn(new TezCounters());
    ExecutionContext localExecution = new ExecutionContextImpl("localhost");
    doReturn(localExecution).when(mockContext).getExecutionContext();

    // Service provider metadata: a 4-byte buffer with the int 4 written at index 0.
    ByteBuffer providerMetaData = ByteBuffer.allocate(4);
    providerMetaData.putInt(0, 4);
    doReturn(providerMetaData).when(mockContext).getServiceProviderMetaData(anyString());

    // Service consumer metadata: a serialized job token.
    JobTokenIdentifier tokenIdentifier = new JobTokenIdentifier(new Text("text"));
    JobTokenSecretManager secretManager = new JobTokenSecretManager();
    Token<JobTokenIdentifier> jobToken = new Token<JobTokenIdentifier>(tokenIdentifier, secretManager);
    ByteBuffer consumerMetaData = TezCommonUtils.serializeServiceData(jobToken);
    doReturn(consumerMetaData).when(mockContext).getServiceConsumerMetaData(anyString());

    // Executor services requested from the context are created by the shared executor, using
    // the two arguments of the intercepted call.
    Answer<ExecutorService> delegateToSharedExecutor = new Answer<ExecutorService>() {

        @Override
        public ExecutorService answer(InvocationOnMock invocation) throws Throwable {
            Integer intArgument = invocation.getArgumentAt(0, Integer.class);
            String stringArgument = invocation.getArgumentAt(1, String.class);
            return sharedExecutor.createExecutorService(intArgument, stringArgument);
        }
    };
    when(mockContext.createTezFrameworkExecutorService(anyInt(), anyString())).thenAnswer(delegateToSharedExecutor);
    return mockContext;
}
Also used : ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) InputContext(org.apache.tez.runtime.api.InputContext) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) Token(org.apache.hadoop.security.token.Token) Text(org.apache.hadoop.io.Text) ByteBuffer(java.nio.ByteBuffer) TezCounters(org.apache.tez.common.counters.TezCounters) ExecutionContext(org.apache.tez.runtime.api.ExecutionContext) JobTokenSecretManager(org.apache.tez.common.security.JobTokenSecretManager) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ExecutorService(java.util.concurrent.ExecutorService) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Example 59 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestShuffleInputEventHandlerOrderedGrouped method setupScheduler.

/**
 * Prepares the fixtures shared by the tests: a real {@code ShuffleScheduler} built on mocked
 * collaborators, a spy wrapping it, the event handler under test wired to that spy, and a
 * separate {@code MergeManager} mock stored in the {@code mergeManager} field.
 *
 * @param numInputs number of inputs passed to the scheduler
 * @throws Exception if building the input context or the scheduler fails
 */
private void setupScheduler(int numInputs) throws Exception {
    InputContext tezInputContext = createTezInputContext();
    Configuration schedulerConf = new Configuration();

    // Mocked collaborators, created in the order in which they are passed to the constructor.
    Shuffle shuffleMock = mock(Shuffle.class);
    MergeManager firstMergeManagerMock = mock(MergeManager.class);
    MergeManager secondMergeManagerMock = mock(MergeManager.class);
    long startTimeMillis = System.currentTimeMillis();

    realScheduler = new ShuffleScheduler(tezInputContext, schedulerConf, numInputs, shuffleMock,
        firstMergeManagerMock, secondMergeManagerMock, startTimeMillis, null, false, 0, "src vertex");
    scheduler = spy(realScheduler);
    handler = new ShuffleInputEventHandlerOrderedGrouped(tezInputContext, scheduler,
        ShuffleUtils.isTezShuffleHandler(schedulerConf));
    // This mock is distinct from the two instances handed to the scheduler above.
    mergeManager = mock(MergeManager.class);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputContext(org.apache.tez.runtime.api.InputContext)

Example 60 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testWeightedScalingNonConcurrent.

/**
 * Requests 10000 units of memory for each of two inputs, two outputs and one processor from a
 * {@code MemoryDistributor} whose configuration disables concurrent input/output scaling,
 * enables non-concurrent inputs, reserves a fraction of 0.2 and uses weighted ratios, then
 * checks the amount assigned to every requester after the initial allocation.
 *
 * @throws TezException declared for the distributor calls
 */
@Test(timeout = 5000)
public void testWeightedScalingNonConcurrent() throws TezException {
    // Work on a copy so the shared configuration field is left untouched.
    Configuration scalingConf = new Configuration(this.conf);
    scalingConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
    scalingConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, true);
    scalingConf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION, 0.2);
    scalingConf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));
    System.err.println(Joiner.on(",").join(scalingConf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));

    MemoryDistributor distributor = new MemoryDistributor(2, 2, scalingConf);
    distributor.setJvmMemory(10000L);

    // Request 1: input described as OrderedGroupedKVInput.
    MemoryUpdateCallbackForTest orderedInputCallback = new MemoryUpdateCallbackForTest();
    InputContext orderedInputContext = createTestInputContext();
    InputDescriptor orderedInputDescriptor = createTestInputDescriptor(OrderedGroupedKVInput.class);
    distributor.requestMemory(10000, orderedInputCallback, orderedInputContext, orderedInputDescriptor);

    // Request 2: input described as UnorderedKVInput.
    MemoryUpdateCallbackForTest unorderedInputCallback = new MemoryUpdateCallbackForTest();
    InputContext unorderedInputContext = createTestInputContext();
    InputDescriptor unorderedInputDescriptor = createTestInputDescriptor(UnorderedKVInput.class);
    distributor.requestMemory(10000, unorderedInputCallback, unorderedInputContext, unorderedInputDescriptor);

    // Request 3: output created with the default test descriptor.
    MemoryUpdateCallbackForTest defaultOutputCallback = new MemoryUpdateCallbackForTest();
    OutputContext defaultOutputContext = createTestOutputContext();
    OutputDescriptor defaultOutputDescriptor = createTestOutputDescriptor();
    distributor.requestMemory(10000, defaultOutputCallback, defaultOutputContext, defaultOutputDescriptor);

    // Request 4: output described as OrderedPartitionedKVOutput.
    MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
    OutputContext sortedOutputContext = createTestOutputContext();
    OutputDescriptor sortedOutputDescriptor = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    distributor.requestMemory(10000, sortedOutputCallback, sortedOutputContext, sortedOutputDescriptor);

    // Request 5: the processor.
    MemoryUpdateCallbackForTest processorCallback = new MemoryUpdateCallbackForTest();
    ProcessorContext processorContext = createTestProcessortContext();
    ProcessorDescriptor processorDescriptor = createTestProcessorDescriptor();
    distributor.requestMemory(10000, processorCallback, processorContext, processorDescriptor);

    distributor.makeInitialAllocations();

    // Total available: 80% of 10K = 8000
    // 5 requests (weight) - 10K (3), 10K(1), 10K(1), 10K(2), 10K(1)
    // Input and output memory overlap, so each group is checked against its own share.
    assertEquals(5250, orderedInputCallback.assigned);
    assertEquals(1750, unorderedInputCallback.assigned);
    assertEquals(2333, defaultOutputCallback.assigned);
    assertEquals(4666, sortedOutputCallback.assigned);
    assertEquals(1000, processorCallback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) ProcessorContext(org.apache.tez.runtime.api.ProcessorContext) Test(org.junit.Test)

Aggregations

InputContext (org.apache.tez.runtime.api.InputContext): 65, Test (org.junit.Test): 47, Configuration (org.apache.hadoop.conf.Configuration): 30, TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 28, TezCounters (org.apache.tez.common.counters.TezCounters): 19, TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration): 18, CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier): 17, IOException (java.io.IOException): 16, InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier): 16, Event (org.apache.tez.runtime.api.Event): 14, LinkedList (java.util.LinkedList): 12, Path (org.apache.hadoop.fs.Path): 12, InputDescriptor (org.apache.tez.dag.api.InputDescriptor): 10, InvocationOnMock (org.mockito.invocation.InvocationOnMock): 10, ExecutorService (java.util.concurrent.ExecutorService): 9, OutputContext (org.apache.tez.runtime.api.OutputContext): 9, OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor): 8, DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent): 8, FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator): 8, Text (org.apache.hadoop.io.Text): 7