Search in sources :

Example 26 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestOrderedGroupedKVInput method createMockInputContext.

/**
 * Builds a Mockito mock of {@link InputContext} with the stubs these tests rely on.
 *
 * <p>The mock returns a user payload created from a fresh {@link TezConfiguration}, a single
 * working directory ({@code "workDir1"}), a new {@link TezCounters} instance, and 200 MB as the
 * total memory available to the task. Every call to {@code requestInitialMemory} immediately
 * invokes the supplied {@link MemoryUpdateCallbackHandler} with 200 MB; the test fails if the
 * second argument is not such a handler.
 *
 * @return the stubbed input context
 * @throws IOException propagated from the setup calls (presumably user payload creation)
 */
private InputContext createMockInputContext() throws IOException {
    // 200 MB, computed in long arithmetic; upper-case 'L' avoids confusion with the digit '1'.
    final long memoryBytes = 200L * 1024 * 1024;

    InputContext inputContext = mock(InputContext.class);
    Configuration conf = new TezConfiguration();
    UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
    String[] workingDirs = new String[] { "workDir1" };
    TezCounters counters = new TezCounters();
    doReturn(payLoad).when(inputContext).getUserPayload();
    doReturn(workingDirs).when(inputContext).getWorkDirs();
    doReturn(memoryBytes).when(inputContext).getTotalMemoryAvailableToTask();
    doReturn(counters).when(inputContext).getCounters();
    // Parameterized Answer<Void> instead of the raw type: the stubbed call yields no value.
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            if (args[1] instanceof MemoryUpdateCallbackHandler) {
                // Grant the full amount right away so the caller sees its memory assigned.
                MemoryUpdateCallbackHandler memUpdateCallbackHandler = (MemoryUpdateCallbackHandler) args[1];
                memUpdateCallbackHandler.memoryAssigned(memoryBytes);
            } else {
                Assert.fail("requestInitialMemory expected a MemoryUpdateCallbackHandler but got: " + args[1]);
            }
            return null;
        }
    }).when(inputContext).requestInitialMemory(any(long.class), any(MemoryUpdateCallbackHandler.class));
    return inputContext;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) UserPayload(org.apache.tez.dag.api.UserPayload) InputContext(org.apache.tez.runtime.api.InputContext) TezCounters(org.apache.tez.common.counters.TezCounters) Answer(org.mockito.stubbing.Answer) Mockito.doAnswer(org.mockito.Mockito.doAnswer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) MemoryUpdateCallbackHandler(org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler)

Example 27 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testAdditionalReserveFractionWeightedScaling.

/**
 * Verifies weighted memory scaling when an additional per-IO reservation fraction is configured.
 *
 * <p>Four components (two inputs, two outputs) each request the whole 10000-unit JVM memory.
 * With a per-IO additional reservation of 0.025 (capped at 0.2), the assertions expect a total of
 * 6000 units to be distributed in proportion to the configured weights 6 : 2 : 1 : 3.
 * NOTE(review): the 60% figure presumably comes from a 30% base reserve plus 4 x 2.5% of
 * additional reserve - confirm against the MemoryDistributor defaults.
 *
 * @throws TezException if the memory distributor rejects the configuration or requests
 */
@Test(timeout = 5000)
public void testAdditionalReserveFractionWeightedScaling() throws TezException {
    Configuration conf = new Configuration(this.conf);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 2, 3, 6, 1, 1));
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_PER_IO, 0.025d);
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_MAX, 0.2d);
    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);
    dist.setJvmMemory(10000L);
    // First request - ScatterGatherShuffleInput [weight 6]
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request - BroadcastInput [weight 2]
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - randomOutput (simulates MROutput) [weight 1]
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);
    // Fourth request - OnFileSortedOutput [weight 3]
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);
    dist.makeInitialAllocations();
    // Total available: 60% of 10K = 6000
    // 4 requests (weight) - 10K (6), 10K(2), 10K(1), 10K(3)
    // Total weight 12 -> 500 per unit of weight
    // Scale down to - 3000, 1000, 500, 1500
    assertEquals(3000, e1Callback.assigned);
    assertEquals(1000, e2Callback.assigned);
    assertEquals(500, e3Callback.assigned);
    assertEquals(1500, e4Callback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 28 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testSimpleWeightedScaling.

/**
 * Checks the basic weighted split of task memory across two inputs and two outputs.
 *
 * <p>Each of the four components asks for the full 10000 units; the assertions expect 7000 units
 * in total to be handed out in the ratio 3 : 1 : 1 : 2.
 *
 * @throws TezException if the memory distributor rejects the configuration or requests
 */
@Test(timeout = 5000)
public void testSimpleWeightedScaling() throws TezException {
    Configuration weightedConf = new Configuration(this.conf);
    weightedConf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));
    System.err.println(Joiner.on(",").join(weightedConf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));

    MemoryDistributor distributor = new MemoryDistributor(2, 2, weightedConf);
    distributor.setJvmMemory(10000L);

    // Request 1: ordered/grouped shuffle input (ScatterGatherShuffleInput), weight 3
    MemoryUpdateCallbackForTest shuffleInputCallback = new MemoryUpdateCallbackForTest();
    distributor.requestMemory(10000, shuffleInputCallback, createTestInputContext(),
        createTestInputDescriptor(OrderedGroupedKVInput.class));

    // Request 2: unordered input (BroadcastInput), weight 1
    MemoryUpdateCallbackForTest broadcastInputCallback = new MemoryUpdateCallbackForTest();
    distributor.requestMemory(10000, broadcastInputCallback, createTestInputContext(),
        createTestInputDescriptor(UnorderedKVInput.class));

    // Request 3: arbitrary output (stands in for MROutput), weight 1
    MemoryUpdateCallbackForTest otherOutputCallback = new MemoryUpdateCallbackForTest();
    distributor.requestMemory(10000, otherOutputCallback, createTestOutputContext(),
        createTestOutputDescriptor());

    // Request 4: sorted partitioned output (OnFileSortedOutput), weight 2
    MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
    distributor.requestMemory(10000, sortedOutputCallback, createTestOutputContext(),
        createTestOutputDescriptor(OrderedPartitionedKVOutput.class));

    distributor.makeInitialAllocations();

    // 70% of 10K = 7000 available; total weight 7 -> 1000 per unit of weight.
    assertEquals(3000, shuffleInputCallback.assigned);
    assertEquals(1000, broadcastInputCallback.assigned);
    assertEquals(1000, otherOutputCallback.assigned);
    assertEquals(2000, sortedOutputCallback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 29 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMRInput method test0PhysicalInputs.

/**
 * Exercises {@code MRInput} created with zero physical inputs.
 *
 * <p>After initialize/start the reader must report no records and the context must have been
 * notified of progress exactly once. Delivering events (even an empty list) to such an input is
 * expected to fail with an {@link IllegalStateException}.
 *
 * @throws IOException if setting up or reading from the input fails
 */
@Test(timeout = 5000)
public void test0PhysicalInputs() throws IOException {
    InputContext mockContext = mock(InputContext.class);
    DataSourceDescriptor sourceDescriptor =
        MRInput.createConfigBuilder(new Configuration(false), FileInputFormat.class, "testPath").build();
    ApplicationId appId = ApplicationId.newInstance(1000, 1);

    // Payload, application identity and counters.
    doReturn(sourceDescriptor.getInputDescriptor().getUserPayload()).when(mockContext).getUserPayload();
    doReturn(appId).when(mockContext).getApplicationId();
    doReturn(new TezCounters()).when(mockContext).getCounters();

    // Naming and task coordinates.
    doReturn("dagName").when(mockContext).getDAGName();
    doReturn("vertexName").when(mockContext).getTaskVertexName();
    doReturn("inputName").when(mockContext).getSourceVertexName();
    doReturn("uniqueIdentifier").when(mockContext).getUniqueIdentifier();
    doReturn(1).when(mockContext).getTaskIndex();
    doReturn(1).when(mockContext).getTaskAttemptNumber();

    MRInput zeroInput = new MRInput(mockContext, 0);
    zeroInput.initialize();
    zeroInput.start();

    assertFalse(zeroInput.getReader().next());
    verify(mockContext, times(1)).notifyProgress();

    List<Event> noEvents = new LinkedList<>();
    try {
        zeroInput.handleEvents(noEvents);
        fail("HandleEvents should cause an input with 0 physical inputs to fail");
    } catch (Exception thrown) {
        // Any exception other than IllegalStateException counts as a test failure.
        assertTrue(thrown instanceof IllegalStateException);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputContext(org.apache.tez.runtime.api.InputContext) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) FileInputFormat(org.apache.hadoop.mapreduce.lib.input.FileInputFormat) TezCounters(org.apache.tez.common.counters.TezCounters) LinkedList(java.util.LinkedList) IOException(java.io.IOException) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) Test(org.junit.Test)

Example 30 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMultiMRInput method testSingleSplit.

/**
 * Feeds a single sequence-file split to {@code MultiMRInput} and checks what its readers return.
 *
 * <p>The input is initialized first, then one split is written under the test directory,
 * wrapped into an {@link InputDataInformationEvent} and delivered through
 * {@code handleEvents}. Finally {@code assertReaders} compares the readers against the generated
 * data and the recorded input length.
 *
 * @throws Exception if any setup, split creation or event handling step fails
 */
@Test(timeout = 5000)
public void testSingleSplit() throws Exception {
    Path splitDir = new Path(TEST_ROOT_DIR, "testSingleSplit");

    JobConf conf = new JobConf(defaultConf);
    conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(conf, splitDir);

    MultiMRInput multiInput = new MultiMRInput(createTezInputContext(conf), 1);
    multiInput.initialize();

    // Write the test data; the helper records the total input length as it goes.
    AtomicLong totalLength = new AtomicLong();
    LinkedHashMap<LongWritable, Text> expected = createSplits(1, splitDir, conf, totalLength);

    SequenceFileInputFormat<LongWritable, Text> inputFormat = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] generatedSplits = inputFormat.getSplits(conf, 1);
    assertEquals(1, generatedSplits.length);

    // Serialize the only split and hand it to the input as a data-information event.
    InputDataInformationEvent splitEvent = InputDataInformationEvent.createWithSerializedPayload(
        0, MRInputHelpers.createSplitProto(generatedSplits[0]).toByteString().asReadOnlyByteBuffer());
    List<Event> incoming = new ArrayList<Event>();
    incoming.add(splitEvent);
    multiInput.handleEvents(incoming);

    assertReaders(multiInput, expected, 1, totalLength.get());
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) InputContext(org.apache.tez.runtime.api.InputContext) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) AtomicLong(java.util.concurrent.atomic.AtomicLong) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) Test(org.junit.Test)

Aggregations

InputContext (org.apache.tez.runtime.api.InputContext)65 Test (org.junit.Test)47 Configuration (org.apache.hadoop.conf.Configuration)30 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)28 TezCounters (org.apache.tez.common.counters.TezCounters)19 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)18 CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)17 IOException (java.io.IOException)16 InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier)16 Event (org.apache.tez.runtime.api.Event)14 LinkedList (java.util.LinkedList)12 Path (org.apache.hadoop.fs.Path)12 InputDescriptor (org.apache.tez.dag.api.InputDescriptor)10 InvocationOnMock (org.mockito.invocation.InvocationOnMock)10 ExecutorService (java.util.concurrent.ExecutorService)9 OutputContext (org.apache.tez.runtime.api.OutputContext)9 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)8 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)8 FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator)8 Text (org.apache.hadoop.io.Text)7