Search in sources :

Example 61 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testAdditionalReserveFractionWeightedScalingNonConcurrent.

@Test(timeout = 5000)
public void testAdditionalReserveFractionWeightedScalingNonConcurrent() throws TezException {
    Configuration conf = new Configuration(this.conf);
    conf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
    conf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, true);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 2, 3, 6, 1, 1));
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_PER_IO, 0.025d);
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_MAX, 0.2d);
    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);
    dist.setJvmMemory(10000l);
    // First request - ScatterGatherShuffleInput [weight 6]
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request - BroadcastInput [weight 2]
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - randomOutput (simulates MROutput) [weight 1]
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);
    // Fourth request - OnFileSortedOutput [weight 3]
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);
    dist.makeInitialAllocations();
    // Total available: 60% of 10K = 6000
    // 4 requests (weight) - 10K (6), 10K(2), 10K(1), 10K(3)
    // Overlap input and output memory
    assertEquals(4500, e1Callback.assigned);
    assertEquals(1500, e2Callback.assigned);
    assertEquals(1500, e3Callback.assigned);
    assertEquals(4500, e4Callback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 62 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testWeightedScalingNonConcurrentInputsDisabled.

@Test(timeout = 5000)
public void testWeightedScalingNonConcurrentInputsDisabled() throws TezException {
    Configuration conf = new Configuration(this.conf);
    conf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
    conf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, false);
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION, 0.2);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));
    System.err.println(Joiner.on(",").join(conf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));
    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);
    dist.setJvmMemory(10000l);
    // First request - ScatterGatherShuffleInput
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request - BroadcastInput
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - randomOutput (simulates MROutput)
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);
    // Fourth request - OnFileSortedOutput
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);
    // Fifth request - Processor
    MemoryUpdateCallbackForTest e5Callback = new MemoryUpdateCallbackForTest();
    ProcessorContext e5ProcContext = createTestProcessortContext();
    ProcessorDescriptor e5ProcDesc = createTestProcessorDescriptor();
    dist.requestMemory(10000, e5Callback, e5ProcContext, e5ProcDesc);
    dist.makeInitialAllocations();
    // Total available: 80% of 10K = 8000
    // 5 requests (weight) - 10K (3), 10K(1), 10K(1), 10K(2), 10K(1)
    // Overlap input and output memory
    assertEquals(3000, e1Callback.assigned);
    assertEquals(1000, e2Callback.assigned);
    assertEquals(2333, e3Callback.assigned);
    assertEquals(4666, e4Callback.assigned);
    assertEquals(1000, e5Callback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) ProcessorContext(org.apache.tez.runtime.api.ProcessorContext) Test(org.junit.Test)

Example 63 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMRInput method testAttributesInJobConf.

@Test(timeout = 5000)
public void testAttributesInJobConf() throws Exception {
    InputContext inputContext = mock(InputContext.class);
    doReturn(TEST_ATTRIBUTES_DAG_INDEX).when(inputContext).getDagIdentifier();
    doReturn(TEST_ATTRIBUTES_VERTEX_INDEX).when(inputContext).getTaskVertexIndex();
    doReturn(TEST_ATTRIBUTES_TASK_INDEX).when(inputContext).getTaskIndex();
    doReturn(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX).when(inputContext).getTaskAttemptNumber();
    doReturn(TEST_ATTRIBUTES_INPUT_INDEX).when(inputContext).getInputIndex();
    doReturn(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER).when(inputContext).getDAGAttemptNumber();
    doReturn(TEST_ATTRIBUTES_DAG_NAME).when(inputContext).getDAGName();
    doReturn(TEST_ATTRIBUTES_VERTEX_NAME).when(inputContext).getTaskVertexName();
    doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName();
    doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId();
    doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier();
    DataSourceDescriptor dsd = MRInput.createConfigBuilder(new Configuration(false), TestInputFormat.class).groupSplits(false).build();
    doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload();
    doReturn(new TezCounters()).when(inputContext).getCounters();
    MRInput mrInput = new MRInput(inputContext, 1);
    mrInput.initialize();
    MRRuntimeProtos.MRSplitProto splitProto = MRRuntimeProtos.MRSplitProto.newBuilder().setSplitClassName(TestInputSplit.class.getName()).build();
    InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(0, splitProto.toByteString().asReadOnlyByteBuffer());
    List<Event> events = new LinkedList<>();
    events.add(diEvent);
    mrInput.handleEvents(events);
    TezCounter counter = mrInput.getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES);
    assertEquals(counter.getValue(), TestInputSplit.length);
    assertTrue(TestInputFormat.invoked.get());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputContext(org.apache.tez.runtime.api.InputContext) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) TezCounter(org.apache.tez.common.counters.TezCounter) MRRuntimeProtos(org.apache.tez.mapreduce.protos.MRRuntimeProtos) TezCounters(org.apache.tez.common.counters.TezCounters) LinkedList(java.util.LinkedList) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Example 64 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMultiMRInput method testExtraEvents.

@Test(timeout = 5000)
public void testExtraEvents() throws Exception {
    Path workDir = new Path(TEST_ROOT_DIR, "testExtraEvents");
    JobConf jobConf = new JobConf(defaultConf);
    jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(jobConf, workDir);
    InputContext inputContext = createTezInputContext(jobConf);
    MultiMRInput input = new MultiMRInput(inputContext, 1);
    input.initialize();
    createSplits(1, workDir, jobConf, new AtomicLong());
    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(jobConf, 1);
    assertEquals(1, splits.length);
    MRSplitProto splitProto = MRInputHelpers.createSplitProto(splits[0]);
    InputDataInformationEvent event1 = InputDataInformationEvent.createWithSerializedPayload(0, splitProto.toByteString().asReadOnlyByteBuffer());
    InputDataInformationEvent event2 = InputDataInformationEvent.createWithSerializedPayload(1, splitProto.toByteString().asReadOnlyByteBuffer());
    List<Event> eventList = new ArrayList<Event>();
    eventList.add(event1);
    eventList.add(event2);
    try {
        input.handleEvents(eventList);
        fail("Expecting Exception due to too many events");
    } catch (Exception e) {
        assertTrue(e.getMessage().contains("Unexpected event. All physical sources already initialized"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) InputContext(org.apache.tez.runtime.api.InputContext) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Example 65 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMultiMRInput method test0PhysicalInputs.

@Test(timeout = 5000)
public void test0PhysicalInputs() throws Exception {
    Path workDir = new Path(TEST_ROOT_DIR, "testSingleSplit");
    JobConf jobConf = new JobConf(defaultConf);
    jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(jobConf, workDir);
    InputContext inputContext = createTezInputContext(jobConf);
    MultiMRInput mMrInput = new MultiMRInput(inputContext, 0);
    mMrInput.initialize();
    mMrInput.start();
    assertEquals(0, mMrInput.getKeyValueReaders().size());
    List<Event> events = new LinkedList<>();
    try {
        mMrInput.handleEvents(events);
        fail("HandleEvents should cause an input with 0 physical inputs to fail");
    } catch (Exception e) {
        assertTrue(e instanceof IllegalStateException);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) InputContext(org.apache.tez.runtime.api.InputContext) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) JobConf(org.apache.hadoop.mapred.JobConf) LinkedList(java.util.LinkedList) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

InputContext (org.apache.tez.runtime.api.InputContext)65 Test (org.junit.Test)47 Configuration (org.apache.hadoop.conf.Configuration)30 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)28 TezCounters (org.apache.tez.common.counters.TezCounters)19 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)18 CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier)17 IOException (java.io.IOException)16 InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier)16 Event (org.apache.tez.runtime.api.Event)14 LinkedList (java.util.LinkedList)12 Path (org.apache.hadoop.fs.Path)12 InputDescriptor (org.apache.tez.dag.api.InputDescriptor)10 InvocationOnMock (org.mockito.invocation.InvocationOnMock)10 ExecutorService (java.util.concurrent.ExecutorService)9 OutputContext (org.apache.tez.runtime.api.OutputContext)9 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)8 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)8 FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator)8 Text (org.apache.hadoop.io.Text)7