Search in sources :

Example 6 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testAdditionalReserveFractionWeightedScaling.

/**
 * Checks weighted scaling when an additional per-IO reserve fraction is configured.
 *
 * <p>Four requests of 10000 each (two inputs, two outputs) are registered against a
 * distributor whose JVM memory is set to 10000. After the initial allocation the
 * callbacks are expected to have been assigned 3000, 1000, 500 and 1500, i.e. a
 * total of 6000 split in the ratio 6:2:1:3.
 *
 * @throws TezException propagated from the memory distributor calls
 */
@Test(timeout = 5000)
public void testAdditionalReserveFractionWeightedScaling() throws TezException {
    Configuration conf = new Configuration(this.conf);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 2, 3, 6, 1, 1));
    // Reserve an extra 2.5% per input/output, capped at 20% overall.
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_PER_IO, 0.025d);
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_MAX, 0.2d);
    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);
    dist.setJvmMemory(10000L);
    // First request - ScatterGatherShuffleInput [weight 6]
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request - BroadcastInput [weight 2]
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - randomOutput (simulates MROutput) [weight 1]
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);
    // Fourth request - OnFileSortedOutput [weight 3]
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);
    dist.makeInitialAllocations();
    // Total available: 60% of 10K = 6000
    // NOTE(review): the 60% presumably comes from the base reserve configured in
    // this.conf plus the additional 4 x 0.025 per-IO reserve - confirm against setup.
    // 4 requests (weight) - 10K (6), 10K(2), 10K(1), 10K(3)
    // Scale down to - 3000, 1000, 500, 1500
    assertEquals(3000, e1Callback.assigned);
    assertEquals(1000, e2Callback.assigned);
    assertEquals(500, e3Callback.assigned);
    assertEquals(1500, e4Callback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 7 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testSimpleWeightedScaling.

/**
 * Checks plain weighted scaling of memory requests.
 *
 * <p>Four requests of 10000 each (two inputs, two outputs) are registered against a
 * distributor whose JVM memory is set to 10000. After the initial allocation the
 * callbacks are expected to have been assigned 3000, 1000, 1000 and 2000, i.e. a
 * total of 7000 split in the ratio 3:1:1:2.
 *
 * @throws TezException propagated from the memory distributor calls
 */
@Test(timeout = 5000)
public void testSimpleWeightedScaling() throws TezException {
    Configuration conf = new Configuration(this.conf);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));
    // Print the configured weight strings to stderr for inspection.
    System.err.println(Joiner.on(",").join(conf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));
    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);
    dist.setJvmMemory(10000L);
    // First request - ScatterGatherShuffleInput
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request - BroadcastInput
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - randomOutput (simulates MROutput)
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);
    // Fourth request - OnFileSortedOutput
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);
    dist.makeInitialAllocations();
    // Total available: 70% of 10K = 7000
    // 4 requests (weight) - 10K (3), 10K(1), 10K(1), 10K(2)
    // Scale down to - 3000, 1000, 1000, 2000
    assertEquals(3000, e1Callback.assigned);
    assertEquals(1000, e2Callback.assigned);
    assertEquals(1000, e3Callback.assigned);
    assertEquals(2000, e4Callback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 8 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestVertexManager method testOnRootVertexInitialized2.

/**
 * Covers TEZ-1647: the custom vertex manager generates events only when both
 * i1 and i2 are initialized.
 *
 * <p>After the first input is initialized the handler is expected to receive an
 * empty event list; after the second, it is expected to receive two events whose
 * destination edge vertex names are "input1" and "input2".
 *
 * @throws Exception propagated from any call in the test body
 */
@Test(timeout = 5000)
public void testOnRootVertexInitialized2() throws Exception {
    VertexManagerPluginDescriptor pluginDescriptor = VertexManagerPluginDescriptor.create(CustomVertexManager.class.getName());
    VertexManager vertexManager = new VertexManager(pluginDescriptor, UserGroupInformation.getCurrentUser(), mockVertex, mockAppContext, mock(StateChangeNotifier.class));
    vertexManager.initialize();

    // First input: the manager does not call context.addRootInputEvents yet,
    // it only caches the TezEvent.
    InputDescriptor firstDescriptor = mock(InputDescriptor.class);
    List<Event> firstInputEvents = new LinkedList<Event>();
    firstInputEvents.add(InputDataInformationEvent.createWithSerializedPayload(0, null));
    vertexManager.onRootVertexInitialized("input1", firstDescriptor, firstInputEvents);
    verify(mockHandler, times(1)).handle(requestCaptor.capture());
    List<TezEvent> routedAfterFirst = requestCaptor.getValue().getEvents();
    assertEquals(0, routedAfterFirst.size());

    // Second input: the manager now calls context.addRootInputEvents(input1)
    // and context.addRootInputEvents(input2).
    InputDescriptor secondDescriptor = mock(InputDescriptor.class);
    List<Event> secondInputEvents = new LinkedList<Event>();
    secondInputEvents.add(InputDataInformationEvent.createWithSerializedPayload(0, null));
    vertexManager.onRootVertexInitialized("input2", secondDescriptor, secondInputEvents);
    verify(mockHandler, times(2)).handle(requestCaptor.capture());
    List<TezEvent> routedAfterSecond = requestCaptor.getValue().getEvents();
    assertEquals(2, routedAfterSecond.size());

    // The EventMetaData of the routed events must name both inputs.
    Set<String> destinationEdgeNames = new HashSet<String>();
    for (TezEvent routed : routedAfterSecond) {
        String edgeVertexName = routed.getDestinationInfo().getEdgeVertexName();
        destinationEdgeNames.add(edgeVertexName);
    }
    assertEquals(Sets.newHashSet("input1", "input2"), destinationEdgeNames);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) StateChangeNotifier(org.apache.tez.dag.app.dag.StateChangeNotifier) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CustomProcessorEvent(org.apache.tez.runtime.api.events.CustomProcessorEvent) CallableEvent(org.apache.tez.dag.app.dag.event.CallableEvent) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) LinkedList(java.util.LinkedList) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 9 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestVertexManager method testOnRootVertexInitialized.

/**
 * Sends two batches of root input events for "input1" through a VertexManager
 * built from the RootInputVertexManager config builder, and checks that each
 * batch reaches the mock handler as exactly one TezEvent wrapping the original
 * InputDataInformationEvent.
 *
 * @throws Exception propagated from any call in the test body
 */
@Test(timeout = 5000)
public void testOnRootVertexInitialized() throws Exception {
    Configuration conf = new Configuration();
    VertexManager vm = new VertexManager(RootInputVertexManager.createConfigBuilder(conf).build(), UserGroupInformation.getCurrentUser(), mockVertex, mockAppContext, mock(StateChangeNotifier.class));
    vm.initialize();
    InputDescriptor id1 = mock(InputDescriptor.class);
    List<Event> events1 = new LinkedList<Event>();
    InputDataInformationEvent diEvent1 = InputDataInformationEvent.createWithSerializedPayload(0, null);
    events1.add(diEvent1);
    vm.onRootVertexInitialized("input1", id1, events1);
    verify(mockHandler, times(1)).handle(requestCaptor.capture());
    List<TezEvent> tezEvents1 = requestCaptor.getValue().getEvents();
    assertEquals(1, tezEvents1.size());
    assertEquals(diEvent1, tezEvents1.get(0).getEvent());
    InputDescriptor id2 = mock(InputDescriptor.class);
    List<Event> events2 = new LinkedList<Event>();
    InputDataInformationEvent diEvent2 = InputDataInformationEvent.createWithSerializedPayload(0, null);
    events2.add(diEvent2);
    // NOTE(review): the second batch reuses the name "input1" with a new descriptor;
    // presumably deliberate, since RootInputVertexManager rejects events from a
    // second distinct input - confirm.
    vm.onRootVertexInitialized("input1", id2, events2);
    verify(mockHandler, times(2)).handle(requestCaptor.capture());
    List<TezEvent> tezEvents2 = requestCaptor.getValue().getEvents();
    // Expected value first, so a failure message reports expected/actual correctly.
    assertEquals(1, tezEvents2.size());
    assertEquals(diEvent2, tezEvents2.get(0).getEvent());
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) StateChangeNotifier(org.apache.tez.dag.app.dag.StateChangeNotifier) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CustomProcessorEvent(org.apache.tez.runtime.api.events.CustomProcessorEvent) CallableEvent(org.apache.tez.dag.app.dag.event.CallableEvent) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) LinkedList(java.util.LinkedList) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Example 10 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestRootInputVertexManager method testEventsFromMultipleInputs.

/**
 * Checks that a RootInputVertexManager accepts events from a single input and
 * throws an IllegalStateException, with the expected message prefix, when a
 * second input attempts to send events.
 *
 * @throws IOException propagated from creating the user payload
 */
@Test(timeout = 5000)
public void testEventsFromMultipleInputs() throws IOException {
    VertexManagerPluginContext pluginContext = mock(VertexManagerPluginContext.class);
    UserPayload payload = TezUtils.createUserPayloadFromConf(new TezConfiguration());
    doReturn(payload).when(pluginContext).getUserPayload();
    doReturn("vertex1").when(pluginContext).getVertexName();
    doReturn(1).when(pluginContext).getVertexNumTasks(eq("vertex1"));

    RootInputVertexManager manager = new RootInputVertexManager(pluginContext);
    manager.initialize();

    // A single input sending events is accepted.
    InputDescriptor firstDescriptor = mock(InputDescriptor.class);
    List<Event> firstInputEvents = new LinkedList<Event>();
    firstInputEvents.add(InputDataInformationEvent.createWithSerializedPayload(0, null));
    manager.onRootVertexInitialized("input1", firstDescriptor, firstInputEvents);

    // Events from a second input must be rejected.
    InputDescriptor secondDescriptor = mock(InputDescriptor.class);
    List<Event> secondInputEvents = new LinkedList<Event>();
    secondInputEvents.add(InputDataInformationEvent.createWithSerializedPayload(0, null));
    try {
        manager.onRootVertexInitialized("input2", secondDescriptor, secondInputEvents);
        fail("Expecting failure in case of multiple inputs attempting to send events");
    } catch (IllegalStateException expected) {
        String message = expected.getMessage();
        assertTrue(message.startsWith("RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager"));
    }
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext) UserPayload(org.apache.tez.dag.api.UserPayload) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) LinkedList(java.util.LinkedList) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Aggregations

InputDescriptor (org.apache.tez.dag.api.InputDescriptor) 37; Test (org.junit.Test) 18; InputInitializerDescriptor (org.apache.tez.dag.api.InputInitializerDescriptor) 11; OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor) 10; InputContext (org.apache.tez.runtime.api.InputContext) 10; TezConfiguration (org.apache.tez.dag.api.TezConfiguration) 9; Configuration (org.apache.hadoop.conf.Configuration) 8; OutputContext (org.apache.tez.runtime.api.OutputContext) 8; UserPayload (org.apache.tez.dag.api.UserPayload) 7; LinkedList (java.util.LinkedList) 5; WeightedScalingMemoryDistributor (org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) 5; DataSourceDescriptor (org.apache.tez.dag.api.DataSourceDescriptor) 4; ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor) 4; TezVertexID (org.apache.tez.dag.records.TezVertexID) 4; InputSpec (org.apache.tez.runtime.api.impl.InputSpec) 4; TezEvent (org.apache.tez.runtime.api.impl.TezEvent) 4; IOException (java.io.IOException) 3; List (java.util.List) 3; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId) 3; RootInputLeafOutput (org.apache.tez.dag.api.RootInputLeafOutput) 3