Search in sources :

Example 1 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class VertexImpl method setAdditionalInputs.

/**
 * Replaces this vertex's root input descriptors with the ones described by
 * {@code inputs} and registers a default input spec for each of them.
 *
 * <p>The existing {@code rootInputDescriptors} map is discarded and rebuilt
 * from scratch; entries are keyed by the input name taken from the proto.
 *
 * @param inputs the root-input protos to convert and register
 */
@Override
public void setAdditionalInputs(List<RootInputLeafOutputProto> inputs) {
    final int inputCount = inputs.size();
    LOG.info("Setting " + inputCount + " additional inputs for vertex" + this.logIdentifier);
    this.rootInputDescriptors = Maps.newHashMapWithExpectedSize(inputCount);
    for (RootInputLeafOutputProto proto : inputs) {
        final String inputName = proto.getName();
        addIO(inputName);
        InputDescriptor inputDescriptor = DagTypeConverters.convertInputDescriptorFromDAGPlan(proto.getIODescriptor());
        // The initializer (controller) descriptor is optional in the proto; null means "none".
        InputInitializerDescriptor initializerDescriptor = null;
        if (proto.hasControllerDescriptor()) {
            initializerDescriptor = DagTypeConverters.convertInputInitializerDescriptorFromDAGPlan(proto.getControllerDescriptor());
        }
        this.rootInputDescriptors.put(inputName, new RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>(inputName, inputDescriptor, initializerDescriptor));
        this.rootInputSpecs.put(inputName, DEFAULT_ROOT_INPUT_SPECS);
    }
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) RootInputLeafOutputProto(org.apache.tez.dag.api.records.DAGProtos.RootInputLeafOutputProto) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor)

Example 2 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestWeightedScalingMemoryDistributor method createTestInputDescriptor.

/**
 * Builds a mocked {@link InputDescriptor} whose {@code getClassName()} is
 * stubbed to return the fully-qualified name of the given input class.
 *
 * @param inputClazz the logical input class the descriptor should report
 * @return the stubbed descriptor mock
 */
private InputDescriptor createTestInputDescriptor(Class<? extends LogicalInput> inputClazz) {
    final InputDescriptor mockedDescriptor = mock(InputDescriptor.class);
    final String reportedClassName = inputClazz.getName();
    doReturn(reportedClassName).when(mockedDescriptor).getClassName();
    return mockedDescriptor;
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor)

Example 3 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testAdditionalReserveFractionWeightedScaling.

/**
 * Checks weighted scaling when an additional per-IO reservation fraction
 * (0.025 per IO, capped at 0.2) is configured on top of the weighted ratios.
 *
 * <p>Four components each request 10000 out of a 10000 JVM budget; the
 * assertions expect the grants to follow the 6:2:1:3 weights noted on each
 * request and to sum to 6000.
 */
@Test(timeout = 5000)
public void testAdditionalReserveFractionWeightedScaling() throws TezException {
    Configuration conf = new Configuration(this.conf);
    conf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 2, 3, 6, 1, 1));
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_PER_IO, 0.025d);
    conf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_MAX, 0.2d);
    MemoryDistributor dist = new MemoryDistributor(2, 2, conf);
    // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
    dist.setJvmMemory(10000L);
    // First request - ScatterGatherShuffleInput [weight 6]
    MemoryUpdateCallbackForTest e1Callback = new MemoryUpdateCallbackForTest();
    InputContext e1InputContext1 = createTestInputContext();
    InputDescriptor e1InDesc1 = createTestInputDescriptor(OrderedGroupedKVInput.class);
    dist.requestMemory(10000, e1Callback, e1InputContext1, e1InDesc1);
    // Second request - BroadcastInput [weight 2]
    MemoryUpdateCallbackForTest e2Callback = new MemoryUpdateCallbackForTest();
    InputContext e2InputContext2 = createTestInputContext();
    InputDescriptor e2InDesc2 = createTestInputDescriptor(UnorderedKVInput.class);
    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
    // Third request - randomOutput (simulates MROutput) [weight 1]
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(10000, e3Callback, e3OutputContext1, e3OutDesc1);
    // Fourth request - OnFileSortedOutput [weight 3]
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    OutputContext e4OutputContext2 = createTestOutputContext();
    OutputDescriptor e4OutDesc2 = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    dist.requestMemory(10000, e4Callback, e4OutputContext2, e4OutDesc2);
    dist.makeInitialAllocations();
    // Total available: 60% of 10K = 6000
    // NOTE(review): presumably 4 IOs * 0.025 extra reservation on top of the
    // distributor's base reserve leaves 60% - confirm against MemoryDistributor.
    // 4 requests (weight) - 10K (6), 10K(2), 10K(1), 10K(3)
    // Total weight 12 -> 500 per weight unit
    // Scale down to - 3000, 1000, 500, 1500
    assertEquals(3000, e1Callback.assigned);
    assertEquals(1000, e2Callback.assigned);
    assertEquals(500, e3Callback.assigned);
    assertEquals(1500, e4Callback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 4 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestWeightedScalingMemoryDistributor method testSimpleWeightedScaling.

/**
 * Checks plain weighted scaling with no additional reservation configured.
 *
 * <p>Two inputs and two outputs each ask for 10000 out of a 10000 JVM budget;
 * the assertions expect grants of 3000, 1000, 1000 and 2000 respectively.
 */
@Test(timeout = 5000)
public void testSimpleWeightedScaling() throws TezException {
    Configuration weightedConf = new Configuration(this.conf);
    weightedConf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));
    System.err.println(Joiner.on(",").join(weightedConf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS)));
    MemoryDistributor distributor = new MemoryDistributor(2, 2, weightedConf);
    distributor.setJvmMemory(10000L);

    // Request 1: ScatterGatherShuffleInput
    MemoryUpdateCallbackForTest orderedInputCallback = new MemoryUpdateCallbackForTest();
    InputContext orderedInputContext = createTestInputContext();
    InputDescriptor orderedInputDescriptor = createTestInputDescriptor(OrderedGroupedKVInput.class);
    distributor.requestMemory(10000, orderedInputCallback, orderedInputContext, orderedInputDescriptor);

    // Request 2: BroadcastInput
    MemoryUpdateCallbackForTest unorderedInputCallback = new MemoryUpdateCallbackForTest();
    InputContext unorderedInputContext = createTestInputContext();
    InputDescriptor unorderedInputDescriptor = createTestInputDescriptor(UnorderedKVInput.class);
    distributor.requestMemory(10000, unorderedInputCallback, unorderedInputContext, unorderedInputDescriptor);

    // Request 3: randomOutput (stands in for MROutput)
    MemoryUpdateCallbackForTest genericOutputCallback = new MemoryUpdateCallbackForTest();
    OutputContext genericOutputContext = createTestOutputContext();
    OutputDescriptor genericOutputDescriptor = createTestOutputDescriptor();
    distributor.requestMemory(10000, genericOutputCallback, genericOutputContext, genericOutputDescriptor);

    // Request 4: OnFileSortedOutput
    MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
    OutputContext sortedOutputContext = createTestOutputContext();
    OutputDescriptor sortedOutputDescriptor = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    distributor.requestMemory(10000, sortedOutputCallback, sortedOutputContext, sortedOutputDescriptor);

    distributor.makeInitialAllocations();

    // Available: 70% of 10K = 7000
    // Requests with weights: 10K (3), 10K (1), 10K (1), 10K (2)
    // Expected grants after scaling: 3000, 1000, 1000, 2000
    assertEquals(3000, orderedInputCallback.assigned);
    assertEquals(1000, unorderedInputCallback.assigned);
    assertEquals(1000, genericOutputCallback.assigned);
    assertEquals(2000, sortedOutputCallback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) OutputContext(org.apache.tez.runtime.api.OutputContext) Test(org.junit.Test)

Example 5 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestVertexManager method testOnRootVertexInitialized2.

/**
 * Regression test for TEZ-1647.
 *
 * <p>Uses a custom vertex manager that is expected to hold back root input
 * events until both "input1" and "input2" have been initialized: the request
 * captured after the first input carries no events, and the request captured
 * after the second carries one event per input.
 *
 * @throws Exception if setting up or driving the vertex manager fails
 */
@Test(timeout = 5000)
public void testOnRootVertexInitialized2() throws Exception {
    VertexManagerPluginDescriptor pluginDescriptor = VertexManagerPluginDescriptor.create(CustomVertexManager.class.getName());
    VertexManager vertexManager = new VertexManager(pluginDescriptor, UserGroupInformation.getCurrentUser(), mockVertex, mockAppContext, mock(StateChangeNotifier.class));
    vertexManager.initialize();

    // First input: the manager should only cache the event, not forward it
    // (no context.addRootInputEvents call yet).
    InputDescriptor firstDescriptor = mock(InputDescriptor.class);
    List<Event> firstInputEvents = new LinkedList<Event>();
    firstInputEvents.add(InputDataInformationEvent.createWithSerializedPayload(0, null));
    vertexManager.onRootVertexInitialized("input1", firstDescriptor, firstInputEvents);
    verify(mockHandler, times(1)).handle(requestCaptor.capture());
    List<TezEvent> routedAfterFirst = requestCaptor.getValue().getEvents();
    assertEquals(0, routedAfterFirst.size());

    // Second input: now context.addRootInputEvents is invoked for both
    // input1 and input2.
    InputDescriptor secondDescriptor = mock(InputDescriptor.class);
    List<Event> secondInputEvents = new LinkedList<Event>();
    secondInputEvents.add(InputDataInformationEvent.createWithSerializedPayload(0, null));
    vertexManager.onRootVertexInitialized("input2", secondDescriptor, secondInputEvents);
    verify(mockHandler, times(2)).handle(requestCaptor.capture());
    List<TezEvent> routedAfterSecond = requestCaptor.getValue().getEvents();
    assertEquals(2, routedAfterSecond.size());

    // The EventMetaData of the routed events must name both inputs.
    Set<String> destinationEdgeVertices = new HashSet<String>();
    for (TezEvent routed : routedAfterSecond) {
        destinationEdgeVertices.add(routed.getDestinationInfo().getEdgeVertexName());
    }
    assertEquals(Sets.newHashSet("input1", "input2"), destinationEdgeVertices);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) StateChangeNotifier(org.apache.tez.dag.app.dag.StateChangeNotifier) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CustomProcessorEvent(org.apache.tez.runtime.api.events.CustomProcessorEvent) CallableEvent(org.apache.tez.dag.app.dag.event.CallableEvent) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) LinkedList(java.util.LinkedList) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

InputDescriptor (org.apache.tez.dag.api.InputDescriptor)38 Test (org.junit.Test)18 InputInitializerDescriptor (org.apache.tez.dag.api.InputInitializerDescriptor)12 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)10 InputContext (org.apache.tez.runtime.api.InputContext)10 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)9 Configuration (org.apache.hadoop.conf.Configuration)8 UserPayload (org.apache.tez.dag.api.UserPayload)8 OutputContext (org.apache.tez.runtime.api.OutputContext)8 LinkedList (java.util.LinkedList)5 DataSourceDescriptor (org.apache.tez.dag.api.DataSourceDescriptor)5 WeightedScalingMemoryDistributor (org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor)5 IOException (java.io.IOException)4 ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor)4 TezVertexID (org.apache.tez.dag.records.TezVertexID)4 InputSpec (org.apache.tez.runtime.api.impl.InputSpec)4 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)4 List (java.util.List)3 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)3 RootInputLeafOutput (org.apache.tez.dag.api.RootInputLeafOutput)3