Search in sources :

Example 11 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestRootInputInitializerManager method testSuccessBeforeEvent.

// Order event1 success1, success2, event2
// Primarily a failure scenario, when a Task moves back to running from success
//
// What the assertions below pin down:
//   1. An event sent by attempt 1 is not handed to the initializer while no success has been reported.
//   2. Reporting success for attempt 1 hands exactly one event to the initializer.
//   3. Reporting success for attempt 2 afterwards triggers no further delivery.
//   4. An event sent by attempt 2 after that is not delivered either.
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testSuccessBeforeEvent() throws Exception {
    // Build the InitializerWrapper under test around mocked collaborators.
    InputDescriptor id = mock(InputDescriptor.class);
    InputInitializerDescriptor iid = mock(InputInitializerDescriptor.class);
    RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> rootInput = new RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>("InputName", id, iid);
    InputInitializer initializer = mock(InputInitializer.class);
    InputInitializerContext initializerContext = mock(InputInitializerContext.class);
    Vertex vertex = mock(Vertex.class);
    StateChangeNotifier stateChangeNotifier = mock(StateChangeNotifier.class);
    // Deep stubs allow the chained appContext.getCurrentDAG().getVertex(...) stubbing further down.
    AppContext appContext = mock(AppContext.class, RETURNS_DEEP_STUBS);
    RootInputInitializerManager.InitializerWrapper initializerWrapper = new RootInputInitializerManager.InitializerWrapper(rootInput, initializer, initializerContext, vertex, stateChangeNotifier, appContext);
    // Identifiers for the single source task whose two attempts drive the scenario.
    ApplicationId appId = ApplicationId.newInstance(1000, 1);
    TezDAGID dagId = TezDAGID.getInstance(appId, 1);
    TezVertexID srcVertexId = TezVertexID.getInstance(dagId, 2);
    TezTaskID srcTaskId1 = TezTaskID.getInstance(srcVertexId, 3);
    // Stub the source task as RUNNING and make it reachable through the app context's current DAG
    // for any vertex name.
    Vertex srcVertex = mock(Vertex.class);
    Task srcTask1 = mock(Task.class);
    doReturn(TaskState.RUNNING).when(srcTask1).getState();
    doReturn(srcTask1).when(srcVertex).getTask(srcTaskId1.getId());
    when(appContext.getCurrentDAG().getVertex(any(String.class))).thenReturn(srcVertex);
    String srcVertexName = "srcVertexName";
    List<TezEvent> eventList = Lists.newLinkedList();
    // First Attempt send event
    TezTaskAttemptID srcTaskAttemptId11 = TezTaskAttemptID.getInstance(srcTaskId1, 1);
    EventMetaData sourceInfo11 = new EventMetaData(EventMetaData.EventProducerConsumerType.PROCESSOR, srcVertexName, null, srcTaskAttemptId11);
    InputInitializerEvent e1 = InputInitializerEvent.create("fakeVertex", "fakeInput", null);
    TezEvent te1 = new TezEvent(e1, sourceInfo11);
    eventList.add(te1);
    initializerWrapper.handleInputInitializerEvents(eventList);
    // No success has been reported for the attempt yet, so nothing may reach the initializer.
    verify(initializer, never()).handleInputInitializerEvent(any(List.class));
    eventList.clear();
    // First attempt, Task success notification
    initializerWrapper.onTaskSucceeded(srcVertexName, srcTaskId1, srcTaskAttemptId11.getId());
    // The success notification must result in exactly one delivery carrying exactly one event.
    ArgumentCaptor<List> argumentCaptor = ArgumentCaptor.forClass(List.class);
    verify(initializer, times(1)).handleInputInitializerEvent(argumentCaptor.capture());
    List<InputInitializerEvent> invokedEvents = argumentCaptor.getValue();
    assertEquals(1, invokedEvents.size());
    // Forget the interactions recorded so far so the never() checks below only see later calls.
    reset(initializer);
    TezTaskAttemptID srcTaskAttemptId12 = TezTaskAttemptID.getInstance(srcTaskId1, 2);
    // 2nd attempt succeeded
    initializerWrapper.onTaskSucceeded(srcVertexName, srcTaskId1, srcTaskAttemptId12.getId());
    verify(initializer, never()).handleInputInitializerEvent(any(List.class));
    // 2nd attempt send event
    EventMetaData sourceInfo12 = new EventMetaData(EventMetaData.EventProducerConsumerType.PROCESSOR, srcVertexName, null, srcTaskAttemptId12);
    InputInitializerEvent e2 = InputInitializerEvent.create("fakeVertex", "fakeInput", null);
    TezEvent te2 = new TezEvent(e2, sourceInfo12);
    eventList.add(te2);
    initializerWrapper.handleInputInitializerEvents(eventList);
    // The second attempt's event must not be delivered.
    verify(initializer, never()).handleInputInitializerEvent(any(List.class));
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) InputInitializer(org.apache.tez.runtime.api.InputInitializer) TezDAGID(org.apache.tez.dag.records.TezDAGID) List(java.util.List) TezVertexID(org.apache.tez.dag.records.TezVertexID) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) RootInputLeafOutput(org.apache.tez.dag.api.RootInputLeafOutput) AppContext(org.apache.tez.dag.app.AppContext) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) TezTaskID(org.apache.tez.dag.records.TezTaskID) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 12 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class MRInputHelpers method configureMRInputWithLegacySplitGeneration.

/**
 * Setup split generation on the client, with splits being distributed via the traditional
 * MapReduce mechanism of distributing splits via the Distributed Cache.
 * <p>
 * Usage of this technique for handling splits is not advised. Instead, splits should be either
 * generated in the AM, or generated in the client and distributed via the AM. See {@link
 * org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
 * <p>
 * Note: Attempting to use this method to add multiple Inputs to a Vertex is not supported.
 * <p>
 * This mechanism of propagating splits may be removed in a subsequent release, and is not recommended.
 *
 * @param conf           configuration to be used by {@link org.apache.tez.mapreduce.input.MRInput}.
 *                       This is expected to be fully configured.
 * @param splitsDir      the path to which splits will be generated.
 * @param useLegacyInput whether to use {@link org.apache.tez.mapreduce.input.MRInputLegacy} or
 *                       {@link org.apache.tez.mapreduce.input.MRInput}
 * @return an instance of {@link org.apache.tez.dag.api.DataSourceDescriptor} which can be added
 * as a data source to a {@link org.apache.tez.dag.api.Vertex}
 * @throws TezUncheckedException if split generation fails with an {@link IOException},
 *                               {@link InterruptedException} or {@link ClassNotFoundException};
 *                               the original exception is preserved as the cause. When the
 *                               cause is an {@link InterruptedException}, the calling thread's
 *                               interrupt status is restored before this is thrown.
 */
@InterfaceStability.Unstable
@InterfaceAudience.LimitedPrivate({ "hive, pig" })
public static DataSourceDescriptor configureMRInputWithLegacySplitGeneration(Configuration conf, Path splitsDir, boolean useLegacyInput) {
    try {
        InputSplitInfo inputSplitInfo = generateInputSplits(conf, splitsDir);
        InputDescriptor inputDescriptor = InputDescriptor.create(useLegacyInput ? MRInputLegacy.class.getName() : MRInput.class.getName()).setUserPayload(createMRInputPayload(conf, null, false, true));
        // Collects the local resources registered for the generated split files.
        Map<String, LocalResource> additionalLocalResources = new HashMap<String, LocalResource>();
        updateLocalResourcesForInputSplits(conf, inputSplitInfo, additionalLocalResources);
        // No initializer descriptor is supplied (null): task count, credentials and location
        // hints are taken directly from the generated split info.
        return DataSourceDescriptor.create(inputDescriptor, null, inputSplitInfo.getNumTasks(), inputSplitInfo.getCredentials(), VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), additionalLocalResources);
    } catch (IOException e) {
        throw new TezUncheckedException("Failed to generate InputSplits", e);
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the interrupt flag; restore it so that code
        // further up the stack can still observe that this thread was interrupted.
        Thread.currentThread().interrupt();
        throw new TezUncheckedException("Failed to generate InputSplits", e);
    } catch (ClassNotFoundException e) {
        throw new TezUncheckedException("Failed to generate InputSplits", e);
    }
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) MRInput(org.apache.tez.mapreduce.input.MRInput) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) HashMap(java.util.HashMap) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) Unstable(org.apache.hadoop.classification.InterfaceStability.Unstable)

Example 13 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestMemoryDistributor method testScalingProcessor.

/**
 * Registers four memory requests (two inputs, one output, one processor) against a JVM
 * memory setting of 10000 and checks the amount assigned to each request's callback after
 * the initial allocation.
 */
@Test(timeout = 5000)
public void testScalingProcessor() throws TezException {
    MemoryDistributor distributor = new MemoryDistributor(2, 1, conf);
    distributor.setJvmMemory(10000L);

    // Request #1: input, 10K
    MemoryUpdateCallbackForTest firstInputCallback = new MemoryUpdateCallbackForTest();
    InputContext firstInputContext = createTestInputContext();
    InputDescriptor firstInputDescriptor = createTestInputDescriptor();
    distributor.requestMemory(10000, firstInputCallback, firstInputContext, firstInputDescriptor);

    // Request #2: input, 10K
    MemoryUpdateCallbackForTest secondInputCallback = new MemoryUpdateCallbackForTest();
    InputContext secondInputContext = createTestInputContext();
    InputDescriptor secondInputDescriptor = createTestInputDescriptor();
    distributor.requestMemory(10000, secondInputCallback, secondInputContext, secondInputDescriptor);

    // Request #3: output, 5K
    MemoryUpdateCallbackForTest outputCallback = new MemoryUpdateCallbackForTest();
    OutputContext outputContext = createTestOutputContext();
    OutputDescriptor outputDescriptor = createTestOutputDescriptor();
    distributor.requestMemory(5000, outputCallback, outputContext, outputDescriptor);

    // Request #4: processor, 5K
    MemoryUpdateCallbackForTest processorCallback = new MemoryUpdateCallbackForTest();
    ProcessorContext processorContext = createTestProcessortContext();
    ProcessorDescriptor processorDescriptor = createTestProcessorDescriptor();
    distributor.requestMemory(5000, processorCallback, processorContext, processorDescriptor);

    distributor.makeInitialAllocations();

    // Total available: 70% of 10K = 7000
    // Requested: 10K + 10K + 5K + 5K
    // Expected after scaling down: 2333.33, 2333.33, 1166.66, 1166.66
    // (each check accepts either neighbouring integer)
    assertTrue(2333 <= firstInputCallback.assigned && firstInputCallback.assigned <= 2334);
    assertTrue(2333 <= secondInputCallback.assigned && secondInputCallback.assigned <= 2334);
    assertTrue(1166 <= outputCallback.assigned && outputCallback.assigned <= 1167);
    assertTrue(1166 <= processorCallback.assigned && processorCallback.assigned <= 1167);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) ProcessorContext(org.apache.tez.runtime.api.ProcessorContext) Test(org.junit.Test)

Example 14 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestMemoryDistributor method createTestInputDescriptor.

/**
 * Builds a mocked {@link InputDescriptor} whose {@code getClassName()} is stubbed to return
 * {@code "InputClass"}.
 *
 * @return the stubbed mock descriptor
 */
protected InputDescriptor createTestInputDescriptor() {
    InputDescriptor mockDescriptor = mock(InputDescriptor.class);
    doReturn("InputClass").when(mockDescriptor).getClassName();
    return mockDescriptor;
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor)

Example 15 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

the class TestMemoryDistributor method testScalingDisabled.

/**
 * With {@code TEZ_TASK_SCALE_MEMORY_ENABLED} set to false, each request's callback must be
 * assigned exactly the amount that was requested.
 */
@Test(timeout = 5000)
public void testScalingDisabled() throws TezException {
    // Real world values
    final long firstRequestBytes = 104857600L;
    final long secondRequestBytes = 144965632L;

    // Work on a copy of the shared configuration so the flag change stays local to this test.
    Configuration scalingDisabledConf = new Configuration(this.conf);
    scalingDisabledConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ENABLED, false);

    MemoryDistributor distributor = new MemoryDistributor(2, 0, scalingDisabledConf);
    distributor.setJvmMemory(207093760L);

    // Request #1
    MemoryUpdateCallbackForTest firstCallback = new MemoryUpdateCallbackForTest();
    InputContext firstInputContext = createTestInputContext();
    InputDescriptor firstInputDescriptor = createTestInputDescriptor();
    distributor.requestMemory(firstRequestBytes, firstCallback, firstInputContext, firstInputDescriptor);

    // Request #2
    MemoryUpdateCallbackForTest secondCallback = new MemoryUpdateCallbackForTest();
    InputContext secondInputContext = createTestInputContext();
    InputDescriptor secondInputDescriptor = createTestInputDescriptor();
    distributor.requestMemory(secondRequestBytes, secondCallback, secondInputContext, secondInputDescriptor);

    distributor.makeInitialAllocations();

    assertEquals(firstRequestBytes, firstCallback.assigned);
    assertEquals(secondRequestBytes, secondCallback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) Test(org.junit.Test)

Aggregations

InputDescriptor (org.apache.tez.dag.api.InputDescriptor)37 Test (org.junit.Test)18 InputInitializerDescriptor (org.apache.tez.dag.api.InputInitializerDescriptor)11 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)10 InputContext (org.apache.tez.runtime.api.InputContext)10 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)9 Configuration (org.apache.hadoop.conf.Configuration)8 OutputContext (org.apache.tez.runtime.api.OutputContext)8 UserPayload (org.apache.tez.dag.api.UserPayload)7 LinkedList (java.util.LinkedList)5 WeightedScalingMemoryDistributor (org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor)5 DataSourceDescriptor (org.apache.tez.dag.api.DataSourceDescriptor)4 ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor)4 TezVertexID (org.apache.tez.dag.records.TezVertexID)4 InputSpec (org.apache.tez.runtime.api.impl.InputSpec)4 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)4 IOException (java.io.IOException)3 List (java.util.List)3 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)3 RootInputLeafOutput (org.apache.tez.dag.api.RootInputLeafOutput)3