Search in sources :

Example 1 with InputInitializerContext

use of org.apache.tez.runtime.api.InputInitializerContext in project tez by apache.

the class TestRootInputInitializerManager method testSuccessBeforeEvent.

// Order of operations: event1, success1, success2, event2
// Primarily a failure scenario, when a Task moves back to running from success
//
// Expectations checked below:
//  - an event from attempt 1 is not forwarded to the initializer until that attempt's success is reported;
//  - a success notification for attempt 2 forwards nothing;
//  - an event from attempt 2 that arrives after its success notification is not forwarded either.
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testSuccessBeforeEvent() throws Exception {
    // Build an InitializerWrapper around mocked collaborators.
    InputDescriptor id = mock(InputDescriptor.class);
    InputInitializerDescriptor iid = mock(InputInitializerDescriptor.class);
    RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> rootInput = new RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>("InputName", id, iid);
    InputInitializer initializer = mock(InputInitializer.class);
    InputInitializerContext initializerContext = mock(InputInitializerContext.class);
    Vertex vertex = mock(Vertex.class);
    StateChangeNotifier stateChangeNotifier = mock(StateChangeNotifier.class);
    // Deep stubs allow the chained getCurrentDAG().getVertex(...) call to be stubbed further down.
    AppContext appContext = mock(AppContext.class, RETURNS_DEEP_STUBS);
    RootInputInitializerManager.InitializerWrapper initializerWrapper = new RootInputInitializerManager.InitializerWrapper(rootInput, initializer, initializerContext, vertex, stateChangeNotifier, appContext);
    // Identifiers for the single source task whose attempts emit the events.
    ApplicationId appId = ApplicationId.newInstance(1000, 1);
    TezDAGID dagId = TezDAGID.getInstance(appId, 1);
    TezVertexID srcVertexId = TezVertexID.getInstance(dagId, 2);
    TezTaskID srcTaskId1 = TezTaskID.getInstance(srcVertexId, 3);
    // The source task is stubbed to report RUNNING and is reachable through the mocked app context.
    Vertex srcVertex = mock(Vertex.class);
    Task srcTask1 = mock(Task.class);
    doReturn(TaskState.RUNNING).when(srcTask1).getState();
    doReturn(srcTask1).when(srcVertex).getTask(srcTaskId1.getId());
    when(appContext.getCurrentDAG().getVertex(any(String.class))).thenReturn(srcVertex);
    String srcVertexName = "srcVertexName";
    List<TezEvent> eventList = Lists.newLinkedList();
    // First Attempt send event
    TezTaskAttemptID srcTaskAttemptId11 = TezTaskAttemptID.getInstance(srcTaskId1, 1);
    EventMetaData sourceInfo11 = new EventMetaData(EventMetaData.EventProducerConsumerType.PROCESSOR, srcVertexName, null, srcTaskAttemptId11);
    InputInitializerEvent e1 = InputInitializerEvent.create("fakeVertex", "fakeInput", null);
    TezEvent te1 = new TezEvent(e1, sourceInfo11);
    eventList.add(te1);
    initializerWrapper.handleInputInitializerEvents(eventList);
    // No success has been reported yet, so nothing may have reached the initializer.
    verify(initializer, never()).handleInputInitializerEvent(any(List.class));
    eventList.clear();
    // First attempt, Task success notification
    initializerWrapper.onTaskSucceeded(srcVertexName, srcTaskId1, srcTaskAttemptId11.getId());
    // Exactly one delivery, carrying exactly one event, is expected after the success notification.
    ArgumentCaptor<List> argumentCaptor = ArgumentCaptor.forClass(List.class);
    verify(initializer, times(1)).handleInputInitializerEvent(argumentCaptor.capture());
    List<InputInitializerEvent> invokedEvents = argumentCaptor.getValue();
    assertEquals(1, invokedEvents.size());
    // Clear recorded interactions so the never() checks below only consider calls made from here on.
    reset(initializer);
    TezTaskAttemptID srcTaskAttemptId12 = TezTaskAttemptID.getInstance(srcTaskId1, 2);
    // 2nd attempt succeeded
    initializerWrapper.onTaskSucceeded(srcVertexName, srcTaskId1, srcTaskAttemptId12.getId());
    verify(initializer, never()).handleInputInitializerEvent(any(List.class));
    // 2nd attempt send event
    EventMetaData sourceInfo12 = new EventMetaData(EventMetaData.EventProducerConsumerType.PROCESSOR, srcVertexName, null, srcTaskAttemptId12);
    InputInitializerEvent e2 = InputInitializerEvent.create("fakeVertex", "fakeInput", null);
    TezEvent te2 = new TezEvent(e2, sourceInfo12);
    eventList.add(te2);
    initializerWrapper.handleInputInitializerEvents(eventList);
    // The late event from the second attempt must not be forwarded to the initializer.
    verify(initializer, never()).handleInputInitializerEvent(any(List.class));
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) InputInitializer(org.apache.tez.runtime.api.InputInitializer) TezDAGID(org.apache.tez.dag.records.TezDAGID) List(java.util.List) TezVertexID(org.apache.tez.dag.records.TezVertexID) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) RootInputLeafOutput(org.apache.tez.dag.api.RootInputLeafOutput) AppContext(org.apache.tez.dag.app.AppContext) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) TezTaskID(org.apache.tez.dag.records.TezTaskID) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 2 with InputInitializerContext

use of org.apache.tez.runtime.api.InputInitializerContext in project tez by apache.

the class TestMRInputSplitDistributor method testSerializedPayload.

/**
 * With the serialize-event-payload flag set to {@code true}, checks that the split
 * distributor emits one payload-update event followed by one data-information event
 * per split, and that each data-information event carries only a serialized payload
 * that parses back into the original split.
 */
@Test(timeout = 5000)
public void testSerializedPayload() throws IOException {
    Configuration initializerConf = new Configuration(false);
    initializerConf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, true);
    ByteString serializedConf = TezUtils.createByteStringFromConf(initializerConf);

    // Two splits, distinguishable by their identifiers, packed into the user payload.
    InputSplit firstSplit = new InputSplitForTest(1);
    InputSplit secondSplit = new InputSplitForTest(2);
    MRSplitProto firstSplitProto = MRInputHelpers.createSplitProto(firstSplit);
    MRSplitProto secondSplitProto = MRInputHelpers.createSplitProto(secondSplit);
    MRSplitsProto packedSplits = MRSplitsProto.newBuilder()
        .addSplits(firstSplitProto)
        .addSplits(secondSplitProto)
        .build();
    MRInputUserPayloadProto payloadMessage = MRInputUserPayloadProto.newBuilder()
        .setSplits(packedSplits)
        .setConfigurationBytes(serializedConf)
        .build();
    UserPayload userPayload = UserPayload.create(payloadMessage.toByteString().asReadOnlyByteBuffer());

    InputInitializerContext initContext = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
    MRInputSplitDistributor distributor = new MRInputSplitDistributor(initContext);
    List<Event> emitted = distributor.initialize();

    // Expected layout: [payload update, data info for split 1, data info for split 2].
    assertEquals(3, emitted.size());
    assertTrue(emitted.get(0) instanceof InputUpdatePayloadEvent);
    assertTrue(emitted.get(1) instanceof InputDataInformationEvent);
    assertTrue(emitted.get(2) instanceof InputDataInformationEvent);
    InputDataInformationEvent firstInfo = (InputDataInformationEvent) emitted.get(1);
    InputDataInformationEvent secondInfo = (InputDataInformationEvent) emitted.get(2);

    // Only the serialized form of the payload is expected to be present.
    assertNull(firstInfo.getDeserializedUserPayload());
    assertNull(secondInfo.getDeserializedUserPayload());
    assertNotNull(firstInfo.getUserPayload());
    assertNotNull(secondInfo.getUserPayload());

    // Each serialized payload must decode back into the split it was built from.
    MRSplitProto decodedFirstProto = MRSplitProto.parseFrom(ByteString.copyFrom(firstInfo.getUserPayload()));
    InputSplit decodedFirst = MRInputUtils.getOldSplitDetailsFromEvent(decodedFirstProto, new Configuration());
    assertTrue(decodedFirst instanceof InputSplitForTest);
    assertEquals(1, ((InputSplitForTest) decodedFirst).identifier);

    MRSplitProto decodedSecondProto = MRSplitProto.parseFrom(ByteString.copyFrom(secondInfo.getUserPayload()));
    InputSplit decodedSecond = MRInputUtils.getOldSplitDetailsFromEvent(decodedSecondProto, new Configuration());
    assertTrue(decodedSecond instanceof InputSplitForTest);
    assertEquals(2, ((InputSplitForTest) decodedSecond).identifier);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) ByteString(com.google.protobuf.ByteString) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) Test(org.junit.Test)

Example 3 with InputInitializerContext

use of org.apache.tez.runtime.api.InputInitializerContext in project tez by apache.

the class TestMRInputSplitDistributor method testDeserializedPayload.

/**
 * With the serialize-event-payload flag set to {@code false}, checks that the split
 * distributor emits one payload-update event followed by one data-information event
 * per split, and that each data-information event carries only a deserialized payload
 * holding the original split object.
 */
@Test(timeout = 5000)
public void testDeserializedPayload() throws IOException {
    Configuration initializerConf = new Configuration(false);
    initializerConf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, false);
    ByteString serializedConf = TezUtils.createByteStringFromConf(initializerConf);

    // Two splits, distinguishable by their identifiers, packed into the user payload.
    InputSplit firstSplit = new InputSplitForTest(1);
    InputSplit secondSplit = new InputSplitForTest(2);
    MRSplitProto firstSplitProto = MRInputHelpers.createSplitProto(firstSplit);
    MRSplitProto secondSplitProto = MRInputHelpers.createSplitProto(secondSplit);
    MRSplitsProto packedSplits = MRSplitsProto.newBuilder()
        .addSplits(firstSplitProto)
        .addSplits(secondSplitProto)
        .build();
    MRInputUserPayloadProto payloadMessage = MRInputUserPayloadProto.newBuilder()
        .setSplits(packedSplits)
        .setConfigurationBytes(serializedConf)
        .build();
    UserPayload userPayload = UserPayload.create(payloadMessage.toByteString().asReadOnlyByteBuffer());

    InputInitializerContext initContext = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
    MRInputSplitDistributor distributor = new MRInputSplitDistributor(initContext);
    List<Event> emitted = distributor.initialize();

    // Expected layout: [payload update, data info for split 1, data info for split 2].
    assertEquals(3, emitted.size());
    assertTrue(emitted.get(0) instanceof InputUpdatePayloadEvent);
    assertTrue(emitted.get(1) instanceof InputDataInformationEvent);
    assertTrue(emitted.get(2) instanceof InputDataInformationEvent);
    InputDataInformationEvent firstInfo = (InputDataInformationEvent) emitted.get(1);
    InputDataInformationEvent secondInfo = (InputDataInformationEvent) emitted.get(2);

    // Only the deserialized form of the payload is expected to be present.
    assertNull(firstInfo.getUserPayload());
    assertNull(secondInfo.getUserPayload());
    assertNotNull(firstInfo.getDeserializedUserPayload());
    assertNotNull(secondInfo.getDeserializedUserPayload());

    // The deserialized payloads must be the test splits, in submission order.
    assertTrue(firstInfo.getDeserializedUserPayload() instanceof InputSplitForTest);
    assertEquals(1, ((InputSplitForTest) firstInfo.getDeserializedUserPayload()).identifier);
    assertTrue(secondInfo.getDeserializedUserPayload() instanceof InputSplitForTest);
    assertEquals(2, ((InputSplitForTest) secondInfo.getDeserializedUserPayload()).identifier);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) ByteString(com.google.protobuf.ByteString) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) Test(org.junit.Test)

Example 4 with InputInitializerContext

use of org.apache.tez.runtime.api.InputInitializerContext in project hive by apache.

the class TestDynamicPartitionPruner method testSingleSourceMultipleFiltersOrdering2.

/**
 * Single source vertex "v1" (stubbed to report 2 tasks): the vertex is marked as
 * processed before any event is added, then the same event is added four times.
 * Expects 4 processed events and 2 filtered sources.
 */
@Test(timeout = 20000)
public void testSingleSourceMultipleFiltersOrdering2() throws InterruptedException, SerDeException {
    InputInitializerContext initContext = mock(InputInitializerContext.class);
    doReturn(2).when(initContext).getVertexNumTasks("v1");

    MapWork work = createMockMapWork(new TestSource("v1", 2));
    DynamicPartitionPrunerForEventTesting pruner = new DynamicPartitionPrunerForEventTesting();
    pruner.initialize(initContext, work, new JobConf());

    PruneRunnable pruneTask = new PruneRunnable(pruner);
    Thread pruneThread = new Thread(pruneTask);
    pruneThread.start();
    try {
        pruneTask.start();

        InputInitializerEvent v1Event =
            InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
        v1Event.setSourceVertexName("v1");

        // Vertex completion is signalled first; the events follow afterwards.
        pruner.processVertex("v1");
        for (int sent = 0; sent < 4; sent++) {
            pruner.addEvent(v1Event);
        }

        pruneTask.awaitEnd();
        assertNoError(pruneTask);
        assertEquals(4, pruner.eventsProceessed.intValue());
        assertEquals(2, pruner.filteredSources.intValue());
    } finally {
        // Always stop and reap the worker thread, even when an assertion fails.
        pruneThread.interrupt();
        pruneThread.join();
    }
}
Also used : InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)

Example 5 with InputInitializerContext

use of org.apache.tez.runtime.api.InputInitializerContext in project hive by apache.

the class TestDynamicPartitionPruner method testMultipleSourcesOrdering2.

/**
 * Two source vertices ("v1" stubbed to 2 tasks, "v2" stubbed to 3 tasks): both vertices
 * are marked as processed before any event is added, then four "v1" events and three
 * "v2" events are added. Expects 7 processed events and 3 filtered sources.
 */
@Test(timeout = 20000)
public void testMultipleSourcesOrdering2() throws InterruptedException, SerDeException {
    InputInitializerContext initContext = mock(InputInitializerContext.class);
    doReturn(2).when(initContext).getVertexNumTasks("v1");
    doReturn(3).when(initContext).getVertexNumTasks("v2");

    MapWork work = createMockMapWork(new TestSource("v1", 2), new TestSource("v2", 1));
    DynamicPartitionPrunerForEventTesting pruner = new DynamicPartitionPrunerForEventTesting();
    pruner.initialize(initContext, work, new JobConf());

    PruneRunnable pruneTask = new PruneRunnable(pruner);
    Thread pruneThread = new Thread(pruneTask);
    pruneThread.start();
    try {
        pruneTask.start();

        InputInitializerEvent v1Event =
            InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
        v1Event.setSourceVertexName("v1");
        InputInitializerEvent v2Event =
            InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
        v2Event.setSourceVertexName("v2");

        // 2 X 2 events for V1. 3 X 1 events for V2
        // Vertex completions are signalled first; the events follow afterwards.
        pruner.processVertex("v1");
        pruner.processVertex("v2");
        for (int sent = 0; sent < 4; sent++) {
            pruner.addEvent(v1Event);
        }
        for (int sent = 0; sent < 3; sent++) {
            pruner.addEvent(v2Event);
        }

        pruneTask.awaitEnd();
        assertNoError(pruneTask);
        assertEquals(7, pruner.eventsProceessed.intValue());
        assertEquals(3, pruner.filteredSources.intValue());
    } finally {
        // Always stop and reap the worker thread, even when an assertion fails.
        pruneThread.interrupt();
        pruneThread.join();
    }
}
Also used : InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)

Aggregations

InputInitializerContext (org.apache.tez.runtime.api.InputInitializerContext): 17, Test (org.junit.Test): 14, MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 11, InputInitializerEvent (org.apache.tez.runtime.api.events.InputInitializerEvent): 11, JobConf (org.apache.hadoop.mapred.JobConf): 10, ByteString (com.google.protobuf.ByteString): 3, List (java.util.List): 3, Configuration (org.apache.hadoop.conf.Configuration): 3, InputSplit (org.apache.hadoop.mapred.InputSplit): 3, InputDescriptor (org.apache.tez.dag.api.InputDescriptor): 3, InputInitializerDescriptor (org.apache.tez.dag.api.InputInitializerDescriptor): 3, UserPayload (org.apache.tez.dag.api.UserPayload): 3, InputInitializer (org.apache.tez.runtime.api.InputInitializer): 3, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 2, RootInputLeafOutput (org.apache.tez.dag.api.RootInputLeafOutput): 2, AppContext (org.apache.tez.dag.app.AppContext): 2, TezDAGID (org.apache.tez.dag.records.TezDAGID): 2, TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID): 2, TezTaskID (org.apache.tez.dag.records.TezTaskID): 2, TezVertexID (org.apache.tez.dag.records.TezVertexID): 2