Search in sources :

Example 1 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class VertexImpl method getTaskAttemptTezEvents.

/**
 * Collects the events to hand to a task attempt in one round.
 *
 * <p>The result starts with the events returned by the task itself (requested from
 * {@code preRoutedFromEventId}) and is then topped up, under the on-demand read lock, with
 * entries from {@code onDemandRouteEvents} beginning at index {@code fromEventId}, until either
 * the list holds {@code maxEvents} entries, the edge reports that an event did not fit, or the
 * on-demand list is exhausted.
 *
 * <p>If routing raises an {@code AMUserCodeException} the error is logged, a
 * {@code VertexEventManagerUserCodeError} is dispatched, the event list is cleared and the
 * returned on-demand cursor is reset to {@code fromEventId}.
 *
 * @param attemptID the attempt the events are collected for
 * @param fromEventId index into the on-demand routed events to start reading from
 * @param preRoutedFromEventId start id forwarded to the task's own event lookup
 * @param maxEvents upper bound on the size of the returned event list
 * @return the next on-demand index to read from, the collected events, and the next pre-routed
 *         event id ({@code preRoutedFromEventId} plus the number of events the task returned)
 * @throws TezUncheckedException if an on-demand event has a type other than the ones handled in
 *         the switch below
 */
@Override
public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID, int fromEventId, int preRoutedFromEventId, int maxEvents) {
    Task task = getTask(attemptID.getTaskID());
    ArrayList<TezEvent> events = task.getTaskAttemptTezEvents(attemptID, preRoutedFromEventId, maxEvents);
    // the pre-routed cursor is advanced here, before any on-demand routing is attempted
    int nextPreRoutedFromEventId = preRoutedFromEventId + events.size();
    int nextFromEventId = fromEventId;
    onDemandRouteEventsReadLock.lock();
    try {
        int currEventCount = onDemandRouteEvents.size();
        try {
            // only do work if there are on-demand events this attempt has not seen yet
            if (currEventCount > fromEventId) {
                // identity check against the TaskImpl constant: when the task returned that
                // constant a fresh list is allocated instead of growing the constant itself
                // (presumably a shared empty sentinel - confirm in TaskImpl)
                if (events != TaskImpl.EMPTY_TASK_ATTEMPT_TEZ_EVENTS) {
                    events.ensureCapacity(maxEvents);
                } else {
                    events = Lists.newArrayListWithCapacity(maxEvents);
                }
                // remembered so that only the events added by this loop are counted in the log
                int numPreRoutedEvents = events.size();
                int taskIndex = attemptID.getTaskID().getId();
                Preconditions.checkState(taskIndex < tasks.size(), "Invalid task index for TA: " + attemptID + " vertex: " + getLogIdentifier());
                boolean isFirstEvent = true;
                boolean firstEventObsoleted = false;
                for (nextFromEventId = fromEventId; nextFromEventId < currEventCount; ++nextFromEventId) {
                    boolean earlyExit = false;
                    // list is full: stop without consuming the event at nextFromEventId
                    if (events.size() == maxEvents) {
                        break;
                    }
                    EventInfo eventInfo = onDemandRouteEvents.get(nextFromEventId);
                    if (eventInfo.isObsolete) {
                        // ignore obsolete events
                        firstEventObsoleted = true;
                        continue;
                    }
                    TezEvent tezEvent = eventInfo.tezEvent;
                    switch(tezEvent.getEventType()) {
                        case INPUT_FAILED_EVENT:
                        case DATA_MOVEMENT_EVENT:
                        case COMPOSITE_DATA_MOVEMENT_EVENT:
                            {
                                int srcTaskIndex = eventInfo.eventTaskIndex;
                                Edge srcEdge = eventInfo.eventEdge;
                                PendingEventRouteMetadata pendingRoute = null;
                                if (isFirstEvent) {
                                    // the first event is the one that can have pending routes because its expanded
                                    // events had not been completely sent in the last round.
                                    isFirstEvent = false;
                                    pendingRoute = srcEdge.removePendingEvents(attemptID);
                                    if (pendingRoute != null) {
                                        // obsoleted
                                        if (tezEvent != pendingRoute.getTezEvent()) {
                                            // the pending route belongs to a different event than the
                                            // one being routed now; that is only expected when an
                                            // obsolete event was skipped earlier in this loop
                                            Preconditions.checkState(firstEventObsoleted);
                                            // pending routes can be ignored for obsoleted events
                                            pendingRoute = null;
                                        }
                                    }
                                }
                                if (!srcEdge.maybeAddTezEventForDestinationTask(tezEvent, attemptID, srcTaskIndex, events, maxEvents, pendingRoute)) {
                                    // not enough space left for this iteration events.
                                    // Exit and start from here next time
                                    earlyExit = true;
                                }
                            }
                            break;
                        case ROOT_INPUT_DATA_INFORMATION_EVENT:
                            {
                                // root input events are delivered only to the task they target
                                InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
                                if (riEvent.getTargetIndex() == taskIndex) {
                                    events.add(tezEvent);
                                }
                            }
                            break;
                        default:
                            throw new TezUncheckedException("Unexpected event type for task: " + tezEvent.getEventType());
                    }
                    // break here (not via the loop increment) so nextFromEventId still points at
                    // the event that did not fit
                    if (earlyExit) {
                        break;
                    }
                }
                int numEventsSent = events.size() - numPreRoutedEvents;
                if (numEventsSent > 0) {
                    StringBuilder builder = new StringBuilder();
                    builder.append("Sending ").append(attemptID).append(" ").append(numEventsSent).append(" events [").append(fromEventId).append(",").append(nextFromEventId).append(") total ").append(currEventCount).append(" ").append(getLogIdentifier());
                    LOG.info(builder.toString());
                }
            }
        } catch (AMUserCodeException e) {
            String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
            LOG.error(msg, e);
            eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
            // roll the on-demand cursor back so nothing from this round counts as delivered
            nextFromEventId = fromEventId;
            // NOTE(review): this also drops any pre-routed events fetched from the task above,
            // while nextPreRoutedFromEventId has already been advanced past them - confirm
            // that this is intended.
            events.clear();
        }
    } finally {
        onDemandRouteEventsReadLock.unlock();
    }
    if (!events.isEmpty()) {
        // scan backwards for the most recent event of one of the types listed below
        for (int i = (events.size() - 1); i >= 0; --i) {
            TezEvent lastEvent = events.get(i);
            // record the last event sent by the AM to the task
            EventType lastEventType = lastEvent.getEventType();
            // if the following changes then critical path logic/recording may need revision
            if (lastEventType == EventType.COMPOSITE_DATA_MOVEMENT_EVENT || lastEventType == EventType.COMPOSITE_ROUTED_DATA_MOVEMENT_EVENT || lastEventType == EventType.DATA_MOVEMENT_EVENT || lastEventType == EventType.ROOT_INPUT_DATA_INFORMATION_EVENT) {
                task.getAttempt(attemptID).setLastEventSent(lastEvent);
                break;
            }
        }
    }
    return new TaskAttemptEventInfo(nextFromEventId, events, nextPreRoutedFromEventId);
}
Also used : TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) DAGEventType(org.apache.tez.dag.app.dag.event.DAGEventType) EventType(org.apache.tez.runtime.api.impl.EventType) VertexEventType(org.apache.tez.dag.app.dag.event.VertexEventType) TaskEventType(org.apache.tez.dag.app.dag.event.TaskEventType) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) PendingEventRouteMetadata(org.apache.tez.dag.app.dag.impl.Edge.PendingEventRouteMetadata) VertexEventManagerUserCodeError(org.apache.tez.dag.app.dag.event.VertexEventManagerUserCodeError) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 2 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class RootInputVertexManager method onRootVertexInitialized.

/**
 * Processes the events emitted for one root input of this vertex.
 *
 * <p>Three event kinds are recognised:
 * <ul>
 *   <li>{@code InputConfigureVertexTasksEvent}: records {@code inputName} as the configured
 *       input and reconfigures the vertex with the event's spec update (or the default single
 *       physical input spec when none is supplied), location hint and task count;</li>
 *   <li>{@code InputUpdatePayloadEvent}: replaces the user payload on {@code inputDescriptor};</li>
 *   <li>{@code InputDataInformationEvent}: routed 1:1 by copying the source index into the
 *       target index, then collected.</li>
 * </ul>
 * All collected data information events are passed to the context once the loop finishes.
 *
 * @param inputName name of the root input the events belong to
 * @param inputDescriptor descriptor whose payload may be updated
 * @param events events to process, in order
 */
@Override
public void onRootVertexInitialized(String inputName, InputDescriptor inputDescriptor, List<Event> events) {
    boolean sawDataEvent = false;
    List<InputDataInformationEvent> routedEvents = Lists.newLinkedList();
    for (Event current : events) {
        if (current instanceof InputConfigureVertexTasksEvent) {
            // Configuration must arrive before any data event. No tasks should have been
            // started yet; that is checked by the initial state check.
            Preconditions.checkState(!sawDataEvent);
            Preconditions.checkState(
                getContext().getVertexNumTasks(getContext().getVertexName()) == -1,
                "Parallelism for the vertex should be set to -1 if the InputInitializer is setting parallelism" + ", VertexName: " + getContext().getVertexName());
            Preconditions.checkState(
                configuredInputName == null,
                "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager" + ", VertexName: " + getContext().getVertexName() + ", ConfiguredInput: " + configuredInputName + ", CurrentInput: " + inputName);
            configuredInputName = inputName;
            InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) current;
            // fall back to the default spec when the initializer did not provide one
            InputSpecUpdate specUpdate = configureEvent.getInputSpecUpdate();
            if (specUpdate == null) {
                specUpdate = InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate();
            }
            Map<String, InputSpecUpdate> specUpdatesByInput = new HashMap<String, InputSpecUpdate>();
            specUpdatesByInput.put(inputName, specUpdate);
            getContext().reconfigureVertex(specUpdatesByInput, configureEvent.getLocationHint(), configureEvent.getNumTasks());
        }
        if (current instanceof InputUpdatePayloadEvent) {
            // Payload updates must also precede any data event.
            Preconditions.checkState(!sawDataEvent);
            InputUpdatePayloadEvent payloadEvent = (InputUpdatePayloadEvent) current;
            inputDescriptor.setUserPayload(UserPayload.create(payloadEvent.getUserPayload()));
        } else if (current instanceof InputDataInformationEvent) {
            sawDataEvent = true;
            // The number of tasks should have been set by this point.
            Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) != 0);
            Preconditions.checkState(
                configuredInputName == null || configuredInputName.equals(inputName),
                "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager" + ", VertexName:" + getContext().getVertexName() + ", ConfiguredInput: " + configuredInputName + ", CurrentInput: " + inputName);
            configuredInputName = inputName;
            InputDataInformationEvent dataEvent = (InputDataInformationEvent) current;
            // 1:1 routing: the event goes to the task with the same index as its source
            dataEvent.setTargetIndex(dataEvent.getSourceIndex());
            routedEvents.add(dataEvent);
        }
    }
    getContext().addRootInputEvents(inputName, routedEvents);
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 3 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestVertexManager method testOnRootVertexInitialized2.

/**
 * Regression test for TEZ-1647.
 * The custom vertex manager used here generates events only once both input1 and input2
 * have been initialized.
 *
 * @throws Exception
 */
@Test(timeout = 5000)
public void testOnRootVertexInitialized2() throws Exception {
    VertexManager vertexManager = new VertexManager(
        VertexManagerPluginDescriptor.create(CustomVertexManager.class.getName()),
        UserGroupInformation.getCurrentUser(),
        mockVertex,
        mockAppContext,
        mock(StateChangeNotifier.class));
    vertexManager.initialize();

    // first input: the manager does not call context.addRootInputEvents, it only caches the event
    InputDescriptor firstDescriptor = mock(InputDescriptor.class);
    InputDataInformationEvent firstDataEvent = InputDataInformationEvent.createWithSerializedPayload(0, null);
    List<Event> firstInputEvents = new LinkedList<Event>();
    firstInputEvents.add(firstDataEvent);
    vertexManager.onRootVertexInitialized("input1", firstDescriptor, firstInputEvents);
    verify(mockHandler, times(1)).handle(requestCaptor.capture());
    List<TezEvent> routedAfterFirstInput = requestCaptor.getValue().getEvents();
    assertEquals(0, routedAfterFirstInput.size());

    // second input: the manager now calls context.addRootInputEvents for input1 and input2
    InputDescriptor secondDescriptor = mock(InputDescriptor.class);
    InputDataInformationEvent secondDataEvent = InputDataInformationEvent.createWithSerializedPayload(0, null);
    List<Event> secondInputEvents = new LinkedList<Event>();
    secondInputEvents.add(secondDataEvent);
    vertexManager.onRootVertexInitialized("input2", secondDescriptor, secondInputEvents);
    verify(mockHandler, times(2)).handle(requestCaptor.capture());
    List<TezEvent> routedAfterSecondInput = requestCaptor.getValue().getEvents();
    assertEquals(2, routedAfterSecondInput.size());

    // the destination metadata must name each of the two inputs exactly once
    Set<String> destinationInputNames = new HashSet<String>();
    for (TezEvent routed : routedAfterSecondInput) {
        String edgeVertexName = routed.getDestinationInfo().getEdgeVertexName();
        destinationInputNames.add(edgeVertexName);
    }
    assertEquals(Sets.newHashSet("input1", "input2"), destinationInputNames);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) StateChangeNotifier(org.apache.tez.dag.app.dag.StateChangeNotifier) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CustomProcessorEvent(org.apache.tez.runtime.api.events.CustomProcessorEvent) CallableEvent(org.apache.tez.dag.app.dag.event.CallableEvent) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) LinkedList(java.util.LinkedList) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestVertexManager method testOnRootVertexInitialized.

/**
 * Initializes the same root input ("input1") twice on a VertexManager built from the
 * RootInputVertexManager configuration and checks that, after each call, the handler has
 * received a request carrying exactly the one data information event that was supplied.
 *
 * @throws Exception if the vertex manager cannot be created or initialized
 */
@Test(timeout = 5000)
public void testOnRootVertexInitialized() throws Exception {
    Configuration conf = new Configuration();
    VertexManager vm = new VertexManager(RootInputVertexManager.createConfigBuilder(conf).build(), UserGroupInformation.getCurrentUser(), mockVertex, mockAppContext, mock(StateChangeNotifier.class));
    vm.initialize();
    // first round: a single data information event for input1
    InputDescriptor id1 = mock(InputDescriptor.class);
    List<Event> events1 = new LinkedList<Event>();
    InputDataInformationEvent diEvent1 = InputDataInformationEvent.createWithSerializedPayload(0, null);
    events1.add(diEvent1);
    vm.onRootVertexInitialized("input1", id1, events1);
    verify(mockHandler, times(1)).handle(requestCaptor.capture());
    List<TezEvent> tezEvents1 = requestCaptor.getValue().getEvents();
    assertEquals(1, tezEvents1.size());
    assertEquals(diEvent1, tezEvents1.get(0).getEvent());
    // second round: same input name again, with a new descriptor and a new event
    InputDescriptor id2 = mock(InputDescriptor.class);
    List<Event> events2 = new LinkedList<Event>();
    InputDataInformationEvent diEvent2 = InputDataInformationEvent.createWithSerializedPayload(0, null);
    events2.add(diEvent2);
    vm.onRootVertexInitialized("input1", id2, events2);
    verify(mockHandler, times(2)).handle(requestCaptor.capture());
    List<TezEvent> tezEvents2 = requestCaptor.getValue().getEvents();
    // expected value goes first so a failure reports "expected 1 but was N", not the reverse
    assertEquals(1, tezEvents2.size());
    assertEquals(diEvent2, tezEvents2.get(0).getEvent());
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) StateChangeNotifier(org.apache.tez.dag.app.dag.StateChangeNotifier) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CustomProcessorEvent(org.apache.tez.runtime.api.events.CustomProcessorEvent) CallableEvent(org.apache.tez.dag.app.dag.event.CallableEvent) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) LinkedList(java.util.LinkedList) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Example 5 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestRootInputVertexManager method testEventsFromMultipleInputs.

/**
 * Feeds data information events from two differently named inputs into a single
 * RootInputVertexManager and expects the second input to be rejected with an
 * IllegalStateException carrying the "cannot configure multiple inputs" message.
 *
 * @throws IOException
 */
@Test(timeout = 5000)
public void testEventsFromMultipleInputs() throws IOException {
    TezConfiguration tezConf = new TezConfiguration();
    UserPayload payload = TezUtils.createUserPayloadFromConf(tezConf);

    // stub a plugin context for a one-task vertex named "vertex1"
    VertexManagerPluginContext pluginContext = mock(VertexManagerPluginContext.class);
    doReturn("vertex1").when(pluginContext).getVertexName();
    doReturn(1).when(pluginContext).getVertexNumTasks(eq("vertex1"));
    doReturn(payload).when(pluginContext).getUserPayload();

    RootInputVertexManager manager = new RootInputVertexManager(pluginContext);
    manager.initialize();

    // All good so far, single input only.
    InputDescriptor firstDescriptor = mock(InputDescriptor.class);
    InputDataInformationEvent firstInputEvent = InputDataInformationEvent.createWithSerializedPayload(0, null);
    List<Event> firstInputEvents = new LinkedList<Event>();
    firstInputEvents.add(firstInputEvent);
    manager.onRootVertexInitialized("input1", firstDescriptor, firstInputEvents);

    InputDescriptor secondDescriptor = mock(InputDescriptor.class);
    InputDataInformationEvent secondInputEvent = InputDataInformationEvent.createWithSerializedPayload(0, null);
    List<Event> secondInputEvents = new LinkedList<Event>();
    secondInputEvents.add(secondInputEvent);
    try {
        // Should fail due to second input
        manager.onRootVertexInitialized("input2", secondDescriptor, secondInputEvents);
        fail("Expecting failure in case of multiple inputs attempting to send events");
    } catch (IllegalStateException expected) {
        String message = expected.getMessage();
        assertTrue(message.startsWith("RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager"));
    }
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext) UserPayload(org.apache.tez.dag.api.UserPayload) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) LinkedList(java.util.LinkedList) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Aggregations

InputDataInformationEvent (org.apache.tez.runtime.api.events.InputDataInformationEvent) 22, Event (org.apache.tez.runtime.api.Event) 16, MRSplitProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) 10, Test (org.junit.Test) 10, Configuration (org.apache.hadoop.conf.Configuration) 8, InputSplit (org.apache.hadoop.mapred.InputSplit) 7, JobConf (org.apache.hadoop.mapred.JobConf) 5, InputContext (org.apache.tez.runtime.api.InputContext) 5, InputConfigureVertexTasksEvent (org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) 5, LinkedList (java.util.LinkedList) 4, AtomicLong (java.util.concurrent.atomic.AtomicLong) 4, Path (org.apache.hadoop.fs.Path) 4, LongWritable (org.apache.hadoop.io.LongWritable) 4, Text (org.apache.hadoop.io.Text) 4, SequenceFileInputFormat (org.apache.hadoop.mapred.SequenceFileInputFormat) 4, UserPayload (org.apache.tez.dag.api.UserPayload) 4, MRInputUserPayloadProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) 4, MRSplitsProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) 4, ByteString (com.google.protobuf.ByteString) 3, ArrayList (java.util.ArrayList) 3