Search in sources :

Example 1 with EventType

use of org.apache.tez.runtime.api.impl.EventType in project tez by apache.

the class VertexImpl method getTaskAttemptTezEvents.

/**
 * Collects the events that should be handed to a task attempt on this call.
 *
 * <p>The result is built in two steps. First the owning task is asked for its events starting
 * at {@code preRoutedFromEventId}. Then, while holding {@code onDemandRouteEventsReadLock},
 * {@code onDemandRouteEvents} is walked from {@code fromEventId} and the entries that apply to
 * this attempt are appended. The walk stops when the list holds {@code maxEvents} entries, when
 * {@code Edge.maybeAddTezEventForDestinationTask} returns {@code false}, or when the entries
 * that existed at the time of the call have all been visited.
 *
 * <p>If an {@link AMUserCodeException} is raised while routing, it is logged, reported through
 * {@code eventHandler} as a {@link VertexEventManagerUserCodeError}, the collected events are
 * discarded and the on-demand index is reset to {@code fromEventId}.
 *
 * @param attemptID the attempt requesting events
 * @param fromEventId index into {@code onDemandRouteEvents} at which to start scanning
 * @param preRoutedFromEventId start index passed to {@code Task.getTaskAttemptTezEvents}
 * @param maxEvents upper bound on the number of events collected in the returned list
 * @return a {@link TaskAttemptEventInfo} built from the next on-demand index, the collected
 *         events and the next pre-routed index
 * @throws TezUncheckedException if an on-demand entry has an event type this method does not route
 */
@Override
public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID, int fromEventId, int preRoutedFromEventId, int maxEvents) {
    Task task = getTask(attemptID.getTaskID());
    // events handed back by the task itself; the on-demand events below are appended to this list
    ArrayList<TezEvent> events = task.getTaskAttemptTezEvents(attemptID, preRoutedFromEventId, maxEvents);
    int nextPreRoutedFromEventId = preRoutedFromEventId + events.size();
    // stays at fromEventId unless the scan below advances it
    int nextFromEventId = fromEventId;
    onDemandRouteEventsReadLock.lock();
    try {
        // size is sampled once; the loop below never looks past this count
        int currEventCount = onDemandRouteEvents.size();
        try {
            if (currEventCount > fromEventId) {
                // the shared EMPTY_TASK_ATTEMPT_TEZ_EVENTS instance is never appended to;
                // a fresh list is allocated in its place
                if (events != TaskImpl.EMPTY_TASK_ATTEMPT_TEZ_EVENTS) {
                    events.ensureCapacity(maxEvents);
                } else {
                    events = Lists.newArrayListWithCapacity(maxEvents);
                }
                // remembered so that only the events added by this scan are counted in the log line
                int numPreRoutedEvents = events.size();
                int taskIndex = attemptID.getTaskID().getId();
                Preconditions.checkState(taskIndex < tasks.size(), "Invalid task index for TA: " + attemptID + " vertex: " + getLogIdentifier());
                boolean isFirstEvent = true;
                boolean firstEventObsoleted = false;
                for (nextFromEventId = fromEventId; nextFromEventId < currEventCount; ++nextFromEventId) {
                    boolean earlyExit = false;
                    // the list is full; nextFromEventId is left pointing at the first unprocessed entry
                    if (events.size() == maxEvents) {
                        break;
                    }
                    EventInfo eventInfo = onDemandRouteEvents.get(nextFromEventId);
                    if (eventInfo.isObsolete) {
                        // ignore obsolete events
                        // note: the flag is set for any obsolete entry seen, not only the one at fromEventId
                        firstEventObsoleted = true;
                        continue;
                    }
                    TezEvent tezEvent = eventInfo.tezEvent;
                    switch(tezEvent.getEventType()) {
                        case INPUT_FAILED_EVENT:
                        case DATA_MOVEMENT_EVENT:
                        case COMPOSITE_DATA_MOVEMENT_EVENT:
                            {
                                // these three types are routed through the edge recorded with the event
                                int srcTaskIndex = eventInfo.eventTaskIndex;
                                Edge srcEdge = eventInfo.eventEdge;
                                PendingEventRouteMetadata pendingRoute = null;
                                if (isFirstEvent) {
                                    // the first event is the one that can have pending routes because its expanded
                                    // events had not been completely sent in the last round.
                                    isFirstEvent = false;
                                    pendingRoute = srcEdge.removePendingEvents(attemptID);
                                    if (pendingRoute != null) {
                                        // obsoleted
                                        // a pending route for a different event is only legal if an
                                        // obsolete entry was skipped earlier in this scan
                                        if (tezEvent != pendingRoute.getTezEvent()) {
                                            Preconditions.checkState(firstEventObsoleted);
                                            // pending routes can be ignored for obsoleted events
                                            pendingRoute = null;
                                        }
                                    }
                                }
                                if (!srcEdge.maybeAddTezEventForDestinationTask(tezEvent, attemptID, srcTaskIndex, events, maxEvents, pendingRoute)) {
                                    // not enough space left for this iteration events.
                                    // Exit and start from here next time
                                    earlyExit = true;
                                }
                            }
                            break;
                        case ROOT_INPUT_DATA_INFORMATION_EVENT:
                            {
                                // delivered only to the task whose index matches the event's target index
                                InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
                                if (riEvent.getTargetIndex() == taskIndex) {
                                    events.add(tezEvent);
                                }
                            }
                            break;
                        default:
                            throw new TezUncheckedException("Unexpected event type for task: " + tezEvent.getEventType());
                    }
                    // break before the loop increment so that this entry is revisited on the next call
                    if (earlyExit) {
                        break;
                    }
                }
                int numEventsSent = events.size() - numPreRoutedEvents;
                if (numEventsSent > 0) {
                    StringBuilder builder = new StringBuilder();
                    builder.append("Sending ").append(attemptID).append(" ").append(numEventsSent).append(" events [").append(fromEventId).append(",").append(nextFromEventId).append(") total ").append(currEventCount).append(" ").append(getLogIdentifier());
                    LOG.info(builder.toString());
                }
            }
        } catch (AMUserCodeException e) {
            String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
            LOG.error(msg, e);
            eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
            // roll back: nothing is delivered and the on-demand index does not advance
            nextFromEventId = fromEventId;
            // NOTE(review): this also drops the events obtained from the task above, while
            // nextPreRoutedFromEventId keeps its advanced value — confirm this is intended
            events.clear();
        }
    } finally {
        onDemandRouteEventsReadLock.unlock();
    }
    if (!events.isEmpty()) {
        // scan backwards for the most recent event of one of the recorded types
        for (int i = (events.size() - 1); i >= 0; --i) {
            TezEvent lastEvent = events.get(i);
            // record the last event sent by the AM to the task
            EventType lastEventType = lastEvent.getEventType();
            // if the following changes then critical path logic/recording may need revision
            if (lastEventType == EventType.COMPOSITE_DATA_MOVEMENT_EVENT || lastEventType == EventType.COMPOSITE_ROUTED_DATA_MOVEMENT_EVENT || lastEventType == EventType.DATA_MOVEMENT_EVENT || lastEventType == EventType.ROOT_INPUT_DATA_INFORMATION_EVENT) {
                task.getAttempt(attemptID).setLastEventSent(lastEvent);
                break;
            }
        }
    }
    return new TaskAttemptEventInfo(nextFromEventId, events, nextPreRoutedFromEventId);
}
Also used : TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) DAGEventType(org.apache.tez.dag.app.dag.event.DAGEventType) EventType(org.apache.tez.runtime.api.impl.EventType) VertexEventType(org.apache.tez.dag.app.dag.event.VertexEventType) TaskEventType(org.apache.tez.dag.app.dag.event.TaskEventType) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) PendingEventRouteMetadata(org.apache.tez.dag.app.dag.impl.Edge.PendingEventRouteMetadata) VertexEventManagerUserCodeError(org.apache.tez.dag.app.dag.event.VertexEventManagerUserCodeError) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 2 with EventType

use of org.apache.tez.runtime.api.impl.EventType in project tez by apache.

the class TaskCommunicatorManager method heartbeat.

/**
 * Handles a heartbeat received from a container and, optionally, from the task attempt
 * running inside it.
 *
 * <p>Heartbeats from containers that are not in {@code registeredContainers}, or that name a
 * task attempt which {@code registeredAttempts} does not map to the same container, are
 * answered with {@code RESPONSE_SHOULD_DIE}. Otherwise the events carried by the request are
 * split by type and forwarded through {@code sendEvent}: the status update first, then the
 * attempt-generated events, then the (at most one) attempt-finished event, and finally
 * everything destined for the vertex. The response carries the events the vertex returns for
 * this attempt.
 *
 * @param request the heartbeat request
 * @return {@code RESPONSE_SHOULD_DIE} for unknown containers or attempts; otherwise a response
 *         built from the {@link TaskAttemptEventInfo} for the attempt (or from an empty one
 *         when the request names no attempt)
 * @throws IOException declared by the signature; the raising call is not visible here
 * @throws TezException declared by the signature; the raising call is not visible here
 */
public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) throws IOException, TezException {
    ContainerId containerId = ConverterUtils.toContainerId(request.getContainerIdentifier());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Received heartbeat from container, request=" + request);
    }
    if (!registeredContainers.containsKey(containerId)) {
        LOG.warn("Received task heartbeat from unknown container with id: " + containerId + ", asking it to die");
        return RESPONSE_SHOULD_DIE;
    }

    // A heartbeat can arrive at any time, possibly after the AM has decided to kill the running
    // task/container. If that decision has already gone through the pipeline, the heartbeat is
    // dropped; otherwise it is processed and the system knows how to deal with it (for example
    // via FailedInputEvents, which only matters when the heartbeat carries events).
    // Hence no synchronization here.
    pingContainerHeartbeatHandler(containerId);

    TezTaskAttemptID taskAttemptID = request.getTaskAttemptId();
    if (taskAttemptID == null) {
        // container-only heartbeat: answer with an empty event info
        TaskAttemptEventInfo noEvents = new TaskAttemptEventInfo(0, null, 0);
        return new TaskHeartbeatResponse(false, noEvents.getEvents(), noEvents.getNextFromEventId(), noEvents.getNextPreRoutedFromEventId());
    }

    ContainerId owningContainer = registeredAttempts.get(taskAttemptID);
    boolean ownedByThisContainer = owningContainer != null && owningContainer.equals(containerId);
    if (!ownedByThisContainer) {
        // This can happen when a task heartbeats while its container is being unregistered.
        // The plugin will eventually learn about it through the corresponding unregister; until
        // then there is a race between that unregister and this method returning.
        // TODO TEZ-2003 (post) TEZ-2666. An exception back is likely a better approach than sending a shouldDie = true,
        // so that the plugin can handle the scenario. Alternately augment the response with error codes.
        // Error codes would be better than exceptions.
        LOG.info("Attempt: " + taskAttemptID + " is not recognized for heartbeats");
        return RESPONSE_SHOULD_DIE;
    }

    List<TezEvent> inEvents = request.getEvents();
    if (LOG.isDebugEnabled()) {
        int inEventCount = (inEvents == null) ? -1 : inEvents.size();
        LOG.debug("Ping from " + taskAttemptID.toString() + " events: " + inEventCount);
    }
    long receivedAt = context.getClock().getTime();

    // taFinishedEvents  - the TaskAttemptFinishedEvent(s)
    // taGeneratedEvents - for recovery: events generated by this attempt that its downstream
    //                     vertices need
    // eventsForVertex   - all taGeneratedEvents plus others such as
    //                     INPUT_READ_ERROR_EVENT/INPUT_FAILED_EVENT
    // taGeneratedEvents go to both the TaskAttempt and the Vertex (the latter for performance).
    // taFinishedEvents are collected separately and sent to the TaskAttempt below.
    List<TezEvent> taFinishedEvents = new ArrayList<TezEvent>();
    List<TezEvent> taGeneratedEvents = new ArrayList<TezEvent>();
    List<TezEvent> eventsForVertex = new ArrayList<TezEvent>();
    TaskAttemptEventStatusUpdate statusUpdate = null;
    boolean readErrorReported = false;

    for (TezEvent incoming : ListUtils.emptyIfNull(inEvents)) {
        // Stamp the event with the AM's clock on receipt; this sidesteps clock skew between machines.
        incoming.setEventReceivedTime(receivedAt);
        final EventType type = incoming.getEventType();

        if (type == EventType.TASK_STATUS_UPDATE_EVENT) {
            // TA_STATUS_UPDATE has to go out before TA_DONE/TA_FAILED/TA_KILLED, otherwise the status may be missed
            statusUpdate = new TaskAttemptEventStatusUpdate(taskAttemptID, (TaskStatusUpdateEvent) incoming.getEvent());
            continue;
        }

        boolean attemptFinished = type == EventType.TASK_ATTEMPT_COMPLETED_EVENT
            || type == EventType.TASK_ATTEMPT_FAILED_EVENT
            || type == EventType.TASK_ATTEMPT_KILLED_EVENT;
        if (attemptFinished) {
            taFinishedEvents.add(incoming);
            continue;
        }

        if (type == EventType.INPUT_READ_ERROR_EVENT) {
            readErrorReported = true;
        }
        boolean generatedByAttempt = type == EventType.DATA_MOVEMENT_EVENT
            || type == EventType.COMPOSITE_DATA_MOVEMENT_EVENT
            || type == EventType.ROOT_INPUT_INITIALIZER_EVENT
            || type == EventType.VERTEX_MANAGER_EVENT;
        if (generatedByAttempt) {
            taGeneratedEvents.add(incoming);
        }
        eventsForVertex.add(incoming);
    }

    if (statusUpdate != null) {
        statusUpdate.setReadErrorReported(readErrorReported);
        sendEvent(statusUpdate);
    }

    // hand the attempt-generated events to the TaskAttempt
    if (!taGeneratedEvents.isEmpty()) {
        sendEvent(new TaskAttemptEventTezEventUpdate(taskAttemptID, taGeneratedEvents));
    }

    // hand the finished event (at most one is allowed) to the TaskAttempt
    Preconditions.checkArgument(taFinishedEvents.size() <= 1, "Multiple TaskAttemptFinishedEvent");
    for (TezEvent finished : taFinishedEvents) {
        EventMetaData sourceMeta = finished.getSourceInfo();
        switch (finished.getEventType()) {
            case TASK_ATTEMPT_COMPLETED_EVENT: {
                sendEvent(new TaskAttemptEvent(sourceMeta.getTaskAttemptID(), TaskAttemptEventType.TA_DONE));
                break;
            }
            case TASK_ATTEMPT_FAILED_EVENT:
            case TASK_ATTEMPT_KILLED_EVENT: {
                // the termination cause is derived from the component that produced the event
                TaskAttemptTerminationCause cause;
                switch (sourceMeta.getEventGenerator()) {
                    case INPUT:
                        cause = TaskAttemptTerminationCause.INPUT_READ_ERROR;
                        break;
                    case PROCESSOR:
                        cause = TaskAttemptTerminationCause.APPLICATION_ERROR;
                        break;
                    case OUTPUT:
                        cause = TaskAttemptTerminationCause.OUTPUT_WRITE_ERROR;
                        break;
                    case SYSTEM:
                        cause = TaskAttemptTerminationCause.FRAMEWORK_ERROR;
                        break;
                    default:
                        throw new TezUncheckedException("Unknown EventProducerConsumerType: " + sourceMeta.getEventGenerator());
                }
                if (finished.getEventType() == EventType.TASK_ATTEMPT_FAILED_EVENT) {
                    TaskAttemptFailedEvent failedPayload = (TaskAttemptFailedEvent) finished.getEvent();
                    sendEvent(new TaskAttemptEventAttemptFailed(
                        sourceMeta.getTaskAttemptID(),
                        TaskAttemptEventType.TA_FAILED,
                        failedPayload.getTaskFailureType(),
                        "Error: " + failedPayload.getDiagnostics(),
                        cause));
                } else {
                    // killed
                    TaskAttemptKilledEvent killedPayload = (TaskAttemptKilledEvent) finished.getEvent();
                    sendEvent(new TaskAttemptEventAttemptKilled(
                        sourceMeta.getTaskAttemptID(),
                        "Error: " + killedPayload.getDiagnostics(),
                        cause));
                }
                break;
            }
            default:
                throw new TezUncheckedException("Unhandled tez event type: " + finished.getEventType());
        }
    }

    if (!eventsForVertex.isEmpty()) {
        TezVertexID targetVertex = taskAttemptID.getTaskID().getVertexID();
        sendEvent(new VertexEventRouteEvent(targetVertex, Collections.unmodifiableList(eventsForVertex)));
    }
    taskHeartbeatHandler.pinged(taskAttemptID);

    // ask the attempt's vertex for the events to send back in this response
    TaskAttemptEventInfo eventInfo = context.getCurrentDAG()
        .getVertex(taskAttemptID.getTaskID().getVertexID())
        .getTaskAttemptTezEvents(taskAttemptID, request.getStartIndex(), request.getPreRoutedStartIndex(), request.getMaxEvents());
    return new TaskHeartbeatResponse(false, eventInfo.getEvents(), eventInfo.getNextFromEventId(), eventInfo.getNextPreRoutedFromEventId());
}
Also used : TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) TaskAttemptEventStatusUpdate(org.apache.tez.dag.app.dag.event.TaskAttemptEventStatusUpdate) DAGAppMasterEventType(org.apache.tez.dag.app.dag.event.DAGAppMasterEventType) EventType(org.apache.tez.runtime.api.impl.EventType) TaskAttemptEventType(org.apache.tez.dag.app.dag.event.TaskAttemptEventType) ArrayList(java.util.ArrayList) TaskAttemptEvent(org.apache.tez.dag.app.dag.event.TaskAttemptEvent) TaskStatusUpdateEvent(org.apache.tez.runtime.api.events.TaskStatusUpdateEvent) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TaskAttemptFailedEvent(org.apache.tez.runtime.api.events.TaskAttemptFailedEvent) TaskAttemptEventTezEventUpdate(org.apache.tez.dag.app.dag.event.TaskAttemptEventTezEventUpdate) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TaskAttemptEventAttemptKilled(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled) TaskHeartbeatResponse(org.apache.tez.serviceplugins.api.TaskHeartbeatResponse) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptTerminationCause(org.apache.tez.dag.records.TaskAttemptTerminationCause) TaskAttemptKilledEvent(org.apache.tez.runtime.api.events.TaskAttemptKilledEvent) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed)

Aggregations

TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException)2 EventType (org.apache.tez.runtime.api.impl.EventType)2 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)2 ArrayList (java.util.ArrayList)1 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)1 TaskLocationHint (org.apache.tez.dag.api.TaskLocationHint)1 VertexLocationHint (org.apache.tez.dag.api.VertexLocationHint)1 TaskAttemptEventInfo (org.apache.tez.dag.app.TaskAttemptEventInfo)1 Task (org.apache.tez.dag.app.dag.Task)1 DAGAppMasterEventType (org.apache.tez.dag.app.dag.event.DAGAppMasterEventType)1 DAGEventType (org.apache.tez.dag.app.dag.event.DAGEventType)1 TaskAttemptEvent (org.apache.tez.dag.app.dag.event.TaskAttemptEvent)1 TaskAttemptEventAttemptFailed (org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed)1 TaskAttemptEventAttemptKilled (org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled)1 TaskAttemptEventStatusUpdate (org.apache.tez.dag.app.dag.event.TaskAttemptEventStatusUpdate)1 TaskAttemptEventTezEventUpdate (org.apache.tez.dag.app.dag.event.TaskAttemptEventTezEventUpdate)1 TaskAttemptEventType (org.apache.tez.dag.app.dag.event.TaskAttemptEventType)1 TaskEventScheduleTask (org.apache.tez.dag.app.dag.event.TaskEventScheduleTask)1 TaskEventType (org.apache.tez.dag.app.dag.event.TaskEventType)1 VertexEventManagerUserCodeError (org.apache.tez.dag.app.dag.event.VertexEventManagerUserCodeError)1