use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.
the class TaskImpl method getTaskAttemptTezEvents.
/**
 * Returns a copy of the events queued on this task for its attempts, starting at
 * {@code fromEventId} and containing at most {@code maxEvents} entries.
 *
 * <p>When there is nothing at or beyond {@code fromEventId}, the
 * {@code EMPTY_TASK_ATTEMPT_TEZ_EVENTS} instance is returned instead of a fresh list.
 *
 * @param attemptID attempt asking for events; must be one of this task's attempts
 * @param fromEventId index of the first event to return
 * @param maxEvents upper bound on the number of events returned
 * @return the requested slice of events, or {@code EMPTY_TASK_ATTEMPT_TEZ_EVENTS}
 * @throws TezUncheckedException if {@code attemptID} is not known to this task
 */
@Override
public ArrayList<TezEvent> getTaskAttemptTezEvents(TezTaskAttemptID attemptID, int fromEventId, int maxEvents) {
  readLock.lock();
  try {
    if (!attempts.containsKey(attemptID)) {
      throw new TezUncheckedException("Unknown TA: " + attemptID + " asking for events from task:" + getTaskId());
    }

    final int totalEvents = tezEventsForTaskAttempts.size();
    if (totalEvents <= fromEventId) {
      // nothing new for this attempt
      return EMPTY_TASK_ATTEMPT_TEZ_EVENTS;
    }

    final int batchSize = Math.min(maxEvents, totalEvents - fromEventId);
    final int toEventId = fromEventId + batchSize;

    // The events themselves are deliberately not modified, so no per-event copies are
    // needed. If, for example, the taskAttemptId had to be set into the TezEvent
    // destination metadata, each event would have to be copied, the copy modified and
    // then sent on the RPC. This matters because TezEvents are only routed in the AM and
    // not copied during routing: a broadcast edge sends the same event object to all
    // consumers (as it should). If copies were created, re-routing the events on
    // parallelism changes would be difficult; the events would have to be buffered in the
    // Vertex until the parallelism was set and only then routed.
    ArrayList<TezEvent> batch =
        new ArrayList<TezEvent>(tezEventsForTaskAttempts.subList(fromEventId, toEventId));
    LOG.info("TaskAttempt:" + attemptID + " sent events: (" + fromEventId + "-" + toEventId + ").");
    return batch;
  } finally {
    readLock.unlock();
  }
}
use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.
the class VertexImpl method checkTasksForCompletion.
/**
 * Works out the vertex state once a task has completed (triggered by task_complete).
 *
 * <p>While tasks are still outstanding the current internal state is returned. Once every
 * task has completed, the vertex succeeds when all tasks succeeded, or when the failed
 * tasks stay within {@code maxFailuresPercent}, provided no termination cause has been
 * registered. Otherwise it finishes with its termination cause.
 *
 * @param vertex vertex whose task counters are inspected
 * @return the resulting vertex state
 */
static VertexState checkTasksForCompletion(final VertexImpl vertex) {
  // this log helps quickly count the completion count for a vertex.
  // grepping and counting for attempts and handling re-tries is time consuming
  LOG.info("Task Completion: " + constructCheckTasksForCompletionLog(vertex));

  // check for vertex failure first
  if (vertex.completedTaskCount > vertex.tasks.size()) {
    LOG.error("task completion accounting issue: completedTaskCount > nTasks:" + constructCheckTasksForCompletionLog(vertex));
  }

  if (vertex.completedTaskCount != vertex.tasks.size()) {
    // Vertex not finished yet: report the current state
    return vertex.getInternalState();
  }

  // finished - gather stats
  vertex.finalStatistics = vertex.constructStatistics();

  // Only succeed if tasks complete successfully and no terminationCause is registered,
  // or if failures are below the configured threshold.
  final boolean allTasksSucceeded = vertex.succeededTaskCount == vertex.numTasks;
  final boolean failuresWithinThreshold = (vertex.succeededTaskCount + vertex.failedTaskCount == vertex.numTasks) && (vertex.failedTaskCount * 100 <= vertex.maxFailuresPercent * vertex.numTasks);
  if (vertex.terminationCause != null || !(allTasksSucceeded || failuresWithinThreshold)) {
    return finishWithTerminationCause(vertex);
  }

  if (allTasksSucceeded) {
    LOG.info("All tasks have succeeded, vertex:" + vertex.logIdentifier);
  } else {
    LOG.info("All tasks in the vertex " + vertex.logIdentifier + " have completed and the percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
    vertex.addDiagnostic("Vertex succeeded as percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
    vertex.logSuccessDiagnostics = true;
    succeedLastAttemptsOfFailedTasks(vertex);
  }

  // start commit if there're commits pending; otherwise just finish because no vertex
  // committing is needed
  final boolean startCommit = vertex.commitVertexOutputs && !vertex.committed.getAndSet(true);
  return startCommit ? commitOrFinish(vertex) : vertex.finished(VertexState.SUCCEEDED);
}

/**
 * For every FAILED task, picks the attempt with the highest id and tells each target
 * vertex that this attempt SUCCEEDED, followed by the empty events generated for it by
 * the connecting edge.
 */
private static void succeedLastAttemptsOfFailedTasks(final VertexImpl vertex) {
  for (Task task : vertex.tasks.values()) {
    if (task.getState().equals(TaskState.FAILED)) {
      // Find the last attempt and mark that as successful
      TezTaskAttemptID lastAttempt = null;
      for (TezTaskAttemptID candidate : task.getAttempts().keySet()) {
        if (lastAttempt == null || candidate.getId() > lastAttempt.getId()) {
          lastAttempt = candidate;
        }
      }
      LOG.info("Succeeding failed task attempt:" + lastAttempt);

      for (Map.Entry<Vertex, Edge> target : vertex.targetVertices.entrySet()) {
        final Vertex destVertex = target.getKey();
        final Edge edge = target.getValue();
        try {
          List<TezEvent> emptyEvents = edge.generateEmptyEventsForAttempt(lastAttempt);
          // Downstream vertices need to receive a SUCCEEDED completion event for each
          // failed task to ensure num bipartite count is correct
          VertexEventTaskAttemptCompleted completion = new VertexEventTaskAttemptCompleted(lastAttempt, TaskAttemptStateInternal.SUCCEEDED);
          // Notify all target vertices
          vertex.eventHandler.handle(new VertexEventSourceTaskAttemptCompleted(destVertex.getVertexId(), completion));
          vertex.eventHandler.handle(new VertexEventRouteEvent(destVertex.getVertexId(), emptyEvents));
        } catch (Exception e) {
          throw new TezUncheckedException(e);
        }
      }
    }
  }
}
use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.
the class VertexImpl method getTaskAttemptTezEvents.
/**
 * Builds the next batch of events to hand to a task attempt.
 *
 * <p>The batch starts with whatever the task returns from
 * {@code getTaskAttemptTezEvents} (read from {@code preRoutedFromEventId}) and is then
 * extended from {@code onDemandRouteEvents}, starting at {@code fromEventId}. The on-demand
 * scan stops when the list holds {@code maxEvents} entries, when the on-demand list is
 * exhausted, or when the source edge reports that it could not add the current event.
 *
 * @param attemptID attempt asking for events
 * @param fromEventId index into {@code onDemandRouteEvents} at which to resume
 * @param preRoutedFromEventId starting event id passed on to the task
 * @param maxEvents batch size limit passed to the task and to the edge; the on-demand
 *        scan stops once the list reaches this size
 * @return a {@code TaskAttemptEventInfo} built from the next on-demand index, the
 *         collected events and the next pre-routed index
 */
@Override
public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID, int fromEventId, int preRoutedFromEventId, int maxEvents) {
Task task = getTask(attemptID.getTaskID());
ArrayList<TezEvent> events = task.getTaskAttemptTezEvents(attemptID, preRoutedFromEventId, maxEvents);
// the pre-routed cursor advances by however many events the task returned
int nextPreRoutedFromEventId = preRoutedFromEventId + events.size();
int nextFromEventId = fromEventId;
// hold the read lock for the whole scan of onDemandRouteEvents
onDemandRouteEventsReadLock.lock();
try {
int currEventCount = onDemandRouteEvents.size();
try {
if (currEventCount > fromEventId) {
// never append to the TaskImpl.EMPTY_TASK_ATTEMPT_TEZ_EVENTS instance itself;
// switch to a fresh list when the task returned that constant
if (events != TaskImpl.EMPTY_TASK_ATTEMPT_TEZ_EVENTS) {
events.ensureCapacity(maxEvents);
} else {
events = Lists.newArrayListWithCapacity(maxEvents);
}
// remembered so the log below counts only the events added by this scan
int numPreRoutedEvents = events.size();
int taskIndex = attemptID.getTaskID().getId();
Preconditions.checkState(taskIndex < tasks.size(), "Invalid task index for TA: " + attemptID + " vertex: " + getLogIdentifier());
boolean isFirstEvent = true;
boolean firstEventObsoleted = false;
for (nextFromEventId = fromEventId; nextFromEventId < currEventCount; ++nextFromEventId) {
boolean earlyExit = false;
// batch is full; nextFromEventId stays on the first event not yet examined
if (events.size() == maxEvents) {
break;
}
EventInfo eventInfo = onDemandRouteEvents.get(nextFromEventId);
if (eventInfo.isObsolete) {
// ignore obsolete events
firstEventObsoleted = true;
continue;
}
TezEvent tezEvent = eventInfo.tezEvent;
switch(tezEvent.getEventType()) {
case INPUT_FAILED_EVENT:
case DATA_MOVEMENT_EVENT:
case COMPOSITE_DATA_MOVEMENT_EVENT:
{
int srcTaskIndex = eventInfo.eventTaskIndex;
Edge srcEdge = eventInfo.eventEdge;
PendingEventRouteMetadata pendingRoute = null;
if (isFirstEvent) {
// the first event is the one that can have pending routes because its expanded
// events had not been completely sent in the last round.
isFirstEvent = false;
pendingRoute = srcEdge.removePendingEvents(attemptID);
if (pendingRoute != null) {
// obsoleted
if (tezEvent != pendingRoute.getTezEvent()) {
// a pending route for a different event is only expected when an obsolete
// event was skipped earlier in this scan
Preconditions.checkState(firstEventObsoleted);
// pending routes can be ignored for obsoleted events
pendingRoute = null;
}
}
}
if (!srcEdge.maybeAddTezEventForDestinationTask(tezEvent, attemptID, srcTaskIndex, events, maxEvents, pendingRoute)) {
// not enough space left for this iteration events.
// Exit and start from here next time
earlyExit = true;
}
}
break;
case ROOT_INPUT_DATA_INFORMATION_EVENT:
{
// root input events are delivered only to the task whose index matches the target
InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
if (riEvent.getTargetIndex() == taskIndex) {
events.add(tezEvent);
}
}
break;
default:
throw new TezUncheckedException("Unexpected event type for task: " + tezEvent.getEventType());
}
// break before the loop increment so nextFromEventId keeps pointing at this event
if (earlyExit) {
break;
}
}
int numEventsSent = events.size() - numPreRoutedEvents;
if (numEventsSent > 0) {
StringBuilder builder = new StringBuilder();
builder.append("Sending ").append(attemptID).append(" ").append(numEventsSent).append(" events [").append(fromEventId).append(",").append(nextFromEventId).append(") total ").append(currEventCount).append(" ").append(getLogIdentifier());
LOG.info(builder.toString());
}
}
} catch (AMUserCodeException e) {
// routing failed with an AMUserCodeException: report it to the vertex, rewind the
// on-demand cursor and return no events for this call
// NOTE(review): events.clear() also drops any pre-routed events already in the list,
// but nextPreRoutedFromEventId is not rewound - confirm this is intended
String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
LOG.error(msg, e);
eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
nextFromEventId = fromEventId;
events.clear();
}
} finally {
onDemandRouteEventsReadLock.unlock();
}
if (!events.isEmpty()) {
// walk backwards to find the most recent event of one of the listed types and record
// it on the attempt
for (int i = (events.size() - 1); i >= 0; --i) {
TezEvent lastEvent = events.get(i);
// record the last event sent by the AM to the task
EventType lastEventType = lastEvent.getEventType();
// if the following changes then critical path logic/recording may need revision
if (lastEventType == EventType.COMPOSITE_DATA_MOVEMENT_EVENT || lastEventType == EventType.COMPOSITE_ROUTED_DATA_MOVEMENT_EVENT || lastEventType == EventType.DATA_MOVEMENT_EVENT || lastEventType == EventType.ROOT_INPUT_DATA_INFORMATION_EVENT) {
task.getAttempt(attemptID).setLastEventSent(lastEvent);
break;
}
}
}
return new TaskAttemptEventInfo(nextFromEventId, events, nextPreRoutedFromEventId);
}
use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.
the class Edge method sendDmEventOrIfEventToTasks.
/**
 * Sends a data-movement or input-failed event to each destination task listed in
 * {@code taskAndInputIndices}, once per input index of that task.
 *
 * <p>The event sent is a copy of {@code tezEvent} whose payload has been re-created for
 * the given input index. One copy is built per distinct input index and reused for every
 * destination task that needs that index.
 *
 * @param tezEvent event to fan out
 * @param srcTaskIndex index of the source task; used only in the error message
 * @param isDataMovementEvent {@code true} when the payload is a {@code DataMovementEvent},
 *        {@code false} when it is an {@code InputFailedEvent}
 * @param taskAndInputIndices destination task index mapped to the input indices to notify
 * @throws TezUncheckedException if a destination task cannot be found
 */
void sendDmEventOrIfEventToTasks(TezEvent tezEvent, int srcTaskIndex, boolean isDataMovementEvent, Map<Integer, List<Integer>> taskAndInputIndices) {
  Preconditions.checkState(edgeManager != null, "Edge Manager must be initialized by this time");
  final Event payload = tezEvent.getEvent();
  // cache of event object per input index
  final Map<Integer, TezEvent> eventsByInputIndex = Maps.newHashMap();

  for (Map.Entry<Integer, List<Integer>> routing : taskAndInputIndices.entrySet()) {
    final int destTaskIndex = routing.getKey();
    for (Integer inputIndex : routing.getValue()) {
      TezEvent outgoing = eventsByInputIndex.get(inputIndex);
      if (outgoing == null) {
        outgoing = newEventForInputIndex(tezEvent, payload, isDataMovementEvent, inputIndex);
        // cache the event object per input because they are unique per input index
        eventsByInputIndex.put(inputIndex, outgoing);
      }

      final Task destTask = destinationVertex.getTask(destTaskIndex);
      if (destTask == null) {
        throw new TezUncheckedException("Unexpected null task." + " sourceVertex=" + sourceVertex.getLogIdentifier() + " srcTaskIndex = " + srcTaskIndex + " destVertex=" + destinationVertex.getLogIdentifier() + " destTaskIndex=" + destTaskIndex + " destNumTasks=" + destinationVertex.getTotalTasks() + " edgeManager=" + edgeManager.getClass().getName());
      }
      sendEventToTask(destTask, outgoing);
    }
  }
}

/**
 * Creates a copy of {@code original} whose payload targets {@code inputIndex}, keeping the
 * source info and received time and stamping this edge's destination meta info on it.
 */
private TezEvent newEventForInputIndex(TezEvent original, Event payload, boolean isDataMovementEvent, int inputIndex) {
  final Event retargeted;
  if (isDataMovementEvent) {
    DataMovementEvent dmEvent = (DataMovementEvent) payload;
    retargeted = DataMovementEvent.create(dmEvent.getSourceIndex(), inputIndex, dmEvent.getVersion(), dmEvent.getUserPayload());
  } else {
    InputFailedEvent ifEvent = (InputFailedEvent) payload;
    retargeted = InputFailedEvent.create(inputIndex, ifEvent.getVersion());
  }
  TezEvent copy = new TezEvent(retargeted, original.getSourceInfo(), original.getEventReceivedTime());
  copy.setDestinationInfo(destinationMetaInfo);
  return copy;
}
use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.
the class Edge method generateEmptyEventsForAttempt.
/**
 * Generates the events produced by {@code ShuffleUtils.generateEventsForNonStartedOutput}
 * for the given attempt and wraps each one in a {@code TezEvent} whose source info names
 * this edge's source vertex, destination vertex and the attempt.
 *
 * <p>Only edges whose source output class name starts with {@code org.apache.tez} are
 * supported.
 *
 * @param attempt attempt the generated events are attributed to
 * @return one {@code TezEvent} per generated event, in generation order
 * @throws TezException if the edge source class is not an {@code org.apache.tez} class,
 *         or if event generation fails (the original failure is attached as the cause)
 */
public List<TezEvent> generateEmptyEventsForAttempt(TezTaskAttemptID attempt) throws Exception {
  final String outputClassName = edgeProperty.getEdgeSource().getClassName();
  if (!outputClassName.startsWith("org.apache.tez")) {
    throw new TezException("Only org.apache.tez outputs are allowed for max percent failure feature. Disallowed Output: " + outputClassName);
  }
  List<Event> events = new ArrayList<>();
  Deflater deflater = TezCommonUtils.newBestCompressionDeflater();
  try {
    ShuffleUtils.generateEventsForNonStartedOutput(events, edgeManager.getNumDestinationConsumerTasks(attempt.getTaskID().getId()), null, false, true, deflater);
  } catch (Exception e) {
    throw new TezException(e);
  } finally {
    // A Deflater holds native memory until end() is called; release it here instead of
    // waiting for garbage collection, since the deflater is not used past this point.
    deflater.end();
  }
  EventMetaData sourceInfo = new EventMetaData(EventMetaData.EventProducerConsumerType.INPUT, sourceVertex.getName(), getDestinationVertexName(), attempt);
  List<TezEvent> tezEvents = new ArrayList<>(events.size());
  for (Event e : events) {
    tezEvents.add(new TezEvent(e, sourceInfo));
  }
  return tezEvents;
}
Aggregations