use of org.apache.tez.dag.history.events.VertexInitializedEvent in project tez by apache.
the class TestHistoryEventJsonConversion method testHandlerExists.
/**
 * Verifies that every {@link HistoryEventType} is handled: a sample event is built for each
 * type and, when it is a history event, converted to JSON. For DAG_SUBMITTED the queue name
 * is additionally checked in both the other-info and primary-filters sections.
 */
@Test(timeout = 5000)
public void testHandlerExists() throws JSONException {
  for (HistoryEventType eventType : HistoryEventType.values()) {
    HistoryEvent sample = buildSampleEvent(eventType);
    if (sample == null || !sample.isHistoryEvent()) {
      continue;
    }
    JSONObject converted = HistoryEventJsonConversion.convertToJson(sample);
    if (eventType != HistoryEventType.DAG_SUBMITTED) {
      continue;
    }
    // Only DAG_SUBMITTED carries a queue name that must surface in the JSON.
    String expectedQueue = "Q_" + eventType.name();
    try {
      JSONObject otherInfo = converted.getJSONObject(ATSConstants.OTHER_INFO);
      Assert.assertEquals(expectedQueue, otherInfo.getString(ATSConstants.DAG_QUEUE_NAME));
      JSONObject primaryFilters = converted.getJSONObject(ATSConstants.PRIMARY_FILTERS);
      Assert.assertEquals(expectedQueue, primaryFilters.getString(ATSConstants.DAG_QUEUE_NAME));
    } catch (JSONException ex) {
      Assert.fail("Exception: " + ex.getMessage() + " for type: " + eventType);
    }
  }
}

/**
 * Builds a sample event instance for the given type using the test fixture fields.
 * Fails the test when the type has no matching case.
 */
private HistoryEvent buildSampleEvent(HistoryEventType eventType) {
  switch (eventType) {
    case APP_LAUNCHED:
      return new AppLaunchedEvent(applicationId, random.nextInt(), random.nextInt(), user,
          new Configuration(false), null);
    case AM_LAUNCHED:
      return new AMLaunchedEvent(applicationAttemptId, random.nextInt(), random.nextInt(), user);
    case AM_STARTED:
      return new AMStartedEvent(applicationAttemptId, random.nextInt(), user);
    case DAG_SUBMITTED:
      return new DAGSubmittedEvent(tezDAGID, random.nextInt(), dagPlan, applicationAttemptId,
          null, user, null, null, "Q_" + eventType.name());
    case DAG_INITIALIZED:
      return new DAGInitializedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName(), null);
    case DAG_STARTED:
      return new DAGStartedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName());
    case DAG_FINISHED:
      return new DAGFinishedEvent(tezDAGID, random.nextInt(), random.nextInt(), DAGState.ERROR,
          null, null, user, dagPlan.getName(), null, applicationAttemptId, dagPlan);
    case VERTEX_INITIALIZED:
      return new VertexInitializedEvent(tezVertexID, "v1", random.nextInt(), random.nextInt(),
          random.nextInt(), "proc", null, null, null);
    case VERTEX_STARTED:
      return new VertexStartedEvent(tezVertexID, random.nextInt(), random.nextInt());
    case VERTEX_CONFIGURE_DONE:
      return new VertexConfigurationDoneEvent(tezVertexID, 0L, 1, null, null, null, true);
    case VERTEX_FINISHED:
      return new VertexFinishedEvent(tezVertexID, "v1", 1, random.nextInt(), random.nextInt(),
          random.nextInt(), random.nextInt(), random.nextInt(), VertexState.ERROR, null, null,
          null, null, null);
    case TASK_STARTED:
      return new TaskStartedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt());
    case TASK_FINISHED:
      return new TaskFinishedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt(),
          tezTaskAttemptID, TaskState.FAILED, null, null, 0);
    case TASK_ATTEMPT_STARTED:
      return new TaskAttemptStartedEvent(tezTaskAttemptID, "v1", random.nextInt(), containerId,
          nodeId, null, null, "nodeHttpAddress");
    case TASK_ATTEMPT_FINISHED:
      return new TaskAttemptFinishedEvent(tezTaskAttemptID, "v1", random.nextInt(),
          random.nextInt(), TaskAttemptState.KILLED, null,
          TaskAttemptTerminationCause.TERMINATED_BY_CLIENT, null, null, null, null, 0, null, 0,
          containerId, nodeId, null, null, "nodeHttpAddress");
    case CONTAINER_LAUNCHED:
      return new ContainerLaunchedEvent(containerId, random.nextInt(), applicationAttemptId);
    case CONTAINER_STOPPED:
      return new ContainerStoppedEvent(containerId, random.nextInt(), -1, applicationAttemptId);
    case DAG_COMMIT_STARTED:
      return new DAGCommitStartedEvent();
    case VERTEX_COMMIT_STARTED:
      return new VertexCommitStartedEvent();
    case VERTEX_GROUP_COMMIT_STARTED:
      return new VertexGroupCommitStartedEvent();
    case VERTEX_GROUP_COMMIT_FINISHED:
      return new VertexGroupCommitFinishedEvent();
    case DAG_RECOVERED:
      return new DAGRecoveredEvent(applicationAttemptId, tezDAGID, dagPlan.getName(), user, 1l,
          null);
    case DAG_KILL_REQUEST:
      return new DAGKillRequestEvent();
    default:
      Assert.fail("Unhandled event type " + eventType);
      // Not reached: Assert.fail always throws; the return only satisfies the compiler.
      return null;
  }
}
use of org.apache.tez.dag.history.events.VertexInitializedEvent in project tez by apache.
the class VertexImpl method logJobHistoryVertexInitializedEvent.
/**
 * Sends a {@link VertexInitializedEvent} describing this vertex to the history handler,
 * unless recovery data is present and says initialization should be skipped.
 */
void logJobHistoryVertexInitializedEvent() {
  if (recoveryData != null && recoveryData.shouldSkipInit()) {
    // Recovery data asks to skip init, so no event is logged.
    return;
  }
  VertexInitializedEvent vertexInitialized = new VertexInitializedEvent(
      vertexId,
      vertexName,
      initTimeRequested,
      initedTime,
      numTasks,
      getProcessorName(),
      getAdditionalInputs(),
      initGeneratedEvents,
      servicePluginInfo);
  this.appContext.getHistoryHandler().handle(
      new DAGHistoryEvent(getDAGId(), vertexInitialized));
}
use of org.apache.tez.dag.history.events.VertexInitializedEvent in project tez by apache.
the class RecoveryParser method parseRecoveryData.
/**
 * Parses the recovery logs and builds the recovery data of the DAG to recover.
 *
 * 1. Read Summary Recovery file and build DAGSummaryData
 * Check whether it is recoverable based on the summary file (whether dag is
 * in the middle of committing)
 * 2. Read the non-Summary Recovery file and build DAGRecoveryData
 * Check whether it is recoverable based on both the summary file and non-summary file
 * (whether vertex has completed its committing, but its full non-summary recovery events are not seen)
 *
 * As side effects the DAG counter of the DAGAppMaster is set to the highest DAG id seen in the
 * summary files, every DAG id seen is added to its dagIDs collection and, when a
 * DAG_SUBMITTED event is read, the re-created DAG becomes the current DAG of the DAGAppMaster.
 *
 * Every stream opened here is closed before returning, also when parsing fails.
 *
 * @return DAGRecoveryData, or null when no completed/in-progress DAG is found in the summary
 * @throws IOException if a summary record carries an invalid dag id or event type, if a
 *         summary event cannot be handled, or if the underlying file system calls fail
 */
public DAGRecoveryData parseRecoveryData() throws IOException {
  int dagCounter = 0;
  Map<TezDAGID, DAGSummaryData> dagSummaryDataMap = new HashMap<TezDAGID, DAGSummaryData>();
  // Fetched once; used to validate and resolve the ordinals stored in the summary records.
  final HistoryEventType[] knownEventTypes = HistoryEventType.values();
  List<Path> summaryFiles = getSummaryFiles();
  LOG.debug("SummaryFile size:" + summaryFiles.size());
  for (Path summaryFile : summaryFiles) {
    FileStatus summaryFileStatus = recoveryFS.getFileStatus(summaryFile);
    LOG.info("Parsing summary file" + ", path=" + summaryFile.toString() + ", len=" + summaryFileStatus.getLen() + ", lastModTime=" + summaryFileStatus.getModificationTime());
    FSDataInputStream summaryStream = getSummaryStream(summaryFile);
    try {
      while (true) {
        RecoveryProtos.SummaryEventProto proto;
        try {
          proto = RecoveryProtos.SummaryEventProto.parseDelimitedFrom(summaryStream);
          if (proto == null) {
            LOG.info("Reached end of summary stream");
            break;
          }
        } catch (EOFException eof) {
          LOG.info("Reached end of summary stream");
          break;
        }
        // Reject out-of-range ordinals explicitly instead of failing with an
        // ArrayIndexOutOfBoundsException on corrupted records.
        int eventTypeOrdinal = proto.getEventType();
        if (eventTypeOrdinal < 0 || eventTypeOrdinal >= knownEventTypes.length) {
          throw new IOException("Invalid event type, summary records may be corrupted" + ", eventTypeOrdinal=" + eventTypeOrdinal);
        }
        HistoryEventType eventType = knownEventTypes[eventTypeOrdinal];
        if (LOG.isDebugEnabled()) {
          LOG.debug("[RECOVERY SUMMARY]" + " dagId=" + proto.getDagId() + ", timestamp=" + proto.getTimestamp() + ", event=" + eventType);
        }
        TezDAGID dagId;
        try {
          dagId = TezDAGID.fromString(proto.getDagId());
        } catch (IllegalArgumentException e) {
          throw new IOException("Invalid dagId, summary records may be corrupted", e);
        }
        // Track the highest DAG id seen so far.
        if (dagCounter < dagId.getId()) {
          dagCounter = dagId.getId();
        }
        if (!dagSummaryDataMap.containsKey(dagId)) {
          dagSummaryDataMap.put(dagId, new DAGSummaryData(dagId));
        }
        try {
          dagSummaryDataMap.get(dagId).handleSummaryEvent(proto);
        } catch (Exception e) {
          // any exception when parsing protobuf
          throw new IOException("Error when parsing summary event proto", e);
        }
      }
    } finally {
      // Close on every exit path so a parse failure does not leak the stream.
      summaryStream.close();
    }
  }
  // Set counter for next set of DAGs & update dagNames Set in DAGAppMaster
  dagAppMaster.setDAGCounter(dagCounter);
  for (DAGSummaryData dagSummaryData : dagSummaryDataMap.values()) {
    dagAppMaster.dagIDs.add(dagSummaryData.dagId.toString());
  }
  DAGSummaryData lastInProgressDAGData = getLastCompletedOrInProgressDAG(dagSummaryDataMap);
  if (lastInProgressDAGData == null) {
    LOG.info("Nothing to recover as no uncompleted/completed DAGs found");
    return null;
  }
  TezDAGID lastInProgressDAG = lastInProgressDAGData.dagId;
  if (lastInProgressDAG == null) {
    LOG.info("Nothing to recover as no uncompleted/completed DAGs found");
    return null;
  }
  LOG.info("Checking if DAG is in recoverable state" + ", dagId=" + lastInProgressDAGData.dagId);
  final DAGRecoveryData recoveredDAGData = new DAGRecoveryData(lastInProgressDAGData);
  List<Path> dagRecoveryFiles = getDAGRecoveryFiles(lastInProgressDAG);
  boolean skipAllOtherEvents = false;
  Path lastRecoveryFile = null;
  // The non-summary files are read even when the DAG is flagged non-recoverable: in that case
  // processing stops right after the DAG_SUBMITTED event, which is used to re-create the DAG.
  for (Path dagRecoveryFile : dagRecoveryFiles) {
    if (skipAllOtherEvents) {
      LOG.warn("Other recovery files will be skipped due to error in the previous recovery file" + lastRecoveryFile);
      break;
    }
    FileStatus fileStatus = recoveryFS.getFileStatus(dagRecoveryFile);
    lastRecoveryFile = dagRecoveryFile;
    LOG.info("Trying to recover dag from recovery file" + ", dagId=" + lastInProgressDAG.toString() + ", dagRecoveryFile=" + dagRecoveryFile + ", len=" + fileStatus.getLen());
    FSDataInputStream dagRecoveryStream = recoveryFS.open(dagRecoveryFile, recoveryBufferSize);
    try {
      while (true) {
        HistoryEvent event;
        try {
          event = getNextEvent(dagRecoveryStream);
          if (event == null) {
            LOG.info("Reached end of dag recovery stream");
            break;
          }
        } catch (EOFException eof) {
          LOG.info("Reached end of dag recovery stream");
          break;
        } catch (IOException ioe) {
          // Corrupt data ends the processing of this file; events read so far are kept.
          LOG.warn("Corrupt data found when trying to read next event", ioe);
          break;
        }
        if (skipAllOtherEvents) {
          // hit an error - skip reading other events
          break;
        }
        HistoryEventType eventType = event.getEventType();
        LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
        switch (eventType) {
          case DAG_SUBMITTED:
            DAGSubmittedEvent submittedEvent = (DAGSubmittedEvent) event;
            recoveredDAGData.recoveredDAG = dagAppMaster.createDAG(submittedEvent.getDAGPlan(), lastInProgressDAG);
            recoveredDAGData.cumulativeAdditionalResources = submittedEvent.getCumulativeAdditionalLocalResources();
            recoveredDAGData.recoveredDagID = recoveredDAGData.recoveredDAG.getID();
            dagAppMaster.setCurrentDAG(recoveredDAGData.recoveredDAG);
            if (recoveredDAGData.nonRecoverable) {
              skipAllOtherEvents = true;
            }
            break;
          case DAG_INITIALIZED:
            recoveredDAGData.dagInitedEvent = (DAGInitializedEvent) event;
            break;
          case DAG_STARTED:
            recoveredDAGData.dagStartedEvent = (DAGStartedEvent) event;
            break;
          case DAG_FINISHED:
            recoveredDAGData.dagFinishedEvent = (DAGFinishedEvent) event;
            // Nothing after the finish event is processed.
            skipAllOtherEvents = true;
            break;
          case DAG_COMMIT_STARTED:
          case VERTEX_GROUP_COMMIT_STARTED:
          case VERTEX_GROUP_COMMIT_FINISHED:
          case CONTAINER_LAUNCHED:
          {
            // Nothing to do for now
            break;
          }
          case DAG_KILL_REQUEST:
          {
            break;
          }
          case VERTEX_INITIALIZED:
          {
            VertexInitializedEvent vertexInitEvent = (VertexInitializedEvent) event;
            VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(vertexInitEvent.getVertexID());
            vertexRecoveryData.vertexInitedEvent = vertexInitEvent;
            break;
          }
          case VERTEX_CONFIGURE_DONE:
          {
            VertexConfigurationDoneEvent reconfigureDoneEvent = (VertexConfigurationDoneEvent) event;
            VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(reconfigureDoneEvent.getVertexID());
            vertexRecoveryData.vertexConfigurationDoneEvent = reconfigureDoneEvent;
            break;
          }
          case VERTEX_STARTED:
          {
            VertexStartedEvent vertexStartedEvent = (VertexStartedEvent) event;
            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(vertexStartedEvent.getVertexID());
            Preconditions.checkArgument(vertexRecoveryData != null, "No VertexInitializedEvent before VertexStartedEvent");
            vertexRecoveryData.vertexStartedEvent = vertexStartedEvent;
            break;
          }
          case VERTEX_COMMIT_STARTED:
          {
            break;
          }
          case VERTEX_FINISHED:
          {
            VertexFinishedEvent vertexFinishedEvent = (VertexFinishedEvent) event;
            VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(vertexFinishedEvent.getVertexID());
            vertexRecoveryData.vertexFinishedEvent = vertexFinishedEvent;
            break;
          }
          case TASK_STARTED:
          {
            TaskStartedEvent taskStartedEvent = (TaskStartedEvent) event;
            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskStartedEvent.getTaskID().getVertexID());
            Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskStartedEvent, its vertex does not exist:" + taskStartedEvent.getTaskID().getVertexID());
            TaskRecoveryData taskRecoveryData = vertexRecoveryData.maybeCreateTaskRecoveryData(taskStartedEvent.getTaskID());
            taskRecoveryData.taskStartedEvent = taskStartedEvent;
            break;
          }
          case TASK_FINISHED:
          {
            TaskFinishedEvent taskFinishedEvent = (TaskFinishedEvent) event;
            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskFinishedEvent.getTaskID().getVertexID());
            Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskFinishedEvent, its vertex does not exist:" + taskFinishedEvent.getTaskID().getVertexID());
            TaskRecoveryData taskRecoveryData = vertexRecoveryData.maybeCreateTaskRecoveryData(taskFinishedEvent.getTaskID());
            taskRecoveryData.taskFinishedEvent = taskFinishedEvent;
            break;
          }
          case TASK_ATTEMPT_STARTED:
          {
            TaskAttemptStartedEvent taStartedEvent = (TaskAttemptStartedEvent) event;
            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taStartedEvent.getTaskAttemptID().getTaskID().getVertexID());
            Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskAttemptStartedEvent, its vertexId does not exist, taId=" + taStartedEvent.getTaskAttemptID());
            TaskRecoveryData taskRecoveryData = vertexRecoveryData.taskRecoveryDataMap.get(taStartedEvent.getTaskAttemptID().getTaskID());
            Preconditions.checkArgument(taskRecoveryData != null, "Invalid TaskAttemptStartedEvent, its taskId does not exist, taId=" + taStartedEvent.getTaskAttemptID());
            TaskAttemptRecoveryData taRecoveryData = taskRecoveryData.maybeCreateTaskAttemptRecoveryData(taStartedEvent.getTaskAttemptID());
            taRecoveryData.taStartedEvent = taStartedEvent;
            break;
          }
          case TASK_ATTEMPT_FINISHED:
          {
            TaskAttemptFinishedEvent taFinishedEvent = (TaskAttemptFinishedEvent) event;
            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taFinishedEvent.getTaskAttemptID().getTaskID().getVertexID());
            Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskAttemtFinishedEvent, its vertexId does not exist, taId=" + taFinishedEvent.getTaskAttemptID());
            TaskRecoveryData taskRecoveryData = vertexRecoveryData.taskRecoveryDataMap.get(taFinishedEvent.getTaskAttemptID().getTaskID());
            Preconditions.checkArgument(taskRecoveryData != null, "Invalid TaskAttemptFinishedEvent, its taskId does not exist, taId=" + taFinishedEvent.getTaskAttemptID());
            TaskAttemptRecoveryData taRecoveryData = taskRecoveryData.maybeCreateTaskAttemptRecoveryData(taFinishedEvent.getTaskAttemptID());
            taRecoveryData.taFinishedEvent = taFinishedEvent;
            break;
          }
          default:
            throw new RuntimeException("Invalid data found, unknown event type " + eventType);
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("[DAG RECOVERY]" + " dagId=" + lastInProgressDAG + ", eventType=" + eventType + ", event=" + event.toString());
        }
      }
    } finally {
      // Close on every exit path; the precondition checks and the default branch above
      // throw unchecked exceptions that would otherwise leak the stream.
      dagRecoveryStream.close();
    }
  }
  recoveredDAGData.checkRecoverableNonSummary();
  return recoveredDAGData;
}
use of org.apache.tez.dag.history.events.VertexInitializedEvent in project tez by apache.
the class RecoveryParser method getNextEvent.
/**
 * Reads the next history event from a recovery stream: an int event-type ordinal followed by
 * the event payload, which the event reads itself via {@code fromProtoStream}.
 *
 * @param inputStream stream positioned at the start of an event record
 * @return the parsed event, or null when the stream ends while reading the ordinal or the
 *         payload
 * @throws IOException if the ordinal is out of range or maps to a type that is not expected
 *         in a recovery stream
 */
private static HistoryEvent getNextEvent(FSDataInputStream inputStream) throws IOException {
  int ordinal;
  try {
    ordinal = inputStream.readInt();
  } catch (EOFException eof) {
    // No further record in the stream.
    return null;
  }
  HistoryEventType[] allTypes = HistoryEventType.values();
  boolean ordinalInRange = ordinal >= 0 && ordinal < allTypes.length;
  if (!ordinalInRange) {
    throw new IOException("Corrupt data found when trying to read next event type" + ", eventTypeOrdinal=" + ordinal);
  }
  HistoryEventType eventType = allTypes[ordinal];
  HistoryEvent parsed = newEmptyRecoveryEvent(eventType);
  try {
    parsed.fromProtoStream(inputStream);
  } catch (EOFException eof) {
    // Stream ended in the middle of the payload; reported as end of stream.
    return null;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Parsed event from input stream" + ", eventType=" + eventType + ", event=" + parsed.toString());
  }
  return parsed;
}

/**
 * Creates an unpopulated event object matching the given type.
 *
 * @throws IOException if the type has no case in the switch below
 */
private static HistoryEvent newEmptyRecoveryEvent(HistoryEventType eventType) throws IOException {
  switch (eventType) {
    case AM_LAUNCHED:
      return new AMLaunchedEvent();
    case AM_STARTED:
      return new AMStartedEvent();
    case DAG_SUBMITTED:
      return new DAGSubmittedEvent();
    case DAG_INITIALIZED:
      return new DAGInitializedEvent();
    case DAG_STARTED:
      return new DAGStartedEvent();
    case DAG_COMMIT_STARTED:
      return new DAGCommitStartedEvent();
    case DAG_FINISHED:
      return new DAGFinishedEvent();
    case DAG_KILL_REQUEST:
      return new DAGKillRequestEvent();
    case CONTAINER_LAUNCHED:
      return new ContainerLaunchedEvent();
    case CONTAINER_STOPPED:
      return new ContainerStoppedEvent();
    case VERTEX_INITIALIZED:
      return new VertexInitializedEvent();
    case VERTEX_CONFIGURE_DONE:
      return new VertexConfigurationDoneEvent();
    case VERTEX_STARTED:
      return new VertexStartedEvent();
    case VERTEX_COMMIT_STARTED:
      return new VertexCommitStartedEvent();
    case VERTEX_GROUP_COMMIT_STARTED:
      return new VertexGroupCommitStartedEvent();
    case VERTEX_GROUP_COMMIT_FINISHED:
      return new VertexGroupCommitFinishedEvent();
    case VERTEX_FINISHED:
      return new VertexFinishedEvent();
    case TASK_STARTED:
      return new TaskStartedEvent();
    case TASK_FINISHED:
      return new TaskFinishedEvent();
    case TASK_ATTEMPT_STARTED:
      return new TaskAttemptStartedEvent();
    case TASK_ATTEMPT_FINISHED:
      return new TaskAttemptFinishedEvent();
    default:
      throw new IOException("Invalid data found, unknown event type " + eventType);
  }
}
use of org.apache.tez.dag.history.events.VertexInitializedEvent in project tez by apache.
the class TestRecovery method testTwoRoundsRecoverying.
/**
 * Exercises recovery with two shutdown rounds: a list of POST-timing shutdown conditions is
 * built and, for randomly chosen pairs (an earlier condition plus the final DAG-finished
 * condition), the ordered-word-count multi-round recovery test is run.
 */
@Test(timeout = 1800000)
public void testTwoRoundsRecoverying() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
  TezDAGID dagId = TezDAGID.getInstance(appId, 1);
  TezVertexID vertexId0 = TezVertexID.getInstance(dagId, 0);
  TezVertexID vertexId1 = TezVertexID.getInstance(dagId, 1);
  TezVertexID vertexId2 = TezVertexID.getInstance(dagId, 2);
  ContainerId containerId =
      ContainerId.newInstance(ApplicationAttemptId.newInstance(appId, 1), 1);
  NodeId nodeId = NodeId.newInstance("localhost", 10);
  List<TezEvent> initGeneratedEvents = Lists.newArrayList(
      new TezEvent(InputDataInformationEvent.createWithObjectPayload(0, new Object()), null));

  // Conditions are appended in the same order in which the events are listed below.
  List<SimpleShutdownCondition> shutdownConditions = Lists.newArrayList();
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new DAGInitializedEvent(dagId, 0L, "username", "dagName", null)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new DAGStartedEvent(dagId, 0L, "username", "dagName")));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new VertexInitializedEvent(vertexId0, "Tokenizer", 0L, 0L, 0, "", null,
          initGeneratedEvents, null)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new VertexStartedEvent(vertexId0, 0L, 0L)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new VertexConfigurationDoneEvent(vertexId0, 0L, 2, null, null, null, true)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new TaskStartedEvent(TezTaskID.getInstance(vertexId0, 0), "vertexName", 0L, 0L)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new TaskAttemptStartedEvent(
          TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId0, 0), 0),
          "vertexName", 0L, containerId, nodeId, "", "", "")));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new TaskFinishedEvent(TezTaskID.getInstance(vertexId0, 0), "vertexName", 0L, 0L, null,
          TaskState.SUCCEEDED, "", new TezCounters(), 0)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new VertexFinishedEvent(vertexId0, "vertexName", 1, 0L, 0L, 0L, 0L, 0L,
          VertexState.SUCCEEDED, "", new TezCounters(), new VertexStats(),
          new HashMap<String, Integer>(), null)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new VertexFinishedEvent(vertexId1, "vertexName", 1, 0L, 0L, 0L, 0L, 0L,
          VertexState.SUCCEEDED, "", new TezCounters(), new VertexStats(),
          new HashMap<String, Integer>(), null)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new VertexFinishedEvent(vertexId2, "vertexName", 1, 0L, 0L, 0L, 0L, 0L,
          VertexState.SUCCEEDED, "", new TezCounters(), new VertexStats(),
          new HashMap<String, Integer>(), null)));
  shutdownConditions.add(new SimpleShutdownCondition(TIMING.POST,
      new DAGFinishedEvent(dagId, 0L, 0L, DAGState.SUCCEEDED, "", new TezCounters(),
          "username", "dagName", new HashMap<String, Integer>(),
          ApplicationAttemptId.newInstance(appId, 1), null)));

  Random rand = new Random();
  final int lastIndex = shutdownConditions.size() - 1;
  for (int firstIndex = 0; firstIndex < lastIndex; firstIndex++) {
    // Roughly half of the candidates are skipped at random
    // (presumably to stay within the test timeout - TODO confirm).
    if (rand.nextDouble() >= 0.5) {
      continue;
    }
    int secondIndex = firstIndex + 1 + rand.nextInt(shutdownConditions.size() - firstIndex - 1);
    // Only pairs whose second condition is the last one in the list are run.
    if (secondIndex != lastIndex) {
      continue;
    }
    SimpleShutdownCondition firstCondition = shutdownConditions.get(firstIndex);
    SimpleShutdownCondition secondCondition = shutdownConditions.get(secondIndex);
    RecoveryServiceWithEventHandlingHook.MultipleRoundShutdownCondition twoRounds =
        new RecoveryServiceWithEventHandlingHook.MultipleRoundShutdownCondition(
            Lists.newArrayList(firstCondition, secondCondition));
    boolean firstIsVertexStarted =
        firstCondition.getHistoryEvent().getEventType() == HistoryEventType.VERTEX_STARTED;
    testOrderedWordCountMultipleRoundRecoverying(twoRounds, true, firstIsVertexStarted);
  }
}
Aggregations