Use of org.apache.oozie.CoordinatorActionBean in the Apache Oozie project.
Class TestSLAService, method testEndMissDBConfirm.
/**
 * Registers five SLA entries whose expected end time is already in the past (two workflow jobs and three
 * coordinator actions), runs the SLA worker and the event worker, and then verifies which
 * "END - MISS" / "END - MET" events were written to {@code output} and what ended up in the
 * corresponding SLA summary records.
 *
 * @throws Exception if any of the setup, query or service calls fails
 */
@Test
public void testEndMissDBConfirm() throws Exception {
SLAService slas = Services.get().get(SLAService.class);
EventHandlerService ehs = Services.get().get(EventHandlerService.class);
JPAService jpaService = Services.get().get(JPAService.class);
// Common reference time for the workflow-job cases below
Date date = new Date();
// CASE 1: positive test WF job (still in PREP, expected end already passed)
WorkflowJobBean job1 = addRecordToWfJobTable(WorkflowJob.Status.PREP, WorkflowInstance.Status.PREP);
SLARegistrationBean sla = _createSLARegistration(job1.getId(), AppType.WORKFLOW_JOB);
// expected end half an hour back (1800 s)
sla.setExpectedEnd(new Date(date.getTime() - 1 * 1800 * 1000));
slas.addRegistrationEvent(sla);
// CASE 2: negative test WF job (already SUCCEEDED; actual end 1800 s ago, actual start 2000 s ago)
WorkflowJobBean job2 = addRecordToWfJobTable(WorkflowJob.Status.SUCCEEDED, WorkflowInstance.Status.SUCCEEDED);
job2.setEndTime(new Date(date.getTime() - 1 * 1800 * 1000));
job2.setStartTime(new Date(date.getTime() - 1 * 2000 * 1000));
job2.setLastModifiedTime(new Date());
WorkflowJobQueryExecutor.getInstance().executeUpdate(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_START_END, job2);
sla = _createSLARegistration(job2.getId(), AppType.WORKFLOW_JOB);
// expected end (1500 s ago) is in the past but later than the actual end (1800 s ago)
sla.setExpectedEnd(new Date(date.getTime() - 1 * 1500 * 1000));
// unreasonably small expected duration, so that a DURATION MISS is produced (asserted further down)
sla.setExpectedDuration(100);
slas.addRegistrationEvent(sla);
slas.runSLAWorker();
// CASE 3: positive test Coord action (WAITING, external id cleared)
CoordinatorActionBean action1 = addRecordToCoordActionTable("coord-action-C@1", 1, CoordinatorAction.Status.WAITING, "coord-action-get.xml", 0);
action1.setExternalId(null);
CoordActionQueryExecutor.getInstance().executeUpdate(CoordActionQuery.UPDATE_COORD_ACTION_RERUN, action1);
sla = _createSLARegistration(action1.getId(), AppType.COORDINATOR_ACTION);
// expected end in the past (2000 s ago)
sla.setExpectedEnd(new Date(System.currentTimeMillis() - 1 * 2000 * 1000));
slas.addRegistrationEvent(sla);
// CASE 4: positive test coord action (FAILED, backed by an inserted external workflow job)
CoordinatorActionBean action2 = addRecordToCoordActionTable("coord-action-C@2", 1, CoordinatorAction.Status.FAILED, "coord-action-get.xml", 0);
WorkflowJobBean extWf = new WorkflowJobBean();
extWf.setId(action2.getExternalId());
// actual end before expected. but action is failed
extWf.setEndTime(new Date(System.currentTimeMillis() - 1 * 1800 * 1000));
extWf.setStartTime(new Date(System.currentTimeMillis() - 1 * 2000 * 1000));
jpaService.execute(new WorkflowJobInsertJPAExecutor(extWf));
sla = _createSLARegistration(action2.getId(), AppType.COORDINATOR_ACTION);
sla.setExpectedEnd(new Date(System.currentTimeMillis() - 1 * 1500 * 1000));
slas.addRegistrationEvent(sla);
// CASE 5: negative test coord action (SUCCEEDED, backed by an inserted external workflow job)
CoordinatorActionBean action3 = addRecordToCoordActionTable("coord-action-C@3", 1, CoordinatorAction.Status.SUCCEEDED, "coord-action-get.xml", 0);
extWf = new WorkflowJobBean();
extWf.setId(action3.getExternalId());
extWf.setStartTime(new Date(System.currentTimeMillis() - 1 * 2100 * 1000));
extWf.setEndTime(new Date(System.currentTimeMillis() - 1 * 1800 * 1000));
jpaService.execute(new WorkflowJobInsertJPAExecutor(extWf));
sla = _createSLARegistration(action3.getId(), AppType.COORDINATOR_ACTION);
// expected start one hour ago, earlier than the actual start (2100 s ago): cause start_miss
sla.setExpectedStart(new Date(System.currentTimeMillis() - 1 * 3600 * 1000));
// in past but > actual end, end_met
sla.setExpectedEnd(new Date(System.currentTimeMillis() - 1 * 1500 * 1000));
// cause duration miss
sla.setExpectedDuration(0);
slas.addRegistrationEvent(sla);
slas.runSLAWorker();
// Invoke the event worker directly on this thread (run() is called, no new thread is started here)
ehs.new EventWorker().run();
// Count how many times "END - MISS" occurs in the captured output.
// NOTE(review): the loop condition is ptr > 0, so a match at index 0 would not be counted; presumably
// every message is prefixed by a job id so this cannot happen - confirm.
int count = 0;
for (int ptr = output.indexOf("END - MISS"); ptr < output.length() && ptr > 0; ptr = output.indexOf("END - MISS", ptr + 1)) {
count++;
}
// only 3 out of the 5 are correct end_misses (job1, action1, action2)
assertEquals(3, count);
assertEventNoDuplicates(output.toString(), job1.getId() + " Sla END - MISS!!!");
assertEventNoDuplicates(output.toString(), action1.getId() + " Sla END - MISS!!!");
assertEventNoDuplicates(output.toString(), action2.getId() + " Sla END - MISS!!!");
assertEventNoDuplicates(output.toString(), job2.getId() + " Sla END - MET!!!");
assertEventNoDuplicates(output.toString(), job2.getId() + " Sla DURATION - MISS!!!");
assertEventNoDuplicates(output.toString(), action3.getId() + " Sla START - MISS!!!");
assertEventNoDuplicates(output.toString(), action3.getId() + " Sla DURATION - MISS!!!");
assertEventNoDuplicates(output.toString(), action3.getId() + " Sla END - MET!!!");
// negative on MISS after DB check, updated with actual times
SLASummaryBean slaSummary = SLASummaryQueryExecutor.getInstance().get(SLASummaryQuery.GET_SLA_SUMMARY, job2.getId());
assertEquals(job2.getStartTime(), slaSummary.getActualStart());
assertEquals(job2.getEndTime(), slaSummary.getActualEnd());
assertEquals(job2.getEndTime().getTime() - job2.getStartTime().getTime(), slaSummary.getActualDuration());
assertEquals(job2.getStatusStr(), slaSummary.getJobStatus());
assertEquals(SLAEvent.EventStatus.END_MET, slaSummary.getEventStatus());
assertEquals(SLAStatus.MET, slaSummary.getSLAStatus());
assertEquals(8, slaSummary.getEventProcessed());
// removed from memory
assertNull(slas.getSLACalculator().get(job2.getId()));
// positives but also updated with actual times immediately after DB check
slaSummary = SLASummaryQueryExecutor.getInstance().get(SLASummaryQuery.GET_SLA_SUMMARY, action2.getId());
extWf = jpaService.execute(new WorkflowJobGetJPAExecutor(action2.getExternalId()));
assertEquals(extWf.getStartTime(), slaSummary.getActualStart());
assertEquals(extWf.getEndTime(), slaSummary.getActualEnd());
assertEquals(extWf.getEndTime().getTime() - extWf.getStartTime().getTime(), slaSummary.getActualDuration());
assertEquals(action2.getStatusStr(), slaSummary.getJobStatus());
assertEquals(SLAEvent.EventStatus.END_MISS, slaSummary.getEventStatus());
assertEquals(SLAStatus.MISS, slaSummary.getSLAStatus());
assertEquals(8, slaSummary.getEventProcessed());
// removed from memory
assertNull(slas.getSLACalculator().get(action2.getId()));
// action1 is still WAITING: no actual times recorded yet and the entry is still held by the calculator
slaSummary = SLASummaryQueryExecutor.getInstance().get(SLASummaryQuery.GET_SLA_SUMMARY, action1.getId());
assertNull(slaSummary.getActualStart());
assertNull(slaSummary.getActualEnd());
assertEquals(action1.getStatusStr(), slaSummary.getJobStatus());
assertEquals(SLAEvent.EventStatus.END_MISS, slaSummary.getEventStatus());
assertEquals(SLAStatus.MISS, slaSummary.getSLAStatus());
assertEquals(7, slaSummary.getEventProcessed());
assertNotNull(slas.getSLACalculator().get(action1.getId()));
// Move action1 from WAITING to TIMEDOUT in the DB, then notify the calculator passing only the job id
// (all other arguments are null)
action1.setStatus(CoordinatorAction.Status.TIMEDOUT);
CoordActionQueryExecutor.getInstance().executeUpdate(CoordActionQuery.UPDATE_COORD_ACTION_RERUN, action1);
slas.getSLACalculator().addJobStatus(action1.getId(), null, null, null, null);
slaSummary = SLASummaryQueryExecutor.getInstance().get(SLASummaryQuery.GET_SLA_SUMMARY, action1.getId());
// after the timeout the summary carries an actual end but still no actual start
assertNull(slaSummary.getActualStart());
assertNotNull(slaSummary.getActualEnd());
assertEquals("TIMEDOUT", slaSummary.getJobStatus());
assertEquals(SLAEvent.EventStatus.END_MISS, slaSummary.getEventStatus());
assertEquals(SLAStatus.MISS, slaSummary.getSLAStatus());
assertEquals(8, slaSummary.getEventProcessed());
}
Use of org.apache.oozie.CoordinatorActionBean in the Apache Oozie project.
Class CoordMaterializeTransitionXCommand, method materializeActions.
/**
 * Create action instances starting from "startMatdTime" to "endMatdTime" and store them into coord action table.
 * <p>
 * A frequency that parses as an integer is stepped from the job's original start time by
 * {@code lastActionNumber * frequency} time units; any other frequency is treated as a cron expression and
 * stepped through {@code CoordCommandUtils.getNextValidActionTimeForCronFrequency}. Materialization stops when
 * the end of the window is reached, when the job's pause time is reached, or when the throttle budget
 * ({@code matThrottling - active actions}) is exhausted, unless the budget is ignored (execution order
 * LAST_ONLY or NONE with "endMatdTime" already in the past).
 * <p>
 * Side effects: {@code lastActionNumber} is advanced for every action created and {@code endMatdTime} is
 * overwritten with the time at which materialization stopped.
 *
 * @param dryrun if this is a dry run; in that case nothing is stored and the number of active actions is
 *        taken as 0
 * @return for a dry run, the concatenation of all materialized action strings; otherwise the string of the
 *         last action materialized, or {@code null} if no action was materialized
 * @throws Exception thrown if failed to materialize actions
 */
protected String materializeActions(boolean dryrun) throws Exception {
Configuration jobConf = null;
try {
jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
} catch (IOException ioe) {
LOG.warn("Configuration parse error. read from DB :" + coordJob.getConf(), ioe);
throw new CommandException(ErrorCode.E1005, ioe.getMessage(), ioe);
}
String jobXml = coordJob.getJobXml();
Element eJob = XmlUtils.parseXml(jobXml);
TimeZone appTz = DateUtils.getTimeZone(coordJob.getTimeZone());
String frequency = coordJob.getFrequency();
TimeUnit freqTU = TimeUnit.valueOf(coordJob.getTimeUnitStr());
TimeUnit endOfFlag = TimeUnit.valueOf(eJob.getAttributeValue("end_of_duration"));
// Materialization window [start, end) expressed in the application's time zone
Calendar start = Calendar.getInstance(appTz);
start.setTime(startMatdTime);
DateUtils.moveToEnd(start, endOfFlag);
Calendar end = Calendar.getInstance(appTz);
end.setTime(endMatdTime);
lastActionNumber = coordJob.getLastActionNumber();
// Intentionally printing dates in their own timezone, not Oozie timezone
LOG.info("materialize actions for tz=" + appTz.getDisplayName() + ",\n start=" + start.getTime() + ", end=" + end.getTime() + ",\n timeUnit " + freqTU.getCalendarUnit() + ",\n frequency :" + frequency + ":" + freqTU + ",\n lastActionNumber " + lastActionNumber);
// Keep the actual start time
Calendar origStart = Calendar.getInstance(appTz);
origStart.setTime(coordJob.getStartTimestamp());
// Move to the End of duration, if needed.
DateUtils.moveToEnd(origStart, endOfFlag);
// Collects the action strings that are returned for a dry run
StringBuilder actionStrings = new StringBuilder();
Date jobPauseTime = coordJob.getPauseTime();
// pause stays null when the job has no pause time; otherwise no action at or after it is materialized
Calendar pause = null;
if (jobPauseTime != null) {
pause = Calendar.getInstance(appTz);
pause.setTime(DateUtils.convertDateToTimestamp(jobPauseTime));
}
String action = null;
// Throttle budget: configured materialization throttle minus the currently active actions (0 for a dry run)
int numWaitingActions = dryrun ? 0 : jpaService.execute(new CoordActionsActiveCountJPAExecutor(coordJob.getId()));
int maxActionToBeCreated = coordJob.getMatThrottling() - numWaitingActions;
// If LAST_ONLY or NONE and all materialization is in the past, ignore maxActionsToBeCreated
boolean ignoreMaxActions = (coordJob.getExecutionOrder().equals(CoordinatorJob.Execution.LAST_ONLY) || coordJob.getExecutionOrder().equals(CoordinatorJob.Execution.NONE)) && endMatdTime.before(new Date());
LOG.debug("Coordinator job :" + coordJob.getId() + ", maxActionToBeCreated :" + maxActionToBeCreated + ", Mat_Throttle :" + coordJob.getMatThrottling() + ", numWaitingActions :" + numWaitingActions);
// A frequency that is not a plain integer is treated as a cron frequency. For integer frequencies the
// effective start is derived from the original start and the number of actions already created;
// for cron frequencies it stays at the window start.
boolean isCronFrequency = false;
Calendar effStart = (Calendar) start.clone();
try {
int intFrequency = Integer.parseInt(coordJob.getFrequency());
effStart = (Calendar) origStart.clone();
effStart.add(freqTU.getCalendarUnit(), lastActionNumber * intFrequency);
} catch (NumberFormatException e) {
isCronFrequency = true;
}
// Only relevant for cron frequencies: true until the first loop iteration that starts exactly at startMatdTime
boolean firstMater = true;
// NOTE(review): presumably adjusts the window end for a daylight-saving offset change between
// startMatdTime and endMatdTime - confirm against DaylightOffsetCalculator
end = new DaylightOffsetCalculator(startMatdTime, endMatdTime).calculate(appTz, end);
// The throttle budget is decremented once per iteration unless it is being ignored
while (effStart.compareTo(end) < 0 && (ignoreMaxActions || maxActionToBeCreated-- > 0)) {
if (pause != null && effStart.compareTo(pause) >= 0) {
break;
}
Date nextTime = effStart.getTime();
if (isCronFrequency) {
// On the first pass starting exactly at startMatdTime, step back one minute before asking for the
// next valid cron time (presumably so that startMatdTime itself can be returned - confirm)
if (effStart.getTime().compareTo(startMatdTime) == 0 && firstMater) {
effStart.add(Calendar.MINUTE, -1);
firstMater = false;
}
nextTime = CoordCommandUtils.getNextValidActionTimeForCronFrequency(effStart.getTime(), coordJob);
effStart.setTime(nextTime);
}
// Re-check the window and pause limits because the cron step above may have moved effStart
if (effStart.compareTo(end) < 0) {
if (pause != null && effStart.compareTo(pause) >= 0) {
break;
}
CoordinatorActionBean actionBean = new CoordinatorActionBean();
lastActionNumber++;
int timeout = coordJob.getTimeout();
LOG.debug("Materializing action for time=" + DateUtils.formatDateOozieTZ(effStart.getTime()) + ", lastactionnumber=" + lastActionNumber + " timeout=" + timeout + " minutes");
Date actualTime = new Date();
// A clone of the job XML element is passed so the parsed original is not handed to the helper
action = CoordCommandUtils.materializeOneInstance(jobId, dryrun, (Element) eJob.clone(), nextTime, actualTime, lastActionNumber, jobConf, actionBean);
actionBean.setTimeOut(timeout);
if (!dryrun) {
// Storing to table
storeToDB(actionBean, action, jobConf);
} else {
actionStrings.append("action for new instance");
actionStrings.append(action);
}
} else {
break;
}
if (!isCronFrequency) {
// Recompute from the original start rather than adding to the previous value
effStart = (Calendar) origStart.clone();
effStart.add(freqTU.getCalendarUnit(), lastActionNumber * Integer.parseInt(coordJob.getFrequency()));
}
}
if (isCronFrequency) {
// Still inside the window but the throttle budget is used up (note: the check decrements the budget
// once more, which has no further effect because the value is not read again)
if (effStart.compareTo(end) < 0 && !(ignoreMaxActions || maxActionToBeCreated-- > 0)) {
// to avoid creating duplicate actions
if (!firstMater) {
effStart.setTime(CoordCommandUtils.getNextValidActionTimeForCronFrequency(effStart.getTime(), coordJob));
}
}
}
// Record where materialization stopped
endMatdTime = effStart.getTime();
if (!dryrun) {
return action;
} else {
return actionStrings.toString();
}
}
Use of org.apache.oozie.CoordinatorActionBean in the Apache Oozie project.
Class CoordSuspendXCommand, method suspendChildren.
/**
 * Suspends the running actions of the coordinator job identified by {@code jobId}.
 * <p>
 * For every running action that has an external (workflow) id a {@code SuspendXCommand} is queued for that
 * id; every running action, with or without an external id, is then passed to {@code updateCoordAction}.
 * If {@code exceptionOccured} is set when the method exits, the coordinator job is marked FAILED, its
 * pending flag is reset and a job status update is added to {@code updateList}.
 *
 * @throws CommandException wrapping any {@code XException} raised while loading or processing the actions
 */
@Override
public void suspendChildren() throws CommandException {
    try {
        // Load every running action of the job
        List<CoordinatorActionBean> runningActions = jpaService.execute(new CoordJobGetActionsRunningJPAExecutor(jobId));
        for (CoordinatorActionBean runningAction : runningActions) {
            boolean hasExternalId = runningAction.getExternalId() != null;
            if (hasExternalId) {
                // The backing workflow is suspended through its own command
                queue(new SuspendXCommand(runningAction.getExternalId()));
            }
            updateCoordAction(runningAction);
            if (hasExternalId) {
                LOG.debug("Suspend coord action = [{0}], new status = [{1}], pending = [{2}] and queue SuspendXCommand for [{3}]",
                        runningAction.getId(), runningAction.getStatus(), runningAction.getPending(),
                        runningAction.getExternalId());
            } else {
                LOG.debug("Suspend coord action = [{0}], new status = [{1}], pending = [{2}] and external id is null",
                        runningAction.getId(), runningAction.getStatus(), runningAction.getPending());
            }
        }
        LOG.debug("Suspended coordinator actions for the coordinator=[{0}]", jobId);
    } catch (XException ex) {
        exceptionOccured = true;
        throw new CommandException(ex);
    } finally {
        // Runs on every exit path; acts only when a failure has been flagged
        if (exceptionOccured) {
            coordJob.setStatus(CoordinatorJob.Status.FAILED);
            coordJob.resetPending();
            LOG.debug("Exception happened, fail coordinator job id = " + jobId + ", status = " + coordJob.getStatus());
            UpdateEntry<CoordJobQuery> failedJobUpdate =
                    new UpdateEntry<CoordJobQuery>(CoordJobQuery.UPDATE_COORD_JOB_STATUS_PENDING_TIME, coordJob);
            updateList.add(failedJobUpdate);
        }
    }
}
Use of org.apache.oozie.CoordinatorActionBean in the Apache Oozie project.
Class CoordActionsKillXCommand, method killChildren.
/**
 * Marks every coordinator action in {@code coordActions} as KILLED and schedules the follow-up work.
 * <p>
 * For each action the status and last-modified time are updated. If the action has an external (workflow)
 * id, a {@code KillXCommand} is queued for it and the action's pending counter is incremented; otherwise the
 * pending counter is set to 0. A status/pending/time update is added to {@code updateList}, an event is
 * generated when the event handler service is enabled, and a notification command is queued. Finally
 * {@code ret} is set to a {@code CoordinatorActionInfo} wrapping the processed actions.
 *
 * @throws CommandException declared by the overridden method
 */
@Override
public void killChildren() throws CommandException {
    InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());
    for (CoordinatorActionBean coordAction : coordActions) {
        coordAction.setStatus(CoordinatorAction.Status.KILLED);
        coordAction.setLastModifiedTime(new Date());
        // kill Workflow job associated with this Coord action
        if (coordAction.getExternalId() != null) {
            queue(new KillXCommand(coordAction.getExternalId()));
            coordAction.incrementAndGetPending();
        } else {
            coordAction.setPending(0);
        }
        // Parameterized instead of the raw UpdateEntry type, so the query enum is checked at compile time
        updateList.add(new UpdateEntry<CoordActionQuery>(CoordActionQuery.UPDATE_COORD_ACTION_STATUS_PENDING_TIME, coordAction));
        if (EventHandlerService.isEnabled()) {
            CoordinatorXCommand.generateEvent(coordAction, coordJob.getUser(), coordJob.getAppName(), coordAction.getCreatedTime());
        }
        queue(new CoordActionNotificationXCommand(coordAction), 100);
    }
    ret = new CoordinatorActionInfo(coordActions);
}
Use of org.apache.oozie.CoordinatorActionBean in the Apache Oozie project.
Class CoordRerunXCommand, method rerunChildren.
/**
 * Re-runs the coordinator actions selected by {@code rerunType} and {@code scope} for the job {@code jobId}.
 * <p>
 * If all selected actions are runnable, each one is processed in turn: its output events are cleaned up
 * (unless {@code noCleanup} or {@code failed} is set), it is refreshed when {@code refresh} is set, it is
 * updated, its SLA registration is updated when the SLA service is enabled, and notification, input-check
 * and (when push dependencies are present) push-dependency-check commands are queued. The URI handler
 * contexts created during cleanup are always destroyed afterwards. On success {@code ret} is set to a
 * {@code CoordinatorActionInfo} for the actions; on any failure {@code transitToPrevious()} is called.
 *
 * @throws CommandException if the job configuration cannot be read, if part or all of the actions are not
 *         eligible for rerun, or if any step of the rerun fails
 */
@Override
public void rerunChildren() throws CommandException {
    boolean isError = false;
    try {
        CoordinatorActionInfo coordInfo = null;
        InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());
        List<CoordinatorActionBean> coordActions = CoordUtils.getCoordActions(rerunType, jobId, scope, false);
        if (checkAllActionsRunnable(coordActions)) {
            Map<String, Context> uriHandlerContextMap = new HashMap<String, Context>();
            Configuration coordJobConf = null;
            try {
                coordJobConf = new XConfiguration(new StringReader(coordJob.getConf()));
            } catch (IOException e) {
                // Pass the IOException as the trailing argument so the root cause is not lost
                throw new CommandException(ErrorCode.E0907, "failed to read coord job conf to clean up output data", e);
            }
            try {
                for (CoordinatorActionBean coordAction : coordActions) {
                    String actionXml = coordAction.getActionXml();
                    // Cleanup activity should not run when failed option has been provided
                    if (!noCleanup && !failed) {
                        Element eAction = XmlUtils.parseXml(actionXml);
                        cleanupOutputEvents(eAction, coordJobConf, uriHandlerContextMap);
                    }
                    if (refresh) {
                        refreshAction(coordJob, coordAction);
                    }
                    updateAction(coordJob, coordAction);
                    if (SLAService.isEnabled()) {
                        SLAOperations.updateRegistrationEvent(coordAction.getId());
                    }
                    queue(new CoordActionNotificationXCommand(coordAction), 100);
                    queue(new CoordActionInputCheckXCommand(coordAction.getId(), coordAction.getJobId()), 100);
                    if (coordAction.getPushMissingDependencies() != null) {
                        queue(new CoordPushDependencyCheckXCommand(coordAction.getId(), true), 100);
                    }
                }
            } finally {
                // Destroy and drop every context created during cleanup, even if the loop above failed
                Iterator<Entry<String, Context>> itr = uriHandlerContextMap.entrySet().iterator();
                while (itr.hasNext()) {
                    Entry<String, Context> entry = itr.next();
                    entry.getValue().destroy();
                    itr.remove();
                }
            }
        } else {
            isError = true;
            throw new CommandException(ErrorCode.E1018, "part or all actions are not eligible to rerun!");
        }
        coordInfo = new CoordinatorActionInfo(coordActions);
        ret = coordInfo;
    } catch (XException xex) {
        isError = true;
        throw new CommandException(xex);
    } catch (JDOMException jex) {
        isError = true;
        throw new CommandException(ErrorCode.E0700, jex.getMessage(), jex);
    } catch (Exception ex) {
        isError = true;
        throw new CommandException(ErrorCode.E1018, ex.getMessage(), ex);
    } finally {
        // Roll the job back to its previous state whenever the rerun did not complete
        if (isError) {
            transitToPrevious();
        }
    }
}
Aggregations