use of org.apache.oozie.sla.service.SLAService in project oozie by apache.
the class TestCoordSubmitXCommand method testSLAAlertWithNewlyCreatedActions.
/**
 * Checks that SLA settings changed on a coordinator job are picked up by actions
 * that are materialized after the change.
 * <p>
 * The first materialized action is expected to report an expected duration of
 * 1800000 ms with alerts enabled. After the SLA max duration is changed to
 * "5 * MINUTES" and alerts are disabled on the job, the last materialized action
 * is expected to report 300000 ms with alerts disabled, and the stored job XML is
 * expected to carry the new max-duration expression.
 *
 * @throws Exception if any command, query or file operation fails
 */
public void testSLAAlertWithNewlyCreatedActions() throws Exception {
    Configuration conf = new XConfiguration();
    File appPathFile = new File(getTestCaseDir(), "coordinator.xml");
    // Stage the coordinator definition (coord-action-sla.xml) as the app under test.
    Reader reader = IOUtils.getResourceAsReader("coord-action-sla.xml", -1);
    Writer writer = new FileWriter(appPathFile);
    IOUtils.copyCharStream(reader, writer);
    conf.set(OozieClient.COORDINATOR_APP_PATH, appPathFile.toURI().toString());
    // Job window: from one day ago until one month from now.
    conf.set("start", DateUtils.formatDateOozieTZ(org.apache.commons.lang.time.DateUtils.addDays(new Date(), -1)));
    conf.set("end", DateUtils.formatDateOozieTZ(org.apache.commons.lang.time.DateUtils.addMonths(new Date(), 1)));
    conf.set(OozieClient.USER_NAME, getTestUser());
    // Stage the workflow definition referenced through "wfAppPath".
    reader = IOUtils.getResourceAsReader("wf-credentials.xml", -1);
    appPathFile = new File(getTestCaseDir(), "workflow.xml");
    writer = new FileWriter(appPathFile);
    IOUtils.copyCharStream(reader, writer);
    conf.set("wfAppPath", appPathFile.getPath());
    Date nominalTime = new Date();
    conf.set("nominal_time", DateUtils.formatDateOozieTZ(nominalTime));

    String coordId = new CoordSubmitXCommand(conf).call();
    CoordinatorJobBean job = CoordJobQueryExecutor.getInstance().get(CoordJobQueryExecutor.CoordJobQuery.GET_COORD_JOB, coordId);
    // NOTE(review): throttling of 1 presumably caps materialization at a single action;
    // the lastActionNumber assertion below relies on that.
    job.setMatThrottling(1);
    CoordJobQueryExecutor.getInstance().executeUpdate(CoordJobQueryExecutor.CoordJobQuery.UPDATE_COORD_JOB, job);
    new CoordMaterializeTransitionXCommand(coordId, 3600).call();

    SLAService slaService = services.get(SLAService.class);
    SLACalculator calc = slaService.getSLACalculator();
    SLACalcStatus slaCalc = calc.get(coordId + "@" + 1);
    // Before any change: alerts enabled, original expected duration.
    assertFalse(Boolean.valueOf(slaCalc.getSLAConfigMap().get(OozieClient.SLA_DISABLE_ALERT)));
    assertEquals(1800000, slaCalc.getExpectedDuration());
    job = CoordJobQueryExecutor.getInstance().get(CoordJobQueryExecutor.CoordJobQuery.GET_COORD_JOB, coordId);
    assertEquals(1, job.getLastActionNumber());

    // Change the SLA definition and disable alerts at the coordinator-job level.
    String newParams = RestConstants.SLA_MAX_DURATION + "=${5 * MINUTES}";
    new CoordSLAChangeXCommand(coordId, null, null, JobUtils.parseChangeValue(newParams)).call();
    new CoordSLAAlertsDisableXCommand(coordId, null, null).call();

    // Raise the throttle and materialize again so that a new action is created.
    job = CoordJobQueryExecutor.getInstance().get(CoordJobQueryExecutor.CoordJobQuery.GET_COORD_JOB, coordId);
    job.setMatThrottling(2);
    CoordJobQueryExecutor.getInstance().executeUpdate(CoordJobQueryExecutor.CoordJobQuery.UPDATE_COORD_JOB, job);
    new CoordMaterializeTransitionXCommand(coordId, 3600).call();

    job = CoordJobQueryExecutor.getInstance().get(CoordJobQueryExecutor.CoordJobQuery.GET_COORD_JOB, coordId);
    slaCalc = calc.get(coordId + "@" + job.getLastActionNumber());
    assertEquals(300000, slaCalc.getExpectedDuration());
    // The newly created action should have SLA alerts disabled after the disable command on the coord job.
    assertTrue(Boolean.valueOf(slaCalc.getSLAConfigMap().get(OozieClient.SLA_DISABLE_ALERT)));
    // The persisted job XML should carry the updated max-duration expression.
    Element eAction = XmlUtils.parseXml(job.getJobXml());
    Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info", eAction.getNamespace("sla"));
    assertEquals("${5 * MINUTES}", SLAOperations.getTagElement(eSla, "max-duration"));
}
use of org.apache.oozie.sla.service.SLAService in project oozie by apache.
the class DummySLACalculatorMemory method testSLAFailOverWithHA.
/**
 * Exercises SLA fail-over between two calculators: jobs are registered on a
 * second ("peer") calculator, that calculator is cleared and its dummy Oozie
 * instance torn down, and the calculator obtained from the running services is
 * then expected to emit the remaining SLA events for those jobs.
 *
 * @throws Exception if any service, query or calculator call fails
 */
public void testSLAFailOverWithHA() throws Exception {
    final int hourMillis = 3600 * 1000;
    final int tenMinutesMillis = 10 * 60 * 1000;
    final String firstJobId = "job-1-W";
    final String secondJobId = "job-2-W";

    SLAService slaService = Services.get().get(SLAService.class);
    SLACalculatorMemory liveCalc = (SLACalculatorMemory) slaService.getSLACalculator();
    EventHandlerService liveEvents = Services.get().get(EventHandlerService.class);

    // Second dummy Oozie instance with its own SLA calculator and event handler service.
    DummyZKOozie peerOozie = null;
    try {
        peerOozie = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory peerCalc = new DummySLACalculatorMemory();
        EventHandlerService peerEvents = new EventHandlerService();
        peerCalc.setEventHandlerService(peerEvents);
        peerEvents.init(Services.get());
        peerCalc.init(Services.get().getConf());

        // Case 1: job registered on the peer, which goes away before the job starts running.
        WorkflowJobBean firstJob = createWorkflow(firstJobId);
        SLARegistrationBean firstSla = TestSLAService._createSLARegistration(firstJobId, AppType.WORKFLOW_JOB);
        // expected start: two hours ago
        firstSla.setExpectedStart(new Date(System.currentTimeMillis() - 2 * hourMillis));
        // expected end: one hour ago
        firstSla.setExpectedEnd(new Date(System.currentTimeMillis() - hourMillis));
        // expected duration: ten minutes
        firstSla.setExpectedDuration(tenMinutesMillis);
        peerCalc.addRegistration(firstSla.getId(), firstSla);
        peerCalc.updateAllSlaStatus();

        // Case 2: job registered on the peer and reported as running there before the peer goes away.
        WorkflowJobBean secondJob = createWorkflow(secondJobId);
        SLARegistrationBean secondSla = TestSLAService._createSLARegistration(secondJobId, AppType.WORKFLOW_JOB);
        // expected start: two hours ago
        secondSla.setExpectedStart(new Date(System.currentTimeMillis() - 2 * hourMillis));
        // expected end: one hour from now
        secondSla.setExpectedEnd(new Date(System.currentTimeMillis() + hourMillis));
        // expected duration: ten minutes
        secondSla.setExpectedDuration(tenMinutesMillis);
        peerCalc.addRegistration(secondSla.getId(), secondSla);
        peerCalc.addJobStatus(secondSla.getId(), WorkflowJob.Status.RUNNING.name(), EventStatus.STARTED, new Date(),
                new Date());
        peerCalc.updateAllSlaStatus();
        peerEvents.new EventWorker().run();
        assertTrue(output.toString().contains(secondSla.getId() + " Sla START - MISS!!!"));

        // Simulate the peer server going down.
        peerCalc.clear();
        peerCalc = null;
        peerOozie.teardown();

        liveCalc.updateAllSlaStatus();

        // Job 1 starts running on the surviving server; a start miss is expected.
        liveCalc.addJobStatus(firstSla.getId(), WorkflowJob.Status.RUNNING.name(), EventStatus.STARTED, new Date(),
                new Date());
        // Job 1 must now be known to the surviving server's calculator.
        assertNotNull(liveCalc.get(firstSla.getId()));
        liveEvents.new EventWorker().run();
        assertTrue(output.toString().contains(firstSla.getId() + " Sla START - MISS!!!"));

        firstJob.setStatus(WorkflowJob.Status.SUCCEEDED);
        firstJob.setEndTime(new Date());
        firstJob.setStartTime(new Date());
        WorkflowJobQueryExecutor.getInstance().executeUpdate(
                WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_START_END, firstJob);

        // Job 1 succeeds on the surviving server; duration met and end miss are expected.
        liveCalc.addJobStatus(firstSla.getId(), WorkflowJob.Status.SUCCEEDED.name(), EventStatus.SUCCESS, new Date(),
                new Date());
        liveEvents.new EventWorker().run();
        assertTrue(output.toString().contains(firstSla.getId() + " Sla DURATION - MET!!!"));
        assertTrue(output.toString().contains(firstSla.getId() + " Sla END - MISS!!!"));

        secondJob.setStatus(WorkflowJob.Status.SUCCEEDED);
        secondJob.setEndTime(new Date());
        secondJob.setStartTime(new Date());
        WorkflowJobQueryExecutor.getInstance().executeUpdate(
                WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_START_END, secondJob);

        // Job 2 succeeds on the surviving server; duration met and end met are expected.
        liveCalc.addJobStatus(secondSla.getId(), WorkflowJob.Status.SUCCEEDED.name(), EventStatus.SUCCESS, new Date(),
                new Date());
        // Once duration/end have been processed the entry is expected to be gone from the calculator.
        assertNull(liveCalc.get(secondSla.getId()));
        liveEvents.new EventWorker().run();
        assertTrue(output.toString().contains(secondSla.getId() + " Sla DURATION - MET!!!"));
        assertTrue(output.toString().contains(secondSla.getId() + " Sla END - MET!!!"));
    } finally {
        if (peerOozie != null) {
            peerOozie.teardown();
        }
    }
}
use of org.apache.oozie.sla.service.SLAService in project oozie by apache.
the class DummySLACalculatorMemory method testNoDuplicateEventsInHA.
/**
 * Checks that when one calculator records the start of a coordinator action and
 * a second calculator records its failure, a subsequent status update on the
 * first calculator does not enqueue another event.
 *
 * @throws Exception if any service, query or calculator call fails
 */
public void testNoDuplicateEventsInHA() throws Exception {
    final String actionId = "0000001-130521183438837-oozie-test-C@1";

    SLAService slaService = Services.get().get(SLAService.class);
    SLACalculatorMemory primaryCalc = (SLACalculatorMemory) slaService.getSLACalculator();
    // (re)initialize the calculator from the current services configuration
    primaryCalc.init(Services.get().getConf());
    EventHandlerService primaryEvents = Services.get().get(EventHandlerService.class);
    EventQueue primaryQueue = primaryEvents.getEventQueue();

    DummyZKOozie secondOozie = null;
    try {
        // Second dummy Oozie instance with its own SLA calculator and event handler service.
        secondOozie = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory secondaryCalc = new DummySLACalculatorMemory();
        secondaryCalc.init(Services.get().getConf());
        EventHandlerService secondaryEvents = new EventHandlerService();
        secondaryCalc.setEventHandlerService(secondaryEvents);
        secondaryEvents.init(Services.get());
        EventQueue secondaryQueue = secondaryEvents.getEventQueue();

        // expected start: two hours from now
        Date expectedStart = new Date(System.currentTimeMillis() + 2 * 3600 * 1000);
        // expected end: one hour from now
        Date expectedEnd = new Date(System.currentTimeMillis() + 1 * 3600 * 1000);
        SLASummaryBean summary = createDBEntryForStarted(actionId, expectedStart, expectedEnd, 0);
        summary.setExpectedDuration(-1);
        summary.setLastModifiedTime(new Date());
        SLASummaryQueryExecutor.getInstance().executeUpdate(SLASummaryQuery.UPDATE_SLA_SUMMARY_FOR_EXPECTED_TIMES,
                summary);

        // The action starts on the first server.
        updateCoordAction(actionId, "RUNNING");
        primaryCalc.addJobStatus(actionId, CoordinatorAction.Status.RUNNING.name(), EventStatus.STARTED, new Date(),
                null);
        assertEquals(1, primaryQueue.size());
        SLACalcStatus startedEvent = (SLACalcStatus) primaryQueue.poll();
        assertEquals(SLAStatus.IN_PROCESS, startedEvent.getSLAStatus());

        // The action ends (fails) on the second server.
        updateCoordAction(actionId, "FAILED");
        secondaryCalc.addJobStatus(actionId, CoordinatorAction.Status.FAILED.name(), EventStatus.FAILURE,
                new Date(System.currentTimeMillis() - 1800 * 1000), new Date());
        SLACalcStatus failedEvent = (SLACalcStatus) secondaryQueue.poll();
        assertEquals(SLAStatus.MISS, failedEvent.getSLAStatus());

        primaryCalc.updateAllSlaStatus();
        secondaryCalc.updateAllSlaStatus();

        // The first server must not have produced a duplicate event.
        assertEquals(0, primaryQueue.size());
    } finally {
        if (secondOozie != null) {
            secondOozie.teardown();
        }
    }
}
use of org.apache.oozie.sla.service.SLAService in project oozie by apache.
the class DummySLACalculatorMemory method testSLAAlertCommandWithHA.
/**
 * Tests SLA alert commands with two calculators: {@code slaCalcMem1} stands for
 * server 1 and {@code slaCalcMem2} for server 2.
 * <p>
 * Alert-disable and max-duration changes are issued on {@code slaCalcMem1}; the
 * test asserts that {@code slaCalcMem2} reports the same state, where for the
 * duration changes this is only expected after its own
 * {@code updateAllSlaStatus()} call.
 *
 * @throws Exception if any service or calculator call fails
 */
public void testSLAAlertCommandWithHA() throws Exception {
    String id = "0000001-130521183438837-oozie-test-C@1";
    // expected start: two hours ago
    Date expectedStartTS = new Date(System.currentTimeMillis() - 2 * 3600 * 1000);
    // expected end: one hour from now
    Date expectedEndTS1 = new Date(System.currentTimeMillis() + 1 * 3600 * 1000);
    // SLA DB entry for the single coordinator action used by this test
    createDBEntry(id, expectedStartTS, expectedEndTS1);
    SLAService slas = Services.get().get(SLAService.class);
    SLACalculatorMemory slaCalcMem1 = (SLACalculatorMemory) slas.getSLACalculator();
    slaCalcMem1.init(Services.get().get(ConfigurationService.class).getConf());
    slaCalcMem1.updateAllSlaStatus();
    List<String> idList = new ArrayList<String>();
    idList.add(id);
    slaCalcMem1.disableAlert(idList);
    assertTrue(slaCalcMem1.get(id).getSLAConfigMap().containsKey(OozieClient.SLA_DISABLE_ALERT));
    DummyZKOozie dummyOozie_1 = null;
    try {
        // start another dummy oozie instance (dummy sla and event handler services)
        dummyOozie_1 = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory slaCalcMem2 = new DummySLACalculatorMemory();
        EventHandlerService dummyEhs = new EventHandlerService();
        slaCalcMem2.setEventHandlerService(dummyEhs);
        // Large interval/delay so that the periodic SLA update doesn't run automatically
        Services.get().get(ConfigurationService.class).getConf().setInt(SLAService.CONF_SLA_CHECK_INTERVAL, 100000);
        Services.get().get(ConfigurationService.class).getConf().setInt(SLAService.CONF_SLA_CHECK_INITIAL_DELAY, 100000);
        dummyEhs.init(Services.get());
        slaCalcMem2.init(Services.get().get(ConfigurationService.class).getConf());
        slaCalcMem2.updateAllSlaStatus();
        // The alert-disable issued on server 1 must be visible on server 2.
        assertTrue(slaCalcMem2.get(id).getSLAConfigMap().containsKey(OozieClient.SLA_DISABLE_ALERT));

        // First change: max duration of 5 (asserted below as 5 minutes in milliseconds).
        String newParams = RestConstants.SLA_MAX_DURATION + "=5";
        List<Pair<String, Map<String, String>>> jobIdsSLAPair = new ArrayList<Pair<String, Map<String, String>>>();
        jobIdsSLAPair.add(new Pair<String, Map<String, String>>(id, JobUtils.parseChangeValue(newParams)));
        slaCalcMem1.changeDefinition(jobIdsSLAPair);
        assertEquals(5 * 60 * 1000, slaCalcMem1.get(id).getExpectedDuration());
        // Before server 2 refreshes, it still reports the default of 10 minutes.
        assertEquals(10 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
        slaCalcMem2.updateAllSlaStatus();
        assertEquals(5 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());

        // Second change: max duration of 15.
        newParams = RestConstants.SLA_MAX_DURATION + "=15";
        jobIdsSLAPair.clear();
        jobIdsSLAPair.add(new Pair<String, Map<String, String>>(id, JobUtils.parseChangeValue(newParams)));
        slaCalcMem1.changeDefinition(jobIdsSLAPair);
        // Before server 2 refreshes, it still reports the previous value.
        assertEquals(5 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
        slaCalcMem2.updateAllSlaStatus();
        assertEquals(15 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
    } finally {
        if (dummyOozie_1 != null) {
            dummyOozie_1.teardown();
        }
    }
}
use of org.apache.oozie.sla.service.SLAService in project oozie by apache.
the class TestSLACalculatorMemory method testErrorLoggingWithJobIdPrefix.
/**
 * Registers an SLA entry for the id "dummy-id", runs the SLA worker once and
 * checks that the first event logged by {@code SLACalculatorMemory} is an ERROR
 * whose message contains the {@code JOB[dummy-id]} prefix.
 *
 * @throws Exception if registration or the SLA worker run fails
 */
public void testErrorLoggingWithJobIdPrefix() throws Exception {
    SLAService slaService = Services.get().get(SLAService.class);
    SLACalculator slaCalculator = slaService.getSLACalculator();
    slaCalculator.addRegistration("dummy-id", _createSLARegistration("dummy-id", AppType.WORKFLOW_JOB));
    TestLogAppender appender = getTestLogAppender();
    Logger logger = Logger.getLogger(SLACalculatorMemory.class);
    // Remember the configured level so this test does not leak ERROR-only logging into later tests.
    Level previousLevel = logger.getLevel();
    logger.addAppender(appender);
    logger.setLevel(Level.ERROR);
    try {
        slaService.runSLAWorker();
    } finally {
        logger.removeAppender(appender);
        logger.setLevel(previousLevel);
    }
    List<LoggingEvent> log = appender.getLog();
    // Fail with a readable message rather than an IndexOutOfBoundsException when nothing was logged.
    assertFalse("Expected at least one ERROR log entry from SLACalculatorMemory", log.isEmpty());
    LoggingEvent firstLogEntry = log.get(0);
    assertEquals(Level.ERROR, firstLogEntry.getLevel());
    assertTrue(firstLogEntry.getMessage().toString().contains("JOB[dummy-id]"));
    assertEquals("org.apache.oozie.sla.SLACalculatorMemory", firstLogEntry.getLoggerName());
}
Aggregations