use of org.apache.oozie.sla.SLACalculatorMemory in project oozie by apache.
the class DummySLACalculatorMemory method testSLAUpdateWithHA.
/**
 * Exercises SLA status propagation between two calculators that share one database:
 * the calculator obtained from {@code SLAService} ("non-dummy" server) and a
 * {@code DummySLACalculatorMemory} created next to a {@code DummyZKOozie} instance
 * ("dummy" server).
 *
 * <p>Six coordinator actions are registered. Actions 1-4 are created as not started,
 * actions 5-6 as already started with an expected end one hour in the past. Status
 * changes are reported to one calculator at a time, then both calculators run
 * {@code updateAllSlaStatus()} and the test asserts that SLA map membership, history-set
 * membership and the persisted event-processed value agree on both sides.
 *
 * @throws Exception if any helper, calculator or query call fails
 */
public void testSLAUpdateWithHA() throws Exception {
    String id1 = "0000001-130521183438837-oozie-test-C@1";
    String id2 = "0000002-130521183438837-oozie-test-C@1";
    String id3 = "0000003-130521183438837-oozie-test-C@1";
    String id4 = "0000004-130521183438837-oozie-test-C@1";
    String id5 = "0000005-130521183438837-oozie-test-C@1";
    String id6 = "0000006-130521183438837-oozie-test-C@1";
    // 2 hrs passed
    Date expectedStartTS = new Date(System.currentTimeMillis() - 2 * 3600 * 1000);
    // 1 hour ahead
    Date expectedEndTS1 = new Date(System.currentTimeMillis() + 1 * 3600 * 1000);
    // 1 hour passed
    Date expectedEndTS2 = new Date(System.currentTimeMillis() - 1 * 3600 * 1000);
    // Coord Action of jobs 1-4 not started yet
    createDBEntry(id1, expectedStartTS, expectedEndTS1);
    createDBEntry(id2, expectedStartTS, expectedEndTS1);
    createDBEntry(id3, expectedStartTS, expectedEndTS1);
    createDBEntry(id4, expectedStartTS, expectedEndTS1);
    // Coord Action of jobs 5-6 already started and currently running (to test history set)
    createDBEntryForStarted(id5, expectedStartTS, expectedEndTS2, 1);
    createDBEntryForStarted(id6, expectedStartTS, expectedEndTS2, 1);
    SLAService slas = Services.get().get(SLAService.class);
    SLACalculatorMemory slaCalcMem = (SLACalculatorMemory) slas.getSLACalculator();
    // re-run init so the calculator picks up the entries created above
    slaCalcMem.init(Services.get().getConf());
    slaCalcMem.updateAllSlaStatus();
    List<String> slaMapKeys = new ArrayList<String>();
    Iterator<String> itr = slaCalcMem.iterator();
    while (itr.hasNext()) {
        slaMapKeys.add(itr.next());
    }
    // 4 jobs expected end is not yet reached
    // 2 jobs has end miss, waiting for job to complete
    assertEquals(4, slaMapKeys.size());
    assertEquals(2, slaCalcMem.getHistorySet().size());
    DummyZKOozie dummyOozie_1 = null;
    try {
        // start another dummy oozie instance (dummy sla and event handler services)
        dummyOozie_1 = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory dummySlaCalcMem = new DummySLACalculatorMemory();
        EventHandlerService dummyEhs = new EventHandlerService();
        dummySlaCalcMem.setEventHandlerService(dummyEhs);
        dummyEhs.init(Services.get());
        dummySlaCalcMem.init(Services.get().getConf());
        dummySlaCalcMem.updateAllSlaStatus();
        slaMapKeys = new ArrayList<String>();
        itr = dummySlaCalcMem.iterator();
        while (itr.hasNext()) {
            slaMapKeys.add(itr.next());
        }
        // the dummy server must see the same 4 map entries / 2 history entries
        assertEquals(4, slaMapKeys.size());
        assertEquals(2, dummySlaCalcMem.getHistorySet().size());
        // Coord Action 1,3 run and update status on *non-dummy* server
        updateCoordAction(id1, "RUNNING");
        slaCalcMem.addJobStatus(id1, CoordinatorAction.Status.RUNNING.name(), EventStatus.STARTED, new Date(), null);
        updateCoordAction(id3, "FAILED");
        slaCalcMem.addJobStatus(id3, CoordinatorAction.Status.FAILED.name(), EventStatus.FAILURE, null, new Date());
        // Coord Action 2,4 run and update status on *dummy* server
        updateCoordAction(id2, "RUNNING");
        dummySlaCalcMem.addJobStatus(id2, CoordinatorAction.Status.RUNNING.name(), EventStatus.STARTED, new Date(), null);
        updateCoordAction(id4, "FAILED");
        dummySlaCalcMem.addJobStatus(id4, CoordinatorAction.Status.FAILED.name(), EventStatus.FAILURE, null, new Date());
        // Both servers iterate SlaMap (updateAllSlaStatus)
        slaCalcMem.updateAllSlaStatus();
        dummySlaCalcMem.updateAllSlaStatus();
        // SlaMap on both Servers synced
        SLACalcStatus sla1_nodummy = slaCalcMem.get(id1);
        SLACalcStatus sla1_dummy = dummySlaCalcMem.get(id1);
        SLACalcStatus sla2_nodummy = slaCalcMem.get(id2);
        SLACalcStatus sla2_dummy = dummySlaCalcMem.get(id2);
        assertEquals(1, sla1_nodummy.getEventProcessed());
        assertEquals(1, sla1_dummy.getEventProcessed());
        assertEquals(1, sla2_dummy.getEventProcessed());
        assertEquals(1, sla2_nodummy.getEventProcessed());
        // failed actions 3 and 4 must be gone from the SLA map of both servers
        assertFalse(slaCalcMem.isJobIdInSLAMap(id3));
        assertFalse(dummySlaCalcMem.isJobIdInSLAMap(id3));
        assertFalse(slaCalcMem.isJobIdInSLAMap(id4));
        assertFalse(dummySlaCalcMem.isJobIdInSLAMap(id4));
        Byte eventProc = (Byte) SLASummaryQueryExecutor.getInstance().getSingleValue(SLASummaryQuery.GET_SLA_SUMMARY_EVENTPROCESSED, id3);
        assertEquals(8, eventProc.byteValue());
        eventProc = (Byte) SLASummaryQueryExecutor.getInstance().getSingleValue(SLASummaryQuery.GET_SLA_SUMMARY_EVENTPROCESSED, id4);
        assertEquals(8, eventProc.byteValue());
        // Actions 5 and 6 were processed as END_MISS in updateAllSlaStatus and put into the
        // history set. Both servers reported two history entries above, so each id is
        // verified on each server (the previous version repeated the same two checks twice).
        assertTrue(slaCalcMem.isJobIdInHistorySet(id5));
        assertTrue(dummySlaCalcMem.isJobIdInHistorySet(id5));
        assertTrue(slaCalcMem.isJobIdInHistorySet(id6));
        assertTrue(dummySlaCalcMem.isJobIdInHistorySet(id6));
        eventProc = (Byte) SLASummaryQueryExecutor.getInstance().getSingleValue(SLASummaryQuery.GET_SLA_SUMMARY_EVENTPROCESSED, id5);
        assertEquals(7, eventProc.byteValue());
        eventProc = (Byte) SLASummaryQueryExecutor.getInstance().getSingleValue(SLASummaryQuery.GET_SLA_SUMMARY_EVENTPROCESSED, id6);
        assertEquals(7, eventProc.byteValue());
        // Action 1 Succeeded on non-dummy server
        updateCoordAction(id1, "SUCCEEDED");
        slaCalcMem.addJobStatus(id1, CoordinatorAction.Status.SUCCEEDED.name(), EventStatus.SUCCESS, new Date(System.currentTimeMillis() - 1800 * 1000), new Date());
        // Action 2 Succeeded on dummy server
        updateCoordAction(id2, "SUCCEEDED");
        dummySlaCalcMem.addJobStatus(id2, CoordinatorAction.Status.SUCCEEDED.name(), EventStatus.SUCCESS, new Date(System.currentTimeMillis() - 1800 * 1000), new Date());
        // Both servers iterate SlaMap (updateAllSlaStatus)
        slaCalcMem.updateAllSlaStatus();
        dummySlaCalcMem.updateAllSlaStatus();
        // Action 1, 2 are removed from both servers
        assertNull(slaCalcMem.get(id1));
        assertNull(dummySlaCalcMem.get(id1));
        assertNull(slaCalcMem.get(id2));
        assertNull(dummySlaCalcMem.get(id2));
        eventProc = (Byte) SLASummaryQueryExecutor.getInstance().getSingleValue(SLASummaryQuery.GET_SLA_SUMMARY_EVENTPROCESSED, id1);
        assertEquals(8, eventProc.byteValue());
        eventProc = (Byte) SLASummaryQueryExecutor.getInstance().getSingleValue(SLASummaryQuery.GET_SLA_SUMMARY_EVENTPROCESSED, id2);
        assertEquals(8, eventProc.byteValue());
        // Test HistoryPurgeWorker purges Action 5,6 from history set
        updateCoordAction(id5, "SUCCEEDED");
        slaCalcMem.new HistoryPurgeWorker().run();
        assertFalse(slaCalcMem.isJobIdInHistorySet(id5));
        updateCoordAction(id6, "SUCCEEDED");
        dummySlaCalcMem.new HistoryPurgeWorker().run();
        assertFalse(dummySlaCalcMem.isJobIdInHistorySet(id6));
    } finally {
        // always release the second (dummy) Oozie instance
        if (dummyOozie_1 != null) {
            dummyOozie_1.teardown();
        }
    }
}
use of org.apache.oozie.sla.SLACalculatorMemory in project oozie by apache.
the class SLAService method init.
/**
 * Initializes the SLA service.
 *
 * <p>Instantiates the calculator configured under {@code CONF_CALCULATOR_IMPL}
 * (falling back to {@link SLACalculatorMemory} when none is configured), initializes it,
 * requires an {@code EventHandlerService} to be registered, adds the
 * {@code "workflow_action"} app type to that handler, and schedules an {@code SLAWorker}
 * using the configured initial delay and check interval (in seconds).
 *
 * @param services the services container supplying configuration and the scheduler
 * @throws ServiceException with {@code E0103} when no {@code EventHandlerService} is
 *         registered; any {@code ServiceException} raised inside is propagated unchanged;
 *         every other failure is wrapped as {@code E0102}
 */
@Override
public void init(Services services) throws ServiceException {
    try {
        Configuration conf = services.getConf();
        Class<? extends SLACalculator> calcClazz = (Class<? extends SLACalculator>) ConfigurationService.getClass(conf, CONF_CALCULATOR_IMPL);
        calcImpl = calcClazz == null ? new SLACalculatorMemory() : (SLACalculator) calcClazz.newInstance();
        calcImpl.init(conf);
        eventHandler = Services.get().get(EventHandlerService.class);
        if (eventHandler == null) {
            throw new ServiceException(ErrorCode.E0103, "EventHandlerService", "Add it under config " + Services.CONF_SERVICE_EXT_CLASSES + " or declare it BEFORE SLAService");
        }
        LOG = XLog.getLog(getClass());
        // make sure workflow action events are handled as well
        java.util.Set<String> appTypes = eventHandler.getAppTypes();
        appTypes.add("workflow_action");
        eventHandler.setAppTypes(appTypes);
        Runnable slaThread = new SLAWorker(calcImpl);
        // schedule runnable by default every 30 sec
        int slaCheckInterval = ConfigurationService.getInt(conf, CONF_SLA_CHECK_INTERVAL);
        int slaCheckInitialDelay = ConfigurationService.getInt(conf, CONF_SLA_CHECK_INITIAL_DELAY);
        services.get(SchedulerService.class).schedule(slaThread, slaCheckInitialDelay, slaCheckInterval, SchedulerService.Unit.SEC);
        slaEnabled = true;
        LOG.info("SLAService initialized with impl [{0}] capacity [{1}]", calcImpl.getClass().getName(), conf.get(SLAService.CONF_CAPACITY));
    } catch (ServiceException ex) {
        // Already carries a specific error code (e.g. E0103 for the missing
        // EventHandlerService); re-wrapping it as E0102 would hide that code.
        throw ex;
    } catch (Exception ex) {
        throw new ServiceException(ErrorCode.E0102, ex.getMessage(), ex);
    }
}
use of org.apache.oozie.sla.SLACalculatorMemory in project oozie by apache.
the class DummySLACalculatorMemory method testSLAFailOverWithHA.
/**
 * Simulates fail-over between two servers: SLA registrations are added on a dummy
 * calculator, the dummy server is then cleared and torn down, and the calculator obtained
 * from {@code SLAService} must pick the jobs up and emit the remaining SLA events.
 *
 * <p>Case 1: job submitted to the dummy server, which goes down before the job starts.
 * Case 2: job submitted and started on the dummy server, which then goes down.
 * Events are checked through the text captured in {@code output}.
 *
 * @throws Exception if any helper, calculator or query call fails
 */
public void testSLAFailOverWithHA() throws Exception {
    SLAService slas = Services.get().get(SLAService.class);
    SLACalculatorMemory slaCalcMem = (SLACalculatorMemory) slas.getSLACalculator();
    EventHandlerService ehs = Services.get().get(EventHandlerService.class);
    // start another dummy oozie instance (dummy sla and eventhandler
    // services)
    DummyZKOozie dummyOozie_1 = null;
    try {
        dummyOozie_1 = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory dummyCalc = new DummySLACalculatorMemory();
        EventHandlerService dummyEhs = new EventHandlerService();
        dummyCalc.setEventHandlerService(dummyEhs);
        dummyEhs.init(Services.get());
        dummyCalc.init(Services.get().getConf());
        // Case 1 workflow job submitted to dummy server,
        // but before start running, the dummy server is down
        WorkflowJobBean wfJob1 = createWorkflow("job-1-W");
        SLARegistrationBean sla1 = TestSLAService._createSLARegistration("job-1-W", AppType.WORKFLOW_JOB);
        // 2 hr before
        sla1.setExpectedStart(new Date(System.currentTimeMillis() - 2 * 3600 * 1000));
        // 1 hr before
        sla1.setExpectedEnd(new Date(System.currentTimeMillis() - 1 * 3600 * 1000));
        // 10 mins
        sla1.setExpectedDuration(10 * 60 * 1000);
        dummyCalc.addRegistration(sla1.getId(), sla1);
        dummyCalc.updateAllSlaStatus();
        // Case 2. workflow job submitted to dummy server, start running,
        // then the dummy server is down
        WorkflowJobBean wfJob2 = createWorkflow("job-2-W");
        SLARegistrationBean sla2 = TestSLAService._createSLARegistration("job-2-W", AppType.WORKFLOW_JOB);
        // 2hr before
        sla2.setExpectedStart(new Date(System.currentTimeMillis() - 2 * 3600 * 1000));
        // 1hr ahead
        sla2.setExpectedEnd(new Date(System.currentTimeMillis() + 1 * 3600 * 1000));
        // 10 mins
        sla2.setExpectedDuration(10 * 60 * 1000);
        dummyCalc.addRegistration(sla2.getId(), sla2);
        dummyCalc.addJobStatus(sla2.getId(), WorkflowJob.Status.RUNNING.name(), EventStatus.STARTED, new Date(), new Date());
        dummyCalc.updateAllSlaStatus();
        dummyEhs.new EventWorker().run();
        assertTrue(output.toString().contains(sla2.getId() + " Sla START - MISS!!!"));
        // suppose dummy Server is down
        dummyCalc.clear();
        dummyCalc = null;
        dummyOozie_1.teardown();
        // already torn down: clear the reference so the finally block does not
        // tear the same instance down a second time
        dummyOozie_1 = null;
        slaCalcMem.updateAllSlaStatus();
        // Job 1 started running on the living server --> start miss
        slaCalcMem.addJobStatus(sla1.getId(), WorkflowJob.Status.RUNNING.name(), EventStatus.STARTED, new Date(), new Date());
        // job 1 is added to slamap of living oozie server
        assertNotNull(slaCalcMem.get(sla1.getId()));
        ehs.new EventWorker().run();
        assertTrue(output.toString().contains(sla1.getId() + " Sla START - MISS!!!"));
        wfJob1.setStatus(WorkflowJob.Status.SUCCEEDED);
        wfJob1.setEndTime(new Date());
        wfJob1.setStartTime(new Date());
        WorkflowJobQueryExecutor.getInstance().executeUpdate(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_START_END, wfJob1);
        // Job 1 succeeded on the living server --> duration met and end miss
        slaCalcMem.addJobStatus(sla1.getId(), WorkflowJob.Status.SUCCEEDED.name(), EventStatus.SUCCESS, new Date(), new Date());
        ehs.new EventWorker().run();
        assertTrue(output.toString().contains(sla1.getId() + " Sla DURATION - MET!!!"));
        assertTrue(output.toString().contains(sla1.getId() + " Sla END - MISS!!!"));
        wfJob2.setStatus(WorkflowJob.Status.SUCCEEDED);
        wfJob2.setEndTime(new Date());
        wfJob2.setStartTime(new Date());
        WorkflowJobQueryExecutor.getInstance().executeUpdate(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_START_END, wfJob2);
        // Job 2 succeeded on the living server --> duration met and end met
        slaCalcMem.addJobStatus(sla2.getId(), WorkflowJob.Status.SUCCEEDED.name(), EventStatus.SUCCESS, new Date(), new Date());
        // eventProc >= 7(already processed duration/end met), should be removed from slaMap
        assertNull(slaCalcMem.get(sla2.getId()));
        ehs.new EventWorker().run();
        assertTrue(output.toString().contains(sla2.getId() + " Sla DURATION - MET!!!"));
        assertTrue(output.toString().contains(sla2.getId() + " Sla END - MET!!!"));
    } finally {
        // only reached with a live instance when the test failed before the simulated shutdown
        if (dummyOozie_1 != null) {
            dummyOozie_1.teardown();
        }
    }
}
use of org.apache.oozie.sla.SLACalculatorMemory in project oozie by apache.
the class DummySLACalculatorMemory method testNoDuplicateEventsInHA.
/**
 * Checks that an SLA event produced on one server is not produced a second time by the
 * other server: the action start is reported to the calculator from {@code SLAService},
 * the failure is reported to a dummy calculator, and after both calculators run
 * {@code updateAllSlaStatus()} the first server's event queue must be empty.
 *
 * @throws Exception if any helper, calculator or query call fails
 */
public void testNoDuplicateEventsInHA() throws Exception {
    String actionId = "0000001-130521183438837-oozie-test-C@1";
    SLAService slaService = Services.get().get(SLAService.class);
    SLACalculatorMemory liveCalc = (SLACalculatorMemory) slaService.getSLACalculator();
    // loads the job in sla map
    liveCalc.init(Services.get().getConf());
    EventHandlerService liveEhs = Services.get().get(EventHandlerService.class);
    EventQueue liveQueue = liveEhs.getEventQueue();
    DummyZKOozie secondServer = null;
    try {
        // second (dummy) oozie instance with its own sla calculator and event handler
        secondServer = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory dummyCalc = new DummySLACalculatorMemory();
        dummyCalc.init(Services.get().getConf());
        EventHandlerService dummyHandler = new EventHandlerService();
        dummyCalc.setEventHandlerService(dummyHandler);
        dummyHandler.init(Services.get());
        EventQueue dummyQueue = dummyHandler.getEventQueue();
        // original note: "get MISS"
        // NOTE(review): this expected start is 2 hours in the future - confirm intent
        Date plannedStart = new Date(System.currentTimeMillis() + 2 * 3600 * 1000);
        // original note: "get MISS"
        // NOTE(review): this expected end is 1 hour in the future, before the start - confirm intent
        Date plannedEnd = new Date(System.currentTimeMillis() + 1 * 3600 * 1000);
        SLASummaryBean summary = createDBEntryForStarted(actionId, plannedStart, plannedEnd, 0);
        summary.setExpectedDuration(-1);
        summary.setLastModifiedTime(new Date());
        SLASummaryQueryExecutor.getInstance().executeUpdate(SLASummaryQuery.UPDATE_SLA_SUMMARY_FOR_EXPECTED_TIMES, summary);

        // Server 1 sees the action start: exactly one IN_PROCESS event is queued
        updateCoordAction(actionId, "RUNNING");
        liveCalc.addJobStatus(actionId, CoordinatorAction.Status.RUNNING.name(), EventStatus.STARTED, new Date(), null);
        assertEquals(1, liveQueue.size());
        SLACalcStatus startedEvent = (SLACalcStatus) liveQueue.poll();
        assertEquals(SLAStatus.IN_PROCESS, startedEvent.getSLAStatus());

        // Server 2 sees the action fail: a MISS event is queued on the dummy handler
        updateCoordAction(actionId, "FAILED");
        dummyCalc.addJobStatus(actionId, CoordinatorAction.Status.FAILED.name(), EventStatus.FAILURE, new Date(System.currentTimeMillis() - 1800 * 1000), new Date());
        SLACalcStatus failedEvent = (SLACalcStatus) dummyQueue.poll();
        assertEquals(SLAStatus.MISS, failedEvent.getSLAStatus());

        liveCalc.updateAllSlaStatus();
        dummyCalc.updateAllSlaStatus();
        // Server 1 must not raise the same event again
        assertEquals(0, liveQueue.size());
    } finally {
        if (secondServer != null) {
            secondServer.teardown();
        }
    }
}
use of org.apache.oozie.sla.SLACalculatorMemory in project oozie by apache.
the class DummySLACalculatorMemory method testSLAAlertCommandWithHA.
/**
 * Tests SLA ALERT commands in HA mode. {@code slaCalcMem1} stands for server 1 and
 * {@code slaCalcMem2} for server 2.
 *
 * <p>Alerts are disabled and the max duration is changed through server 1; after server 2
 * runs {@code updateAllSlaStatus()} it must report the same disable-alert flag and the
 * same expected duration.
 *
 * @throws Exception if any helper, calculator or parsing call fails
 */
public void testSLAAlertCommandWithHA() throws Exception {
    String id = "0000001-130521183438837-oozie-test-C@1";
    // 2 hrs passed
    Date expectedStartTS = new Date(System.currentTimeMillis() - 2 * 3600 * 1000);
    // 1 hour ahead
    Date expectedEndTS1 = new Date(System.currentTimeMillis() + 1 * 3600 * 1000);
    // Single coord action, not started yet
    createDBEntry(id, expectedStartTS, expectedEndTS1);
    SLAService slas = Services.get().get(SLAService.class);
    SLACalculatorMemory slaCalcMem1 = (SLACalculatorMemory) slas.getSLACalculator();
    slaCalcMem1.init(Services.get().get(ConfigurationService.class).getConf());
    slaCalcMem1.updateAllSlaStatus();
    List<String> idList = new ArrayList<String>();
    idList.add(id);
    slaCalcMem1.disableAlert(idList);
    assertTrue(slaCalcMem1.get(id).getSLAConfigMap().containsKey(OozieClient.SLA_DISABLE_ALERT));
    DummyZKOozie dummyOozie_1 = null;
    try {
        // start another dummy oozie instance (dummy sla and event handler services)
        dummyOozie_1 = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory slaCalcMem2 = new DummySLACalculatorMemory();
        EventHandlerService dummyEhs = new EventHandlerService();
        slaCalcMem2.setEventHandlerService(dummyEhs);
        // So that job sla updated doesn't run automatically
        Services.get().get(ConfigurationService.class).getConf().setInt(SLAService.CONF_SLA_CHECK_INTERVAL, 100000);
        Services.get().get(ConfigurationService.class).getConf().setInt(SLAService.CONF_SLA_CHECK_INITIAL_DELAY, 100000);
        dummyEhs.init(Services.get());
        slaCalcMem2.init(Services.get().get(ConfigurationService.class).getConf());
        slaCalcMem2.updateAllSlaStatus();
        // the disable-alert flag set through server 1 must be visible on server 2
        assertTrue(slaCalcMem2.get(id).getSLAConfigMap().containsKey(OozieClient.SLA_DISABLE_ALERT));
        String newParams = RestConstants.SLA_MAX_DURATION + "=5";
        List<Pair<String, Map<String, String>>> jobIdsSLAPair = new ArrayList<Pair<String, Map<String, String>>>();
        jobIdsSLAPair.add(new Pair<String, Map<String, String>>(id, JobUtils.parseChangeValue(newParams)));
        slaCalcMem1.changeDefinition(jobIdsSLAPair);
        // assertEquals takes (expected, actual); the expected value goes first so that
        // failure messages report the two values the right way round
        assertEquals(5 * 60 * 1000, slaCalcMem1.get(id).getExpectedDuration());
        // Before update, default is 10.
        assertEquals(10 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
        slaCalcMem2.updateAllSlaStatus();
        assertEquals(5 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
        newParams = RestConstants.SLA_MAX_DURATION + "=15";
        jobIdsSLAPair.clear();
        jobIdsSLAPair.add(new Pair<String, Map<String, String>>(id, JobUtils.parseChangeValue(newParams)));
        slaCalcMem1.changeDefinition(jobIdsSLAPair);
        // Before update
        assertEquals(5 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
        slaCalcMem2.updateAllSlaStatus();
        assertEquals(15 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
    } finally {
        // always release the second (dummy) Oozie instance
        if (dummyOozie_1 != null) {
            dummyOozie_1.teardown();
        }
    }
}
Aggregations