Search in sources:

Example 11 with SLAService

Use of org.apache.oozie.sla.service.SLAService in the Apache Oozie project.

In class TestCoordSubmitXCommand, method testSLAAlertWithNewlyCreatedActions.

/**
 * Verifies that coordinator actions materialized after an SLA change command and an SLA
 * alert-disable command on the coordinator job carry the changed max duration and have
 * alerts disabled, and that the changed max duration is stored in the job XML.
 *
 * @throws Exception if a command, query or file operation fails
 */
public void testSLAAlertWithNewlyCreatedActions() throws Exception {
    Configuration conf = new XConfiguration();
    File appPathFile = new File(getTestCaseDir(), "coordinator.xml");
    // Copy the coordinator definition from the test resources into the test case directory
    Reader reader = IOUtils.getResourceAsReader("coord-action-sla.xml", -1);
    Writer writer = new FileWriter(appPathFile);
    IOUtils.copyCharStream(reader, writer);
    conf.set(OozieClient.COORDINATOR_APP_PATH, appPathFile.toURI().toString());
    // Coordinator window: from one day ago until one month from now
    conf.set("start", DateUtils.formatDateOozieTZ(org.apache.commons.lang.time.DateUtils.addDays(new Date(), -1)));
    conf.set("end", DateUtils.formatDateOozieTZ(org.apache.commons.lang.time.DateUtils.addMonths(new Date(), 1)));
    conf.set(OozieClient.USER_NAME, getTestUser());
    // Copy the workflow definition next to the coordinator definition
    reader = IOUtils.getResourceAsReader("wf-credentials.xml", -1);
    appPathFile = new File(getTestCaseDir(), "workflow.xml");
    writer = new FileWriter(appPathFile);
    IOUtils.copyCharStream(reader, writer);
    conf.set("wfAppPath", appPathFile.getPath());
    Date nominalTime = new Date();
    conf.set("nominal_time", DateUtils.formatDateOozieTZ(nominalTime));
    String coordId = new CoordSubmitXCommand(conf).call();

    // First materialization with throttling 1; a single action is expected below
    CoordinatorJobBean job = CoordJobQueryExecutor.getInstance().get(CoordJobQueryExecutor.CoordJobQuery.GET_COORD_JOB, coordId);
    job.setMatThrottling(1);
    CoordJobQueryExecutor.getInstance().executeUpdate(CoordJobQueryExecutor.CoordJobQuery.UPDATE_COORD_JOB, job);
    new CoordMaterializeTransitionXCommand(coordId, 3600).call();
    SLAService slaService = services.get(SLAService.class);
    SLACalculator calc = slaService.getSLACalculator();
    SLACalcStatus slaCalc = calc.get(coordId + "@" + 1);
    assertFalse(Boolean.valueOf(slaCalc.getSLAConfigMap().get(OozieClient.SLA_DISABLE_ALERT)));
    // 1800000 ms = 30 minutes
    assertEquals(1800000, slaCalc.getExpectedDuration());
    job = CoordJobQueryExecutor.getInstance().get(CoordJobQueryExecutor.CoordJobQuery.GET_COORD_JOB, coordId);
    assertEquals(1, job.getLastActionNumber());

    // Change the max duration and disable alerts on the coordinator job
    String newParams = RestConstants.SLA_MAX_DURATION + "=${5 * MINUTES}";
    new CoordSLAChangeXCommand(coordId, null, null, JobUtils.parseChangeValue(newParams)).call();
    new CoordSLAAlertsDisableXCommand(coordId, null, null).call();

    // Raise the throttling and materialize again so that new actions are created
    job = CoordJobQueryExecutor.getInstance().get(CoordJobQueryExecutor.CoordJobQuery.GET_COORD_JOB, coordId);
    job.setMatThrottling(2);
    CoordJobQueryExecutor.getInstance().executeUpdate(CoordJobQueryExecutor.CoordJobQuery.UPDATE_COORD_JOB, job);
    new CoordMaterializeTransitionXCommand(coordId, 3600).call();
    job = CoordJobQueryExecutor.getInstance().get(CoordJobQueryExecutor.CoordJobQuery.GET_COORD_JOB, coordId);
    slaCalc = calc.get(coordId + "@" + job.getLastActionNumber());
    // 300000 ms = 5 minutes, i.e. the changed max duration
    assertEquals(300000, slaCalc.getExpectedDuration());
    // The newly created action should have SLA alerts disabled after the disable command on the coord job
    assertTrue(Boolean.valueOf(slaCalc.getSLAConfigMap().get(OozieClient.SLA_DISABLE_ALERT)));
    // The job XML itself should carry the new, unevaluated max-duration expression
    Element eAction = XmlUtils.parseXml(job.getJobXml());
    Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info", eAction.getNamespace("sla"));
    assertEquals("${5 * MINUTES}", SLAOperations.getTagElement(eSla, "max-duration"));
}
Also used : CoordinatorJobBean(org.apache.oozie.CoordinatorJobBean) XConfiguration(org.apache.oozie.util.XConfiguration) Configuration(org.apache.hadoop.conf.Configuration) SLAService(org.apache.oozie.sla.service.SLAService) SLACalcStatus(org.apache.oozie.sla.SLACalcStatus) FileWriter(java.io.FileWriter) Element(org.jdom.Element) Reader(java.io.Reader) Date(java.util.Date) XConfiguration(org.apache.oozie.util.XConfiguration) SLACalculator(org.apache.oozie.sla.SLACalculator) File(java.io.File) FileWriter(java.io.FileWriter) Writer(java.io.Writer)

Example 12 with SLAService

Use of org.apache.oozie.sla.service.SLAService in the Apache Oozie project.

In class DummySLACalculatorMemory, method testSLAFailOverWithHA.

/**
 * Exercises SLA fail-over between two servers: two workflow SLA registrations are added on a
 * dummy server, the dummy server is torn down, and the living server's calculator is then
 * expected to report the START/DURATION/END events for both jobs.
 *
 * @throws Exception if a service, query or calculator call fails
 */
public void testSLAFailOverWithHA() throws Exception {
    final int hourMillis = 3600 * 1000;
    final int tenMinutesMillis = 10 * 60 * 1000;

    // Calculator and event handler of the living server
    SLACalculatorMemory liveCalc = (SLACalculatorMemory) Services.get().get(SLAService.class).getSLACalculator();
    EventHandlerService liveEvents = Services.get().get(EventHandlerService.class);

    // Second, dummy oozie instance with its own dummy sla and event handler services
    DummyZKOozie secondServer = null;
    try {
        secondServer = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory failingCalc = new DummySLACalculatorMemory();
        EventHandlerService failingEvents = new EventHandlerService();
        failingCalc.setEventHandlerService(failingEvents);
        failingEvents.init(Services.get());
        failingCalc.init(Services.get().getConf());

        // Case 1: workflow job submitted to the dummy server, which goes down
        // before the job starts running
        WorkflowJobBean firstJob = createWorkflow("job-1-W");
        SLARegistrationBean firstSla = TestSLAService._createSLARegistration("job-1-W", AppType.WORKFLOW_JOB);
        // expected start 2 hours ago, expected end 1 hour ago, expected duration 10 minutes
        firstSla.setExpectedStart(new Date(System.currentTimeMillis() - 2 * hourMillis));
        firstSla.setExpectedEnd(new Date(System.currentTimeMillis() - hourMillis));
        firstSla.setExpectedDuration(tenMinutesMillis);
        failingCalc.addRegistration(firstSla.getId(), firstSla);
        failingCalc.updateAllSlaStatus();

        // Case 2: workflow job submitted to the dummy server and started there,
        // then the dummy server goes down
        WorkflowJobBean secondJob = createWorkflow("job-2-W");
        SLARegistrationBean secondSla = TestSLAService._createSLARegistration("job-2-W", AppType.WORKFLOW_JOB);
        // expected start 2 hours ago, expected end 1 hour ahead, expected duration 10 minutes
        secondSla.setExpectedStart(new Date(System.currentTimeMillis() - 2 * hourMillis));
        secondSla.setExpectedEnd(new Date(System.currentTimeMillis() + hourMillis));
        secondSla.setExpectedDuration(tenMinutesMillis);
        failingCalc.addRegistration(secondSla.getId(), secondSla);
        failingCalc.addJobStatus(secondSla.getId(), WorkflowJob.Status.RUNNING.name(), EventStatus.STARTED,
                new Date(), new Date());
        failingCalc.updateAllSlaStatus();
        failingEvents.new EventWorker().run();
        assertTrue(output.toString().contains(secondSla.getId() + " Sla START - MISS!!!"));

        // Pretend the dummy server is down.
        // NOTE(review): teardown() is invoked here and again in the finally block below.
        failingCalc.clear();
        failingCalc = null;
        secondServer.teardown();
        liveCalc.updateAllSlaStatus();

        // Job 1 starts running on the living server --> start miss
        liveCalc.addJobStatus(firstSla.getId(), WorkflowJob.Status.RUNNING.name(), EventStatus.STARTED,
                new Date(), new Date());
        // job 1 must now be present in the sla map of the living server
        assertNotNull(liveCalc.get(firstSla.getId()));
        liveEvents.new EventWorker().run();
        assertTrue(output.toString().contains(firstSla.getId() + " Sla START - MISS!!!"));

        // Job 1 succeeds on the living server --> duration met and end miss
        firstJob.setStatus(WorkflowJob.Status.SUCCEEDED);
        firstJob.setEndTime(new Date());
        firstJob.setStartTime(new Date());
        WorkflowJobQueryExecutor.getInstance().executeUpdate(
                WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_START_END, firstJob);
        liveCalc.addJobStatus(firstSla.getId(), WorkflowJob.Status.SUCCEEDED.name(), EventStatus.SUCCESS,
                new Date(), new Date());
        liveEvents.new EventWorker().run();
        assertTrue(output.toString().contains(firstSla.getId() + " Sla DURATION - MET!!!"));
        assertTrue(output.toString().contains(firstSla.getId() + " Sla END - MISS!!!"));

        // Job 2 succeeds on the living server --> duration met and end met
        secondJob.setStatus(WorkflowJob.Status.SUCCEEDED);
        secondJob.setEndTime(new Date());
        secondJob.setStartTime(new Date());
        WorkflowJobQueryExecutor.getInstance().executeUpdate(
                WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_START_END, secondJob);
        liveCalc.addJobStatus(secondSla.getId(), WorkflowJob.Status.SUCCEEDED.name(), EventStatus.SUCCESS,
                new Date(), new Date());
        // eventProc >= 7 (duration/end met already processed), so the entry should be gone from slaMap
        assertNull(liveCalc.get(secondSla.getId()));
        liveEvents.new EventWorker().run();
        assertTrue(output.toString().contains(secondSla.getId() + " Sla DURATION - MET!!!"));
        assertTrue(output.toString().contains(secondSla.getId() + " Sla END - MET!!!"));
    } finally {
        if (secondServer != null) {
            secondServer.teardown();
        }
    }
}
Also used : SLARegistrationBean(org.apache.oozie.sla.SLARegistrationBean) TestSLAService(org.apache.oozie.sla.TestSLAService) SLAService(org.apache.oozie.sla.service.SLAService) SLACalculatorMemory(org.apache.oozie.sla.SLACalculatorMemory) WorkflowJobBean(org.apache.oozie.WorkflowJobBean) Date(java.util.Date)

Example 13 with SLAService

Use of org.apache.oozie.sla.service.SLAService in the Apache Oozie project.

In class DummySLACalculatorMemory, method testNoDuplicateEventsInHA.

/**
 * Checks that an SLA event already produced by one server is not produced a second time by
 * another server: the action starts on server 1, ends on server 2, and after both
 * calculators refresh their status the event queue of server 1 must stay empty.
 *
 * @throws Exception if a service, query or calculator call fails
 */
public void testNoDuplicateEventsInHA() throws Exception {
    String actionId = "0000001-130521183438837-oozie-test-C@1";

    // Server 1: calculator (re-initialised so that it loads the job into its sla map) and event queue
    SLACalculatorMemory firstCalc = (SLACalculatorMemory) Services.get().get(SLAService.class).getSLACalculator();
    firstCalc.init(Services.get().getConf());
    EventHandlerService firstEvents = Services.get().get(EventHandlerService.class);
    EventQueue firstQueue = firstEvents.getEventQueue();

    DummyZKOozie secondServer = null;
    try {
        // Server 2: another dummy oozie instance with dummy sla and event handler services
        secondServer = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory secondCalc = new DummySLACalculatorMemory();
        secondCalc.init(Services.get().getConf());
        EventHandlerService secondEvents = new EventHandlerService();
        secondCalc.setEventHandlerService(secondEvents);
        secondEvents.init(Services.get());
        EventQueue secondQueue = secondEvents.getEventQueue();

        // Expected start is 2 hours from now, expected end 1 hour from now
        Date expectedStart = new Date(System.currentTimeMillis() + 2 * 3600 * 1000);
        Date expectedEnd = new Date(System.currentTimeMillis() + 1 * 3600 * 1000);
        SLASummaryBean summary = createDBEntryForStarted(actionId, expectedStart, expectedEnd, 0);
        summary.setExpectedDuration(-1);
        summary.setLastModifiedTime(new Date());
        SLASummaryQueryExecutor.getInstance().executeUpdate(
                SLASummaryQuery.UPDATE_SLA_SUMMARY_FOR_EXPECTED_TIMES, summary);

        // The action starts on server 1, which queues exactly one IN_PROCESS event
        updateCoordAction(actionId, "RUNNING");
        firstCalc.addJobStatus(actionId, CoordinatorAction.Status.RUNNING.name(), EventStatus.STARTED,
                new Date(), null);
        assertEquals(1, firstQueue.size());
        SLACalcStatus startedEvent = (SLACalcStatus) firstQueue.poll();
        assertEquals(SLAStatus.IN_PROCESS, startedEvent.getSLAStatus());

        // The action ends (FAILED) on server 2, which queues a MISS event
        updateCoordAction(actionId, "FAILED");
        Date actualStart = new Date(System.currentTimeMillis() - 1800 * 1000);
        Date actualEnd = new Date();
        secondCalc.addJobStatus(actionId, CoordinatorAction.Status.FAILED.name(), EventStatus.FAILURE,
                actualStart, actualEnd);
        SLACalcStatus failedEvent = (SLACalcStatus) secondQueue.poll();
        assertEquals(SLAStatus.MISS, failedEvent.getSLAStatus());

        firstCalc.updateAllSlaStatus();
        secondCalc.updateAllSlaStatus();
        // Server 1 must not create a duplicate event
        assertEquals(0, firstQueue.size());
    } finally {
        if (secondServer != null) {
            secondServer.teardown();
        }
    }
}
Also used : TestSLAService(org.apache.oozie.sla.TestSLAService) SLAService(org.apache.oozie.sla.service.SLAService) SLACalcStatus(org.apache.oozie.sla.SLACalcStatus) SLACalculatorMemory(org.apache.oozie.sla.SLACalculatorMemory) EventQueue(org.apache.oozie.event.EventQueue) Date(java.util.Date) SLASummaryBean(org.apache.oozie.sla.SLASummaryBean)

Example 14 with SLAService

Use of org.apache.oozie.sla.service.SLAService in the Apache Oozie project.

In class DummySLACalculatorMemory, method testSLAAlertCommandWithHA.

/**
 * Tests SLA alert commands in HA mode. {@code slaCalcMem1} stands for server 1 and
 * {@code slaCalcMem2} for server 2: an alert-disable and two max-duration changes issued on
 * server 1 must become visible on server 2 once it runs {@code updateAllSlaStatus()}.
 *
 * @throws Exception if a service or calculator call fails
 */
public void testSLAAlertCommandWithHA() throws Exception {
    String id = "0000001-130521183438837-oozie-test-C@1";
    // 2 hrs passed
    Date expectedStartTS = new Date(System.currentTimeMillis() - 2 * 3600 * 1000);
    // 1 hour ahead
    Date expectedEndTS1 = new Date(System.currentTimeMillis() + 1 * 3600 * 1000);
    // Coord action not started yet
    createDBEntry(id, expectedStartTS, expectedEndTS1);
    SLAService slas = Services.get().get(SLAService.class);
    SLACalculatorMemory slaCalcMem1 = (SLACalculatorMemory) slas.getSLACalculator();
    slaCalcMem1.init(Services.get().get(ConfigurationService.class).getConf());
    slaCalcMem1.updateAllSlaStatus();
    // Disable alerts on server 1
    List<String> idList = new ArrayList<String>();
    idList.add(id);
    slaCalcMem1.disableAlert(idList);
    assertTrue(slaCalcMem1.get(id).getSLAConfigMap().containsKey(OozieClient.SLA_DISABLE_ALERT));
    DummyZKOozie dummyOozie_1 = null;
    try {
        // start another dummy oozie instance (dummy sla and event handler services)
        dummyOozie_1 = new DummyZKOozie("a", "http://blah");
        DummySLACalculatorMemory slaCalcMem2 = new DummySLACalculatorMemory();
        EventHandlerService dummyEhs = new EventHandlerService();
        slaCalcMem2.setEventHandlerService(dummyEhs);
        // So that job sla updated doesn't run automatically
        Services.get().get(ConfigurationService.class).getConf().setInt(SLAService.CONF_SLA_CHECK_INTERVAL, 100000);
        Services.get().get(ConfigurationService.class).getConf().setInt(SLAService.CONF_SLA_CHECK_INITIAL_DELAY, 100000);
        dummyEhs.init(Services.get());
        slaCalcMem2.init(Services.get().get(ConfigurationService.class).getConf());
        slaCalcMem2.updateAllSlaStatus();
        // Server 2 sees the alert-disable issued on server 1
        assertTrue(slaCalcMem2.get(id).getSLAConfigMap().containsKey(OozieClient.SLA_DISABLE_ALERT));

        // First change on server 1: max duration of 5 (asserted below as 5 minutes in ms)
        String newParams = RestConstants.SLA_MAX_DURATION + "=5";
        List<Pair<String, Map<String, String>>> jobIdsSLAPair = new ArrayList<Pair<String, Map<String, String>>>();
        jobIdsSLAPair.add(new Pair<String, Map<String, String>>(id, JobUtils.parseChangeValue(newParams)));
        slaCalcMem1.changeDefinition(jobIdsSLAPair);
        assertEquals(5 * 60 * 1000, slaCalcMem1.get(id).getExpectedDuration());
        // Before update, default is 10.
        assertEquals(10 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
        slaCalcMem2.updateAllSlaStatus();
        assertEquals(5 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());

        // Second change on server 1: max duration of 15
        newParams = RestConstants.SLA_MAX_DURATION + "=15";
        jobIdsSLAPair.clear();
        jobIdsSLAPair.add(new Pair<String, Map<String, String>>(id, JobUtils.parseChangeValue(newParams)));
        slaCalcMem1.changeDefinition(jobIdsSLAPair);
        // Before update, server 2 still holds the previous value
        assertEquals(5 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
        slaCalcMem2.updateAllSlaStatus();
        assertEquals(15 * 60 * 1000, slaCalcMem2.get(id).getExpectedDuration());
    } finally {
        if (dummyOozie_1 != null) {
            dummyOozie_1.teardown();
        }
    }
}
Also used : TestSLAService(org.apache.oozie.sla.TestSLAService) SLAService(org.apache.oozie.sla.service.SLAService) SLACalculatorMemory(org.apache.oozie.sla.SLACalculatorMemory) ArrayList(java.util.ArrayList) Date(java.util.Date) Map(java.util.Map) Pair(org.apache.oozie.util.Pair)

Example 15 with SLAService

Use of org.apache.oozie.sla.service.SLAService in the Apache Oozie project.

In class TestSLACalculatorMemory, method testErrorLoggingWithJobIdPrefix.

/**
 * Verifies that an error logged by {@code SLACalculatorMemory} while the SLA worker runs for a
 * registration with id "dummy-id" is logged at ERROR level and contains the
 * {@code JOB[dummy-id]} prefix.
 *
 * @throws Exception if a service or calculator call fails
 */
public void testErrorLoggingWithJobIdPrefix() throws Exception {
    SLAService slaService = Services.get().get(SLAService.class);
    SLACalculator slaCalculator = slaService.getSLACalculator();
    slaCalculator.addRegistration("dummy-id", _createSLARegistration("dummy-id", AppType.WORKFLOW_JOB));
    TestLogAppender appender = getTestLogAppender();
    Logger logger = Logger.getLogger(SLACalculatorMemory.class);
    // Remember the explicitly configured level (null when inherited) so it can be put back afterwards
    Level previousLevel = logger.getLevel();
    logger.addAppender(appender);
    logger.setLevel(Level.ERROR);
    try {
        slaService.runSLAWorker();
    } finally {
        // Undo both logger modifications so they do not leak into other tests
        logger.removeAppender(appender);
        logger.setLevel(previousLevel);
    }
    List<LoggingEvent> log = appender.getLog();
    // Fail with a readable message instead of an IndexOutOfBoundsException when nothing was logged
    assertTrue("Expected at least one log event from SLACalculatorMemory", !log.isEmpty());
    LoggingEvent firstLogEntry = log.get(0);
    assertEquals(Level.ERROR, firstLogEntry.getLevel());
    assertTrue(firstLogEntry.getMessage().toString().contains("JOB[dummy-id]"));
    assertEquals("org.apache.oozie.sla.SLACalculatorMemory", firstLogEntry.getLoggerName());
}
Also used : LoggingEvent(org.apache.log4j.spi.LoggingEvent) SLAService(org.apache.oozie.sla.service.SLAService) Logger(org.apache.log4j.Logger)

Aggregations

SLAService (org.apache.oozie.sla.service.SLAService) 21, Date (java.util.Date) 17, Test (org.junit.Test) 12, Configuration (org.apache.hadoop.conf.Configuration) 10, XConfiguration (org.apache.oozie.util.XConfiguration) 9, Path (org.apache.hadoop.fs.Path) 7, WorkflowJobBean (org.apache.oozie.WorkflowJobBean) 6, CoordinatorActionBean (org.apache.oozie.CoordinatorActionBean) 4, SLACalcStatus (org.apache.oozie.sla.SLACalcStatus) 4, SLACalculatorMemory (org.apache.oozie.sla.SLACalculatorMemory) 4, TestSLAService (org.apache.oozie.sla.TestSLAService) 4, CoordSubmitXCommand (org.apache.oozie.command.coord.CoordSubmitXCommand) 3, WorkflowJobGetJPAExecutor (org.apache.oozie.executor.jpa.WorkflowJobGetJPAExecutor) 3, EventHandlerService (org.apache.oozie.service.EventHandlerService) 3, JPAService (org.apache.oozie.service.JPAService) 3, File (java.io.File) 2, FileWriter (java.io.FileWriter) 2, Reader (java.io.Reader) 2, Writer (java.io.Writer) 2, ArrayList (java.util.ArrayList) 2