Search in sources :

Example 1 with JobExecutionException

use of org.graylog.scheduler.JobExecutionException in project graylog2-server by Graylog2.

the class EventProcessorExecutionJobTest method executeWithInvalidTimerange.

/**
 * Verifies that a job whose timerange has "from" AFTER "to" is rejected: executing it must throw a
 * {@link JobExecutionException} whose trigger update puts the trigger into ERROR state, and the event
 * processor engine must never be invoked.
 */
@Test
public void executeWithInvalidTimerange() throws Exception {
    final DateTime now = clock.nowUTC();
    final long windowSizeMillis = Duration.standardSeconds(60).getMillis();
    final long hopSizeMillis = Duration.standardSeconds(60).getMillis();
    final int intervalSeconds = 1;

    // Deliberately invalid timerange: "from" lies one second AFTER "to". The job must refuse to run
    // and the trigger must end up in ERROR state.
    final DateTime invalidFrom = now.plusSeconds(1);
    final DateTime invalidTo = now;
    final DateTime expectedNextTime = now;

    final TestEventProcessorParameters parameters = TestEventProcessorParameters.create(invalidFrom, invalidTo);

    final JobDefinitionDto definition = JobDefinitionDto.builder()
            .id("job-1")
            .title("Test")
            .description("A test")
            .config(EventProcessorExecutionJob.Config.builder()
                    .eventDefinitionId("processor-1")
                    .processingWindowSize(windowSizeMillis)
                    .processingHopSize(hopSizeMillis)
                    .parameters(parameters)
                    .build())
            .build();

    final EventProcessorExecutionJob job = new EventProcessorExecutionJob(
            jobScheduleStrategies, clock, eventProcessorEngine, eventsConfigurationProvider, definition);

    final JobTriggerDto trigger = JobTriggerDto.builderWithClock(clock)
            .id("trigger-1")
            .jobDefinitionId(definition.id())
            .startTime(now)
            .nextTime(expectedNextTime)
            .status(JobTriggerStatus.RUNNABLE)
            .schedule(IntervalJobSchedule.builder()
                    .interval(intervalSeconds)
                    .unit(TimeUnit.SECONDS)
                    .build())
            .build();

    final JobExecutionContext context = JobExecutionContext.builder()
            .definition(definition)
            .trigger(trigger)
            .isRunning(new AtomicBoolean(true))
            .jobTriggerUpdates(new JobTriggerUpdates(clock, jobScheduleStrategies, trigger))
            .build();

    assertThatThrownBy(() -> job.execute(context))
            .isInstanceOf(JobExecutionException.class)
            .hasMessageContaining("is not after")
            .satisfies(thrown -> {
                final JobExecutionException failure = (JobExecutionException) thrown;

                assertThat(failure.getTrigger()).isEqualTo(trigger);
                assertThat(failure.getUpdate()).satisfies(triggerUpdate -> {
                    // When setting the status to ERROR, we will keep the last nextTime
                    assertThat(triggerUpdate.nextTime()).contains(expectedNextTime);
                    assertThat(triggerUpdate.data()).isEmpty();
                    assertThat(triggerUpdate.status()).contains(JobTriggerStatus.ERROR);
                });
            });

    // An invalid timerange must short-circuit before the engine is ever reached
    verify(eventProcessorEngine, never()).execute(any(), any());
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JobExecutionException(org.graylog.scheduler.JobExecutionException) JobDefinitionDto(org.graylog.scheduler.JobDefinitionDto) JobTriggerUpdates(org.graylog.scheduler.JobTriggerUpdates) JobExecutionContext(org.graylog.scheduler.JobExecutionContext) DateTime(org.joda.time.DateTime) TestEventProcessorParameters(org.graylog.events.TestEventProcessorParameters) EventProcessorExecutionJob(org.graylog.events.processor.EventProcessorExecutionJob) JobTriggerDto(org.graylog.scheduler.JobTriggerDto) Test(org.junit.Test)

Example 2 with JobExecutionException

use of org.graylog.scheduler.JobExecutionException in project graylog2-server by Graylog2.

the class EventNotificationExecutionJob method execute.

/**
 * Executes the notification configured for this job against the event stored in the trigger data.
 *
 * <p>If the event definition of the event cannot be found, the problem is logged and execution continues
 * without an event definition and with a grace period of 0 ms. While the event is inside the grace period
 * the notification is not executed and the next execution is scheduled instead.
 *
 * @param ctx the execution context providing the job definition, the trigger and the trigger updates
 * @return the trigger update that schedules the next execution
 * @throws JobExecutionException if the trigger carries no data, the notification or its factory cannot be
 *                               found, or executing the notification fails
 */
@Override
public JobTriggerUpdate execute(JobExecutionContext ctx) throws JobExecutionException {
    Optional<EventDefinitionDto> optionalEventDefinition;
    long gracePeriodInMS = 0;
    final JobTriggerDto trigger = ctx.trigger();
    final Optional<Data> optionalData = trigger.data().map(d -> (Data) d);
    if (!optionalData.isPresent()) {
        throw new JobExecutionException("Missing notification job data for notification <" + jobConfig.notificationId() + ">, unable to execute notification: " + ctx.definition().title(), trigger, JobTriggerUpdate.withoutNextTime());
    }
    final Data data = optionalData.get();
    final EventDto eventDto = data.eventDto();
    final NotificationDto notification = notificationService.get(jobConfig.notificationId()).orElseThrow(() -> new JobExecutionException("Couldn't find notification <" + jobConfig.notificationId() + ">", trigger, JobTriggerUpdate.withError(trigger)));
    final EventNotification.Factory eventNotificationFactory = eventNotificationFactories.get(notification.config().type());
    if (eventNotificationFactory == null) {
        throw new JobExecutionException("Couldn't find factory for notification type <" + notification.config().type() + ">", trigger, ctx.jobTriggerUpdates().scheduleNextExecution());
    }
    final EventNotification eventNotification = eventNotificationFactory.create();
    metrics.registerEventNotification(eventNotification, notification);
    try {
        optionalEventDefinition = Optional.ofNullable(getEventDefinition(eventDto));
        if (optionalEventDefinition.isPresent()) {
            gracePeriodInMS = optionalEventDefinition.get().notificationSettings().gracePeriodMs();
        }
    } catch (NotFoundException e) {
        // A missing event definition is not fatal: keep the default grace period and go on without it.
        LOG.error("Couldn't find event definition with ID <{}>.", eventDto.eventDefinitionId());
        optionalEventDefinition = Optional.empty();
    }
    // The event definition may legitimately be absent here (see the catch block above), so it must not be
    // unwrapped with Optional#get(), which would throw an undeclared NoSuchElementException.
    // NOTE(review): assumes the builder's eventDefinition(...) setter accepts null for "no definition" - confirm.
    EventNotificationContext notificationContext = EventNotificationContext.builder().notificationId(notification.id()).notificationConfig(notification.config()).event(eventDto).eventDefinition(optionalEventDefinition.orElse(null)).jobTrigger(trigger).build();
    updateTriggerStatus(eventDto, gracePeriodInMS);
    if (inGrace(eventDto, gracePeriodInMS)) {
        LOG.debug("Notification <{}> triggered but it's in grace period.", jobConfig.notificationId());
        metrics.markInGrace(eventNotification, notification);
        return ctx.jobTriggerUpdates().scheduleNextExecution();
    }
    try {
        metrics.markExecution(eventNotification, notification);
        eventNotification.execute(notificationContext);
        metrics.markSuccess(eventNotification, notification);
    } catch (TemporaryEventNotificationException e) {
        // Temporary failures are retried after the configured retry period.
        metrics.markFailedTemporarily(eventNotification, notification);
        final long retryPeriod = configurationProvider.get().eventNotificationsRetry();
        throw new JobExecutionException(String.format(Locale.ROOT, "Failed to execute notification, retrying in %d minutes - <%s/%s/%s>", TimeUnit.MILLISECONDS.toMinutes(retryPeriod), notification.id(), notification.title(), notification.config().type()), trigger, ctx.jobTriggerUpdates().retryIn(retryPeriod, TimeUnit.MILLISECONDS), e);
    } catch (PermanentEventNotificationException e) {
        metrics.markFailedPermanently(eventNotification, notification);
        throw new JobExecutionException(String.format(Locale.ROOT, "Failed permanently to execute notification, giving up - <%s/%s/%s>", notification.id(), notification.title(), notification.config().type()), trigger, ctx.jobTriggerUpdates().scheduleNextExecution(), e);
    } catch (EventNotificationException e) {
        metrics.markFailed(eventNotification, notification);
        throw new JobExecutionException(String.format(Locale.ROOT, "Notification failed to execute - <%s/%s/%s>", notification.id(), notification.title(), notification.config().type()), trigger, ctx.jobTriggerUpdates().scheduleNextExecution(), e);
    }
    updateNotifiedStatus(eventDto, gracePeriodInMS);
    return ctx.jobTriggerUpdates().scheduleNextExecution();
}
Also used : EventDto(org.graylog.events.event.EventDto) NotFoundException(org.graylog2.database.NotFoundException) JobTriggerData(org.graylog.scheduler.JobTriggerData) JobTriggerDto(org.graylog.scheduler.JobTriggerDto) JobExecutionException(org.graylog.scheduler.JobExecutionException) EventDefinitionDto(org.graylog.events.processor.EventDefinitionDto)

Example 3 with JobExecutionException

use of org.graylog.scheduler.JobExecutionException in project graylog2-server by Graylog2.

the class EventProcessorExecutionJob method execute.

/**
 * Runs the event processor for one timerange and computes the follow-up job trigger update.
 *
 * <p>The timerange is taken from the job trigger data when present, otherwise from the job configuration.
 * After a successful run the next timerange (and, if the trigger has further executions, the next
 * execution time) is stored in the returned update. Failures of the event processor are logged and result
 * in a retry, or in an ERROR trigger state for permanent errors.
 *
 * @param ctx the execution context providing the job definition, the trigger and the trigger updates
 * @return the update to apply to the job trigger
 * @throws JobExecutionException if the "to" timestamp of the timerange is not after its "from" timestamp
 */
@Override
public JobTriggerUpdate execute(JobExecutionContext ctx) throws JobExecutionException {
    final Optional<Data> triggerData = ctx.trigger().data().map(d -> (Data) d);

    // A timerange stored in the job trigger data takes precedence over the configured one
    final EventProcessorParametersWithTimerange parameters;
    if (!triggerData.isPresent()) {
        parameters = config.parameters();
    } else {
        LOG.trace("Using timerange from job trigger data: from={} to={} (trigger={})", triggerData.get().timerangeFrom(), triggerData.get().timerangeTo(), ctx.trigger().id());
        parameters = config.parameters().withTimerange(triggerData.get().timerangeFrom(), triggerData.get().timerangeTo());
    }

    final DateTime from = parameters.timerange().getFrom();
    final DateTime to = parameters.timerange().getTo();

    // Guard: "to" has to be strictly after "from"
    if (!to.isAfter(from)) {
        // This should not happen(TM)
        // Put the trigger into ERROR state so the scheduler leaves it alone until the problem is resolved.
        // TODO: Send an event when this happens so admins can get alerted
        final JobTriggerUpdate errorUpdate = JobTriggerUpdate.withError(ctx.trigger());
        throw new JobExecutionException("Invalid time range - \"to\" timestamp <" + to + "> is not after \"from\" timestamp <" + from + ">", ctx.trigger(), errorUpdate);
    }

    // Guard: a timerange that ends in the future cannot be processed yet
    final DateTime now = clock.nowUTC();
    if (now.isBefore(to)) {
        LOG.error("The end of the timerange to process is in the future, re-scheduling job trigger <{}> to run at <{}>", ctx.trigger().id(), to);
        return JobTriggerUpdate.withNextTime(to);
    }

    try {
        eventProcessorEngine.execute(config.eventDefinitionId(), parameters);

        final long windowSize = config.processingWindowSize();
        final long hopSize = config.processingHopSize();

        // windowSize + hopSize give us hopping windows; a tumbling window is the special case where both
        // sizes are equal.
        DateTime nextTo = to.plus(hopSize);
        final DateTime nextFrom = nextTo.minus(windowSize);

        // Catch-up mode: when the processor lags behind (e.g. after a long server downtime) the next
        // timerange is widened to several windowSize chunks, bounded by the configured catch-up window.
        // Event processors are responsible for coping with the larger timerange.
        // Catch-up mode is not usable when the hop size exceeds the window size.
        final long catchUpSize = configurationProvider.get().eventCatchupWindow();
        if (catchUpSize > 0 && catchUpSize > windowSize && to.plus(catchUpSize).isBefore(now) && hopSize <= windowSize) {
            final long chunkCount = catchUpSize / windowSize;
            // Keep the end of the timerange aligned to a multiple of the window size
            nextTo = to.plus(windowSize * chunkCount);
            LOG.info("Event processor <{}> is catching up on old data. Combining {} search windows with catchUpWindowSize={}ms: from={} to={}", config.eventDefinitionId(), chunkCount, catchUpSize, nextFrom, nextTo);
        }

        LOG.trace("Set new timerange of eventproc <{}> in job trigger data: from={} to={} (hopSize={}ms windowSize={}ms)", config.eventDefinitionId(), nextFrom, nextTo, hopSize, windowSize);
        final Data newData = triggerData.map(Data::toBuilder).orElse(Data.builder()).timerangeFrom(nextFrom).timerangeTo(nextTo).build();

        final Optional<DateTime> calculatedNextTime = scheduleStrategies.nextTime(ctx.trigger());

        // An empty nextTime means the trigger has no further executions (e.g. a ONCE trigger)
        if (!calculatedNextTime.isPresent()) {
            LOG.trace("No nextTime for trigger <{}>", ctx.trigger().id());
            return JobTriggerUpdate.withoutNextTime();
        }

        if (nextTo.isBefore(now)) {
            // The next timerange is already in the past: run again right away so we catch up quickly.
            LOG.trace("Set nextTime to <{}> to catch up faster - calculated nextTime was <{}>", now, calculatedNextTime.get());
            return JobTriggerUpdate.withNextTimeAndData(now, newData);
        }

        if (nextTo.isBefore(calculatedNextTime.get())) {
            LOG.trace("Set nextTime to <{}> because it's closer to the timerange time - calculated nextTime was <{}>", nextTo, calculatedNextTime.get());
            return JobTriggerUpdate.withNextTimeAndData(nextTo, newData);
        }

        // Fall back to the nextTime calculated by the schedule strategy
        LOG.trace("Set nextTime to <{}>", calculatedNextTime.get());
        return JobTriggerUpdate.withNextTimeAndData(calculatedNextTime.get(), newData);
    } catch (EventProcessorPreconditionException e) {
        // The event processor is not ready to run yet, so the job is retried later.
        if (!e.getEventDefinition().isPresent()) {
            LOG.debug("Event processor <{}> couldn't be executed because of a failed precondition (retry in {} ms)", e.getEventDefinitionId(), RETRY_INTERVAL);
        } else {
            LOG.debug("Event processor <{}/{}> couldn't be executed because of a failed precondition (retry in {} ms)", e.getEventDefinition().get().title(), e.getEventDefinitionId(), RETRY_INTERVAL);
        }
        return ctx.jobTriggerUpdates().retryIn(RETRY_INTERVAL, MILLISECONDS);
    } catch (EventProcessorException e) {
        if (!e.getEventDefinition().isPresent()) {
            LOG.error("Event processor <{}> failed to execute: {} (retry in {} ms)", e.getEventDefinitionId(), e.getMessage(), RETRY_INTERVAL, e);
        } else {
            LOG.error("Event processor <{}/{}> failed to execute: {} (retry in {} ms)", e.getEventDefinition().get().config().type(), e.getEventDefinitionId(), e.getMessage(), RETRY_INTERVAL, e);
        }
        if (!e.isPermanent()) {
            return ctx.jobTriggerUpdates().retryIn(RETRY_INTERVAL, MILLISECONDS);
        }
        // Retrying a permanent error is pointless: move the trigger to ERROR so it is not executed again.
        LOG.error("Caught a permanent error, trigger <{}> will go into ERROR state - it will not be executed anymore and needs manual intervention! (event-definition-id: {} job-definition={}/{})", ctx.trigger().id(), e.getEventDefinitionId(), ctx.definition().id(), ctx.definition().title());
        return JobTriggerUpdate.withError(ctx.trigger());
    } catch (Exception e) {
        LOG.error("Event processor <{}> failed to execute: parameters={} (retry in {} ms)", config.eventDefinitionId(), parameters, RETRY_INTERVAL, e);
        return ctx.jobTriggerUpdates().retryIn(RETRY_INTERVAL, MILLISECONDS);
    }
}
Also used : JobExecutionException(org.graylog.scheduler.JobExecutionException) JobTriggerUpdate(org.graylog.scheduler.JobTriggerUpdate) JobTriggerData(org.graylog.scheduler.JobTriggerData) DateTime(org.joda.time.DateTime) JobExecutionException(org.graylog.scheduler.JobExecutionException)

Aggregations

JobExecutionException (org.graylog.scheduler.JobExecutionException)3 JobTriggerData (org.graylog.scheduler.JobTriggerData)2 JobTriggerDto (org.graylog.scheduler.JobTriggerDto)2 DateTime (org.joda.time.DateTime)2 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 TestEventProcessorParameters (org.graylog.events.TestEventProcessorParameters)1 EventDto (org.graylog.events.event.EventDto)1 EventDefinitionDto (org.graylog.events.processor.EventDefinitionDto)1 EventProcessorExecutionJob (org.graylog.events.processor.EventProcessorExecutionJob)1 JobDefinitionDto (org.graylog.scheduler.JobDefinitionDto)1 JobExecutionContext (org.graylog.scheduler.JobExecutionContext)1 JobTriggerUpdate (org.graylog.scheduler.JobTriggerUpdate)1 JobTriggerUpdates (org.graylog.scheduler.JobTriggerUpdates)1 NotFoundException (org.graylog2.database.NotFoundException)1 Test (org.junit.Test)1