use of org.graylog.scheduler.JobExecutionException in project graylog2-server by Graylog2.
the class EventProcessorExecutionJobTest method executeWithInvalidTimerange.
/**
 * Checks that executing a job whose timerange has "from" after "to" fails with a
 * {@link JobExecutionException} that carries an ERROR trigger update, and that the event
 * processor engine is never invoked.
 */
@Test
public void executeWithInvalidTimerange() throws Exception {
    final DateTime now = clock.nowUTC();
    final long windowSizeMs = Duration.standardSeconds(60).getMillis();
    final long hopSizeMs = Duration.standardSeconds(60).getMillis();
    final int intervalSeconds = 1;

    // Deliberately invalid timerange: "from" lies one second AFTER "to". The job must refuse to run
    // and the trigger is expected to end up in ERROR.
    final DateTime to = now;
    final DateTime from = now.plusSeconds(1);
    final DateTime expectedNextTime = now;

    final TestEventProcessorParameters invalidParameters = TestEventProcessorParameters.create(from, to);

    final JobDefinitionDto jobDefinition = JobDefinitionDto.builder()
            .id("job-1")
            .title("Test")
            .description("A test")
            .config(EventProcessorExecutionJob.Config.builder()
                    .eventDefinitionId("processor-1")
                    .processingWindowSize(windowSizeMs)
                    .processingHopSize(hopSizeMs)
                    .parameters(invalidParameters)
                    .build())
            .build();

    final EventProcessorExecutionJob job = new EventProcessorExecutionJob(
            jobScheduleStrategies, clock, eventProcessorEngine, eventsConfigurationProvider, jobDefinition);

    final JobTriggerDto trigger = JobTriggerDto.builderWithClock(clock)
            .id("trigger-1")
            .jobDefinitionId(jobDefinition.id())
            .startTime(now)
            .nextTime(expectedNextTime)
            .status(JobTriggerStatus.RUNNABLE)
            .schedule(IntervalJobSchedule.builder()
                    .interval(intervalSeconds)
                    .unit(TimeUnit.SECONDS)
                    .build())
            .build();

    final JobExecutionContext executionContext = JobExecutionContext.builder()
            .definition(jobDefinition)
            .trigger(trigger)
            .isRunning(new AtomicBoolean(true))
            .jobTriggerUpdates(new JobTriggerUpdates(clock, jobScheduleStrategies, trigger))
            .build();

    assertThatThrownBy(() -> job.execute(executionContext))
            .isInstanceOf(JobExecutionException.class)
            .hasMessageContaining("is not after")
            .satisfies(thrown -> {
                final JobExecutionException executionException = (JobExecutionException) thrown;

                assertThat(executionException.getTrigger()).isEqualTo(trigger);
                assertThat(executionException.getUpdate()).satisfies(triggerUpdate -> {
                    // An ERROR update keeps the trigger's last nextTime
                    assertThat(triggerUpdate.nextTime()).isPresent().get().isEqualTo(expectedNextTime);
                    assertThat(triggerUpdate.data()).isNotPresent();
                    assertThat(triggerUpdate.status()).isPresent().get().isEqualTo(JobTriggerStatus.ERROR);
                });
            });

    // With an invalid timerange the engine must not have been called at all
    verify(eventProcessorEngine, never()).execute(any(), any());
}
use of org.graylog.scheduler.JobExecutionException in project graylog2-server by Graylog2.
the class EventNotificationExecutionJob method execute.
/**
 * Executes the notification configured for this job for the event stored in the trigger data.
 * <p>
 * The event definition of the event is looked up to obtain the notification grace period. If the
 * lookup fails, execution continues with a grace period of 0 and without an event definition in the
 * notification context.
 *
 * @param ctx execution context providing the job definition, the trigger and the trigger update factory
 * @return the trigger update that schedules the next execution; returned both after the notification
 *         was executed and when the event is still in its grace period
 * @throws JobExecutionException if the trigger carries no notification job data, if the notification
 *                               or the factory for its type cannot be found, or if executing the
 *                               notification fails
 */
@Override
public JobTriggerUpdate execute(JobExecutionContext ctx) throws JobExecutionException {
    Optional<EventDefinitionDto> optionalEventDefinition;
    long gracePeriodInMS = 0;

    final JobTriggerDto trigger = ctx.trigger();
    final Optional<Data> optionalData = trigger.data().map(d -> (Data) d);
    if (!optionalData.isPresent()) {
        throw new JobExecutionException("Missing notification job data for notification <" + jobConfig.notificationId() + ">, unable to execute notification: " + ctx.definition().title(), trigger, JobTriggerUpdate.withoutNextTime());
    }

    final Data data = optionalData.get();
    final EventDto eventDto = data.eventDto();

    final NotificationDto notification = notificationService.get(jobConfig.notificationId())
            .orElseThrow(() -> new JobExecutionException("Couldn't find notification <" + jobConfig.notificationId() + ">", trigger, JobTriggerUpdate.withError(trigger)));

    final EventNotification.Factory eventNotificationFactory = eventNotificationFactories.get(notification.config().type());
    if (eventNotificationFactory == null) {
        throw new JobExecutionException("Couldn't find factory for notification type <" + notification.config().type() + ">", trigger, ctx.jobTriggerUpdates().scheduleNextExecution());
    }

    final EventNotification eventNotification = eventNotificationFactory.create();
    metrics.registerEventNotification(eventNotification, notification);

    try {
        optionalEventDefinition = Optional.ofNullable(getEventDefinition(eventDto));
        if (optionalEventDefinition.isPresent()) {
            gracePeriodInMS = optionalEventDefinition.get().notificationSettings().gracePeriodMs();
        }
    } catch (NotFoundException e) {
        // A missing event definition is not fatal: keep the default grace period of 0 and go on
        LOG.error("Couldn't find event definition with ID <{}>.", eventDto.eventDefinitionId());
        optionalEventDefinition = Optional.empty();
    }

    // The event definition can be absent here (lookup returned null or threw NotFoundException), so
    // it must not be unwrapped with Optional#get(), which would throw NoSuchElementException.
    // It is only set on the builder when present.
    // NOTE(review): assumes the context builder treats eventDefinition as an optional property - confirm.
    final EventNotificationContext notificationContext = optionalEventDefinition
            .map(eventDefinition -> EventNotificationContext.builder()
                    .notificationId(notification.id())
                    .notificationConfig(notification.config())
                    .event(eventDto)
                    .eventDefinition(eventDefinition)
                    .jobTrigger(trigger)
                    .build())
            .orElseGet(() -> EventNotificationContext.builder()
                    .notificationId(notification.id())
                    .notificationConfig(notification.config())
                    .event(eventDto)
                    .jobTrigger(trigger)
                    .build());

    updateTriggerStatus(eventDto, gracePeriodInMS);

    if (inGrace(eventDto, gracePeriodInMS)) {
        LOG.debug("Notification <{}> triggered but it's in grace period.", jobConfig.notificationId());
        metrics.markInGrace(eventNotification, notification);
        return ctx.jobTriggerUpdates().scheduleNextExecution();
    }

    try {
        metrics.markExecution(eventNotification, notification);
        eventNotification.execute(notificationContext);
        metrics.markSuccess(eventNotification, notification);
    } catch (TemporaryEventNotificationException e) {
        // Temporary failure: ask the scheduler to retry after the configured retry period
        metrics.markFailedTemporarily(eventNotification, notification);
        final long retryPeriod = configurationProvider.get().eventNotificationsRetry();
        throw new JobExecutionException(String.format(Locale.ROOT, "Failed to execute notification, retrying in %d minutes - <%s/%s/%s>", TimeUnit.MILLISECONDS.toMinutes(retryPeriod), notification.id(), notification.title(), notification.config().type()), trigger, ctx.jobTriggerUpdates().retryIn(retryPeriod, TimeUnit.MILLISECONDS), e);
    } catch (PermanentEventNotificationException e) {
        metrics.markFailedPermanently(eventNotification, notification);
        throw new JobExecutionException(String.format(Locale.ROOT, "Failed permanently to execute notification, giving up - <%s/%s/%s>", notification.id(), notification.title(), notification.config().type()), trigger, ctx.jobTriggerUpdates().scheduleNextExecution(), e);
    } catch (EventNotificationException e) {
        metrics.markFailed(eventNotification, notification);
        throw new JobExecutionException(String.format(Locale.ROOT, "Notification failed to execute - <%s/%s/%s>", notification.id(), notification.title(), notification.config().type()), trigger, ctx.jobTriggerUpdates().scheduleNextExecution(), e);
    }

    updateNotifiedStatus(eventDto, gracePeriodInMS);
    return ctx.jobTriggerUpdates().scheduleNextExecution();
}
use of org.graylog.scheduler.JobExecutionException in project graylog2-server by Graylog2.
the class EventProcessorExecutionJob method execute.
/**
 * Runs the event processor for the configured event definition over a single timerange and works
 * out the trigger update for the next run.
 * <p>
 * The timerange comes from the job trigger data when present, otherwise from the job config.
 * After a successful run the timerange is advanced by the processing hop size (or by a multiple of
 * the processing window size while catching up) and stored in the new trigger data.
 *
 * @param ctx execution context providing the job definition, the trigger and the trigger update factory
 * @return the update to apply to the trigger: a new next time (optionally with new timerange data),
 *         a retry, an ERROR update after a permanent processor failure, or an update without next time
 * @throws JobExecutionException if the "to" timestamp of the timerange is not after its "from" timestamp
 */
@Override
public JobTriggerUpdate execute(JobExecutionContext ctx) throws JobExecutionException {
    final Optional<Data> triggerData = ctx.trigger().data().map(d -> (Data) d);

    // A timerange stored in the job trigger data wins over the one from the job config
    final EventProcessorParametersWithTimerange effectiveParameters;
    if (!triggerData.isPresent()) {
        effectiveParameters = config.parameters();
    } else {
        LOG.trace("Using timerange from job trigger data: from={} to={} (trigger={})", triggerData.get().timerangeFrom(), triggerData.get().timerangeTo(), ctx.trigger().id());
        effectiveParameters = config.parameters().withTimerange(triggerData.get().timerangeFrom(), triggerData.get().timerangeTo());
    }

    final DateTime rangeStart = effectiveParameters.timerange().getFrom();
    final DateTime rangeEnd = effectiveParameters.timerange().getTo();

    // Guard: "to" has to be strictly after "from".
    if (!rangeEnd.isAfter(rangeStart)) {
        // Not expected to happen. If it does, the trigger is put into ERROR so the scheduler leaves it
        // alone until the problem has been resolved.
        // TODO: Send an event when this happens so admins can get alerted
        final JobTriggerUpdate errorUpdate = JobTriggerUpdate.withError(ctx.trigger());
        throw new JobExecutionException("Invalid time range - \"to\" timestamp <" + rangeEnd.toString() + "> is not after \"from\" timestamp <" + rangeStart.toString() + ">", ctx.trigger(), errorUpdate);
    }

    // Guard: a timerange that ends in the future cannot be processed yet.
    final DateTime now = clock.nowUTC();
    if (now.isBefore(rangeEnd)) {
        LOG.error("The end of the timerange to process is in the future, re-scheduling job trigger <{}> to run at <{}>", ctx.trigger().id(), rangeEnd);
        return JobTriggerUpdate.withNextTime(rangeEnd);
    }

    try {
        eventProcessorEngine.execute(config.eventDefinitionId(), effectiveParameters);

        // Window size and hop size together give hopping windows; equal sizes give tumbling windows.
        final long hopSize = config.processingHopSize();
        final long windowSize = config.processingWindowSize();
        DateTime nextTo = rangeEnd.plus(hopSize);
        final DateTime nextFrom = nextTo.minus(windowSize);

        // When the processor lags behind by more than the configured catch-up window, the next run
        // covers several window-sized chunks at once. The individual event processors have to cope
        // with that larger timerange. Catch-up is not used when the hop size exceeds the window size.
        final long catchUpSize = configurationProvider.get().eventCatchupWindow();
        final boolean useCatchUpWindow = catchUpSize > 0
                && catchUpSize > windowSize
                && rangeEnd.plus(catchUpSize).isBefore(now)
                && hopSize <= windowSize;
        if (useCatchUpWindow) {
            // Integer division keeps the new end aligned to multiples of the window size
            final long chunkCount = catchUpSize / windowSize;
            nextTo = rangeEnd.plus(windowSize * chunkCount);
            LOG.info("Event processor <{}> is catching up on old data. Combining {} search windows with catchUpWindowSize={}ms: from={} to={}", config.eventDefinitionId(), chunkCount, catchUpSize, nextFrom, nextTo);
        }

        LOG.trace("Set new timerange of eventproc <{}> in job trigger data: from={} to={} (hopSize={}ms windowSize={}ms)", config.eventDefinitionId(), nextFrom, nextTo, hopSize, windowSize);
        final Data newData = triggerData.map(Data::toBuilder).orElse(Data.builder())
                .timerangeFrom(nextFrom)
                .timerangeTo(nextTo)
                .build();

        final Optional<DateTime> calculatedNextTime = scheduleStrategies.nextTime(ctx.trigger());
        if (!calculatedNextTime.isPresent()) {
            // Empty means there are no further executions of the trigger (e.g. a ONCE trigger)
            LOG.trace("No nextTime for trigger <{}>", ctx.trigger().id());
            return JobTriggerUpdate.withoutNextTime();
        }

        if (nextTo.isBefore(now)) {
            // The next timerange is already in the past: run again right away to catch up.
            LOG.trace("Set nextTime to <{}> to catch up faster - calculated nextTime was <{}>", now, calculatedNextTime.get());
            return JobTriggerUpdate.withNextTimeAndData(now, newData);
        }
        if (nextTo.isBefore(calculatedNextTime.get())) {
            LOG.trace("Set nextTime to <{}> because it's closer to the timerange time - calculated nextTime was <{}>", nextTo, calculatedNextTime.get());
            return JobTriggerUpdate.withNextTimeAndData(nextTo, newData);
        }
        // Neither special case applies: go with the calculated nextTime
        LOG.trace("Set nextTime to <{}>", calculatedNextTime.get());
        return JobTriggerUpdate.withNextTimeAndData(calculatedNextTime.get(), newData);
    } catch (EventProcessorPreconditionException e) {
        // A precondition of the event processor is not fulfilled yet, so the job has to be retried.
        if (!e.getEventDefinition().isPresent()) {
            LOG.debug("Event processor <{}> couldn't be executed because of a failed precondition (retry in {} ms)", e.getEventDefinitionId(), RETRY_INTERVAL);
        } else {
            LOG.debug("Event processor <{}/{}> couldn't be executed because of a failed precondition (retry in {} ms)", e.getEventDefinition().get().title(), e.getEventDefinitionId(), RETRY_INTERVAL);
        }
        return ctx.jobTriggerUpdates().retryIn(RETRY_INTERVAL, MILLISECONDS);
    } catch (EventProcessorException e) {
        if (!e.getEventDefinition().isPresent()) {
            LOG.error("Event processor <{}> failed to execute: {} (retry in {} ms)", e.getEventDefinitionId(), e.getMessage(), RETRY_INTERVAL, e);
        } else {
            LOG.error("Event processor <{}/{}> failed to execute: {} (retry in {} ms)", e.getEventDefinition().get().config().type(), e.getEventDefinitionId(), e.getMessage(), RETRY_INTERVAL, e);
        }
        if (!e.isPermanent()) {
            return ctx.jobTriggerUpdates().retryIn(RETRY_INTERVAL, MILLISECONDS);
        }
        // Permanent errors cannot be retried: the trigger goes into ERROR so it is not executed again.
        LOG.error("Caught a permanent error, trigger <{}> will go into ERROR state - it will not be executed anymore and needs manual intervention! (event-definition-id: {} job-definition={}/{})", ctx.trigger().id(), e.getEventDefinitionId(), ctx.definition().id(), ctx.definition().title());
        return JobTriggerUpdate.withError(ctx.trigger());
    } catch (Exception e) {
        LOG.error("Event processor <{}> failed to execute: parameters={} (retry in {} ms)", config.eventDefinitionId(), effectiveParameters, RETRY_INTERVAL, e);
        return ctx.jobTriggerUpdates().retryIn(RETRY_INTERVAL, MILLISECONDS);
    }
}
Aggregations