use of io.druid.indexing.common.TaskStatus in project druid by druid-io.
the class RealtimeIndexTaskTest method testRestoreCorruptData.
@Test(timeout = 60_000L)
public void testRestoreCorruptData() throws Exception {
final File directory = tempFolder.newFolder();
final RealtimeIndexTask task1 = makeRealtimeTask(null);
// First run:
{
final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
final TaskToolbox taskToolbox = makeToolbox(task1, mdc, directory);
final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
// Wait for firehose to show up, it starts off null.
while (task1.getFirehose() == null) {
Thread.sleep(50);
}
final TestFirehose firehose = (TestFirehose) task1.getFirehose();
firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))));
// Trigger graceful shutdown.
task1.stopGracefully();
// Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
final TaskStatus taskStatus = statusFuture.get();
Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
// Nothing should be published.
Assert.assertEquals(Sets.newHashSet(), mdc.getPublished());
}
// Corrupt the data:
final File smooshFile = new File(String.format("%s/persistent/task/%s/work/persist/%s/%s_%s/0/00000.smoosh", directory, task1.getId(), task1.getDataSource(), Granularities.DAY.bucketStart(now), Granularities.DAY.bucketEnd(now)));
Files.write(smooshFile.toPath(), "oops!".getBytes(Charsets.UTF_8));
// Second run:
{
final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
final TaskToolbox taskToolbox = makeToolbox(task2, mdc, directory);
final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
// Wait for the task to finish.
boolean caught = false;
try {
statusFuture.get();
} catch (Exception e) {
caught = true;
}
Assert.assertTrue("expected exception", caught);
}
}
use of io.druid.indexing.common.TaskStatus in project druid by druid-io.
the class RealtimeIndexTaskTest method testHandoffTimeout.
@Test(timeout = 60_000L, expected = ExecutionException.class)
public void testHandoffTimeout() throws Exception {
final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
final RealtimeIndexTask task = makeRealtimeTask(null, true, 100L);
final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
// Wait for firehose to show up, it starts off null.
while (task.getFirehose() == null) {
Thread.sleep(50);
}
final TestFirehose firehose = (TestFirehose) task.getFirehose();
firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", "1"))));
// Stop the firehose, this will drain out existing events.
firehose.close();
// Wait for publish.
while (mdc.getPublished().isEmpty()) {
Thread.sleep(50);
}
Assert.assertEquals(1, task.getMetrics().processed());
Assert.assertNotNull(Iterables.getOnlyElement(mdc.getPublished()));
// handoff would timeout, resulting in exception
statusFuture.get();
}
use of io.druid.indexing.common.TaskStatus in project druid by druid-io.
the class TaskQueue method manage.
/**
* Main task runner management loop. Meant to run forever, or, at least until we're stopped.
*/
private void manage() throws InterruptedException {
log.info("Beginning management in %s.", config.getStartDelay());
Thread.sleep(config.getStartDelay().getMillis());
// Ignore return value- we'll get the IDs and futures from getKnownTasks later.
taskRunner.restore();
while (active) {
giant.lock();
try {
// Task futures available from the taskRunner
final Map<String, ListenableFuture<TaskStatus>> runnerTaskFutures = Maps.newHashMap();
for (final TaskRunnerWorkItem workItem : taskRunner.getKnownTasks()) {
runnerTaskFutures.put(workItem.getTaskId(), workItem.getResult());
}
// Copy tasks list, as notifyStatus may modify it.
for (final Task task : ImmutableList.copyOf(tasks)) {
if (!taskFutures.containsKey(task.getId())) {
final ListenableFuture<TaskStatus> runnerTaskFuture;
if (runnerTaskFutures.containsKey(task.getId())) {
runnerTaskFuture = runnerTaskFutures.get(task.getId());
} else {
// Task should be running, so run it.
final boolean taskIsReady;
try {
taskIsReady = task.isReady(taskActionClientFactory.create(task));
} catch (Exception e) {
log.warn(e, "Exception thrown during isReady for task: %s", task.getId());
notifyStatus(task, TaskStatus.failure(task.getId()));
continue;
}
if (taskIsReady) {
log.info("Asking taskRunner to run: %s", task.getId());
runnerTaskFuture = taskRunner.run(task);
} else {
continue;
}
}
taskFutures.put(task.getId(), attachCallbacks(task, runnerTaskFuture));
}
}
// Kill tasks that shouldn't be running
final Set<String> tasksToKill = Sets.difference(runnerTaskFutures.keySet(), ImmutableSet.copyOf(Lists.transform(tasks, new Function<Task, Object>() {
@Override
public String apply(Task task) {
return task.getId();
}
})));
if (!tasksToKill.isEmpty()) {
log.info("Asking taskRunner to clean up %,d tasks.", tasksToKill.size());
for (final String taskId : tasksToKill) {
try {
taskRunner.shutdown(taskId);
} catch (Exception e) {
log.warn(e, "TaskRunner failed to clean up task: %s", taskId);
}
}
}
// awaitNanos because management may become necessary without this condition signalling,
// due to e.g. tasks becoming ready when other folks mess with the TaskLockbox.
managementMayBeNecessary.awaitNanos(60000000000L);
} finally {
giant.unlock();
}
}
}
use of io.druid.indexing.common.TaskStatus in project druid by druid-io.
the class ThreadPoolTaskRunner method run.
@Override
public ListenableFuture<TaskStatus> run(final Task task) {
final TaskToolbox toolbox = toolboxFactory.build(task);
final Object taskPriorityObj = task.getContextValue(TaskThreadPriority.CONTEXT_KEY);
int taskPriority = 0;
if (taskPriorityObj != null) {
if (taskPriorityObj instanceof Number) {
taskPriority = ((Number) taskPriorityObj).intValue();
} else if (taskPriorityObj instanceof String) {
try {
taskPriority = Integer.parseInt(taskPriorityObj.toString());
} catch (NumberFormatException e) {
log.error(e, "Error parsing task priority [%s] for task [%s]", taskPriorityObj, task.getId());
}
}
}
// Ensure an executor for that priority exists
if (!exec.containsKey(taskPriority)) {
final ListeningExecutorService executorService = buildExecutorService(taskPriority);
if (exec.putIfAbsent(taskPriority, executorService) != null) {
// favor prior service
executorService.shutdownNow();
}
}
final ListenableFuture<TaskStatus> statusFuture = exec.get(taskPriority).submit(new ThreadPoolTaskRunnerCallable(task, location, toolbox));
final ThreadPoolTaskRunnerWorkItem taskRunnerWorkItem = new ThreadPoolTaskRunnerWorkItem(task, location, statusFuture);
runningItems.add(taskRunnerWorkItem);
Futures.addCallback(statusFuture, new FutureCallback<TaskStatus>() {
@Override
public void onSuccess(TaskStatus result) {
runningItems.remove(taskRunnerWorkItem);
}
@Override
public void onFailure(Throwable t) {
runningItems.remove(taskRunnerWorkItem);
}
});
return statusFuture;
}
use of io.druid.indexing.common.TaskStatus in project druid by druid-io.
the class ThreadPoolTaskRunner method stop.
@Override
@LifecycleStop
public void stop() {
stopping = true;
for (Map.Entry<Integer, ListeningExecutorService> entry : exec.entrySet()) {
try {
entry.getValue().shutdown();
} catch (SecurityException ex) {
log.wtf(ex, "I can't control my own threads!");
}
}
for (ThreadPoolTaskRunnerWorkItem item : runningItems) {
final Task task = item.getTask();
final long start = System.currentTimeMillis();
final boolean graceful;
final long elapsed;
boolean error = false;
if (taskConfig.isRestoreTasksOnRestart() && task.canRestore()) {
// Attempt graceful shutdown.
graceful = true;
log.info("Starting graceful shutdown of task[%s].", task.getId());
try {
task.stopGracefully();
final TaskStatus taskStatus = item.getResult().get(new Interval(new DateTime(start), taskConfig.getGracefulShutdownTimeout()).toDurationMillis(), TimeUnit.MILLISECONDS);
// Ignore status, it doesn't matter for graceful shutdowns.
log.info("Graceful shutdown of task[%s] finished in %,dms.", task.getId(), System.currentTimeMillis() - start);
TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), taskStatus);
} catch (Exception e) {
log.makeAlert(e, "Graceful task shutdown failed: %s", task.getDataSource()).addData("taskId", task.getId()).addData("dataSource", task.getDataSource()).emit();
log.warn(e, "Graceful shutdown of task[%s] aborted with exception.", task.getId());
error = true;
TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.failure(task.getId()));
}
} else {
graceful = false;
TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.failure(task.getId()));
}
elapsed = System.currentTimeMillis() - start;
final ServiceMetricEvent.Builder metricBuilder = ServiceMetricEvent.builder().setDimension("task", task.getId()).setDimension("dataSource", task.getDataSource()).setDimension("graceful", String.valueOf(graceful)).setDimension("error", String.valueOf(error));
emitter.emit(metricBuilder.build("task/interrupt/count", 1L));
emitter.emit(metricBuilder.build("task/interrupt/elapsed", elapsed));
}
// Ok, now interrupt everything.
for (Map.Entry<Integer, ListeningExecutorService> entry : exec.entrySet()) {
try {
entry.getValue().shutdownNow();
} catch (SecurityException ex) {
log.wtf(ex, "I can't control my own threads!");
}
}
}
Aggregations