Example use of org.apache.druid.java.util.common.ISE in the Apache Druid project (druid-io):
the TaskLockbox class, method revokeLock.
/**
 * Marks the given lock as revoked. Revoked locks are intentionally NOT removed: they remain in
 * {@link #running} and {@link #taskStorage} exactly like active locks, so that later acquisition
 * attempts can detect the revocation and notify callers. Revoked locks are eventually removed via
 * {@link #unlock(Task, Interval)}.
 *
 * @param taskId id of the task holding the lock
 * @param lock   the lock to revoke
 */
@VisibleForTesting
protected void revokeLock(String taskId, TaskLock lock) {
  giant.lock();
  try {
    // Only locks belonging to active, known tasks may be revoked.
    if (!activeTasks.contains(taskId)) {
      throw new ISE("Cannot revoke lock for inactive task[%s]", taskId);
    }
    final Task task = taskStorage.getTask(taskId).orNull();
    if (task == null) {
      throw new ISE("Cannot revoke lock for unknown task[%s]", taskId);
    }
    log.info("Revoking task lock[%s] for task[%s]", lock, taskId);
    if (lock.isRevoked()) {
      // Revocation is idempotent; just note the repeat.
      log.warn("TaskLock[%s] is already revoked", lock);
    } else {
      final TaskLock revoked = lock.revokedCopy();
      // Persist the revoked copy first, then swap the in-memory posse to match.
      taskStorage.replaceLock(taskId, lock, revoked);

      final Interval interval = lock.getInterval();
      final List<TaskLockPosse> posses =
          running.get(task.getDataSource()).get(interval.getStart()).get(interval);
      final TaskLockPosse match = posses
          .stream()
          .filter(p -> p.getTaskLock().equals(lock))
          .findFirst()
          .orElseThrow(() -> new ISE("Failed to find lock posse for lock[%s]", lock));
      posses.remove(match);
      posses.add(match.withTaskLock(revoked));
      log.info("Revoked taskLock[%s]", lock);
    }
  } finally {
    giant.unlock();
  }
}
Example use of org.apache.druid.java.util.common.ISE in the Apache Druid project (druid-io):
the SeekableStreamIndexTaskClient class, method pause.
/**
 * Asks the task with the given id to pause and returns the partition-to-sequence offsets at which
 * it paused. On 200 OK the offsets are deserialized straight from the response body. On 202
 * ACCEPTED the pause was received but not yet applied, so this method polls the task's status
 * (subject to the retry policy) until it reports PAUSED, then fetches the current offsets. Any
 * other response status is treated as a failure.
 *
 * @param id id of the task to pause
 * @return the task's current offsets after pausing, or an empty map if the task has no known location
 * @throws ISE if the pause request fails or the task never reaches PAUSED within the retry budget
 * @throws RE  if an IO error or interruption occurs while communicating with the task
 */
public Map<PartitionIdType, SequenceOffsetType> pause(final String id) {
  log.debug("Pause task[%s]", id);
  try {
    final StringFullResponseHolder response = submitRequestWithEmptyContent(id, HttpMethod.POST, "pause", null, true);
    final HttpResponseStatus responseStatus = response.getStatus();
    final String responseContent = response.getContent();
    if (responseStatus.equals(HttpResponseStatus.OK)) {
      log.info("Task [%s] paused successfully", id);
      return deserializeMap(responseContent, Map.class, getPartitionType(), getSequenceType());
    } else if (responseStatus.equals(HttpResponseStatus.ACCEPTED)) {
      // The task received the pause request, but its status hasn't been changed yet.
      // Poll until the task reports PAUSED, backing off according to the retry policy.
      final RetryPolicy retryPolicy = newRetryPolicy();
      while (true) {
        final SeekableStreamIndexTaskRunner.Status status = getStatus(id);
        if (status == SeekableStreamIndexTaskRunner.Status.PAUSED) {
          return getCurrentOffsets(id, true);
        }
        final Duration delay = retryPolicy.getAndIncrementRetryDelay();
        if (delay == null) {
          // Retry budget exhausted; give up waiting for the status transition.
          throw new ISE("Task [%s] failed to change its status from [%s] to [%s], aborting", id, status, SeekableStreamIndexTaskRunner.Status.PAUSED);
        } else {
          final long sleepTime = delay.getMillis();
          log.info("Still waiting for task [%s] to change its status to [%s]; will try again in [%s]", id, SeekableStreamIndexTaskRunner.Status.PAUSED, new Duration(sleepTime).toString());
          Thread.sleep(sleepTime);
        }
      }
    } else {
      throw new ISE("Pause request for task [%s] failed with response [%s] : [%s]", id, responseStatus, responseContent);
    }
  } catch (NoTaskLocationException e) {
    // The task has no known location (e.g. it already finished); treat as nothing to pause.
    log.error("Exception [%s] while pausing Task [%s]", e.getMessage(), id);
    return ImmutableMap.of();
  } catch (InterruptedException e) {
    // Restore the interrupt flag so callers up the stack can observe the interruption.
    Thread.currentThread().interrupt();
    throw new RE(e, "Exception [%s] while pausing Task [%s]", e.getMessage(), id);
  } catch (IOException e) {
    throw new RE(e, "Exception [%s] while pausing Task [%s]", e.getMessage(), id);
  }
}
Example use of org.apache.druid.java.util.common.ISE in the Apache Druid project (druid-io):
the TaskLifecycleTest class, method testLockRevoked.
@Test
public void testLockRevoked() throws Exception {
  // Task whose run() revokes its own lock and verifies that subsequent acquisitions see it as revoked.
  final Task task = new AbstractFixedIntervalTask("id1", "id1", new TaskResource("id1", 1), "ds", Intervals.of("2012-01-01/P1D"), null) {
    @Override
    public String getType() {
      return "test";
    }

    @Override
    public void stopGracefully(TaskConfig taskConfig) {
    }

    @Override
    public TaskStatus run(TaskToolbox toolbox) throws Exception {
      final TimeChunkLockTryAcquireAction acquireAction =
          new TimeChunkLockTryAcquireAction(TaskLockType.EXCLUSIVE, Intervals.of("2012-01-01/P1D"));
      final TaskLock originalLock = toolbox.getTaskActionClient().submit(acquireAction);
      if (originalLock == null) {
        throw new ISE("Failed to get a lock");
      }
      // Re-acquiring before revocation must yield a non-revoked lock.
      final TaskLock lockBeforeRevoke = toolbox.getTaskActionClient().submit(acquireAction);
      Assert.assertFalse(lockBeforeRevoke.isRevoked());

      taskLockbox.revokeLock(getId(), originalLock);

      // After revocation, the same acquisition reports the lock as revoked.
      final TaskLock lockAfterRevoke = toolbox.getTaskActionClient().submit(acquireAction);
      Assert.assertTrue(lockAfterRevoke.isRevoked());
      return TaskStatus.failure(getId(), "lock revoked test");
    }
  };

  final TaskStatus status = runTask(task);
  Assert.assertEquals(taskLocation, status.getLocation());
  Assert.assertEquals("statusCode", TaskState.FAILED, status.getStatusCode());
  Assert.assertEquals("segments published", 0, mdc.getPublished().size());
  Assert.assertEquals("segments nuked", 0, mdc.getNuked().size());
}
Example use of org.apache.druid.java.util.common.ISE in the Apache Druid project (druid-io):
the HttpRemoteTaskRunnerTest class, method testTaskRunnerRestart.
/*
Simulates a restart of the Overlord where the taskRunner, on start, discovers workers with preexisting tasks.
*/
@Test(timeout = 60_000L)
public void testTaskRunnerRestart() throws Exception {
// Discovery provider hands out a test discovery object so the test can inject worker nodes directly.
TestDruidNodeDiscovery druidNodeDiscovery = new TestDruidNodeDiscovery();
DruidNodeDiscoveryProvider druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
EasyMock.expect(druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(druidNodeDiscovery);
EasyMock.replay(druidNodeDiscoveryProvider);
ConcurrentMap<String, CustomFunction> workerHolders = new ConcurrentHashMap<>();
Task task1 = NoopTask.create("task-id-1", 0);
Task task2 = NoopTask.create("task-id-2", 0);
Task task3 = NoopTask.create("task-id-3", 0);
Task task4 = NoopTask.create("task-id-4", 0);
Task task5 = NoopTask.create("task-id-5", 0);
// Strict mock: the exact order/count of getStatus calls below is part of the expectation.
// task1/task2 are unknown to storage (absent), task3/task4 are running, task5 is already successful.
TaskStorage taskStorageMock = EasyMock.createStrictMock(TaskStorage.class);
EasyMock.expect(taskStorageMock.getStatus(task1.getId())).andReturn(Optional.absent());
EasyMock.expect(taskStorageMock.getStatus(task2.getId())).andReturn(Optional.absent()).times(2);
EasyMock.expect(taskStorageMock.getStatus(task3.getId())).andReturn(Optional.of(TaskStatus.running(task3.getId())));
EasyMock.expect(taskStorageMock.getStatus(task4.getId())).andReturn(Optional.of(TaskStatus.running(task4.getId())));
EasyMock.expect(taskStorageMock.getStatus(task5.getId())).andReturn(Optional.of(TaskStatus.success(task5.getId())));
EasyMock.replay(taskStorageMock);
HttpRemoteTaskRunner taskRunner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), new HttpRemoteTaskRunnerConfig() {
@Override
public int getPendingTasksRunnerNumThreads() {
return 3;
}
}, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, taskStorageMock, EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {
@Override
protected WorkerHolder createWorkerHolder(ObjectMapper smileMapper, HttpClient httpClient, HttpRemoteTaskRunnerConfig config, ScheduledExecutorService workersSyncExec, WorkerHolder.Listener listener, Worker worker, List<TaskAnnouncement> knownAnnouncements) {
// Route worker-holder creation through the test-registered factory; unknown hosts are a test bug.
if (workerHolders.containsKey(worker.getHost())) {
return workerHolders.get(worker.getHost()).apply(smileMapper, httpClient, config, workersSyncExec, listener, worker, knownAnnouncements);
} else {
throw new ISE("No WorkerHolder for [%s].", worker.getHost());
}
}
};
taskRunner.start();
DiscoveryDruidNode druidNode = new DiscoveryDruidNode(new DruidNode("service", "host", false, 1234, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
AtomicInteger ticks = new AtomicInteger();
Set<String> taskShutdowns = new HashSet<>();
// Worker announces a mixture of tasks: finished (task1), running-then-finished (task2),
// finished-but-storage-says-running (task3), running (task4), and running-but-storage-says-success (task5).
workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task2, TaskStatus.success(task2.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task3, TaskStatus.success(task3.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task4, TaskStatus.running(task4.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task5, TaskStatus.running(task5.getId()), TaskLocation.create("host", 1234, 1235))), ImmutableMap.of(), ticks, taskShutdowns));
druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
// Wait for the worker holder to process at least one sync tick before verifying.
while (ticks.get() < 1) {
Thread.sleep(100);
}
EasyMock.verify(taskStorageMock);
// task2 (unknown to storage) and task5 (already successful in storage) must have been shut down.
Assert.assertEquals(ImmutableSet.of(task2.getId(), task5.getId()), taskShutdowns);
Assert.assertTrue(taskRunner.getPendingTasks().isEmpty());
// task4 is the only task still running after restart reconciliation.
TaskRunnerWorkItem item = Iterables.getOnlyElement(taskRunner.getRunningTasks());
Assert.assertEquals(task4.getId(), item.getTaskId());
// task3 was announced as successful; re-running it should complete successfully.
Assert.assertTrue(taskRunner.run(task3).get().isSuccess());
Assert.assertEquals(2, taskRunner.getKnownTasks().size());
}
Example use of org.apache.druid.java.util.common.ISE in the Apache Druid project (druid-io):
the RemoteTaskRunnerRunPendingTasksConcurrencyTest class, method testConcurrency.
// This test reproduces the races described in https://github.com/apache/druid/issues/2842
@Test(timeout = 60_000L)
public void testConcurrency() throws Exception {
// Two workers with capacity 3 each, and a runner with two pending-task threads so assignments can race.
rtrTestUtils.makeWorker("worker0", 3);
rtrTestUtils.makeWorker("worker1", 3);
remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(new Period("PT3600S")) {
@Override
public int getPendingTasksRunnerNumThreads() {
return 2;
}
});
int numTasks = 6;
ListenableFuture<TaskStatus>[] results = new ListenableFuture[numTasks];
Task[] tasks = new Task[numTasks];
// 2 tasks
for (int i = 0; i < 2; i++) {
tasks[i] = TestTasks.unending("task" + i);
results[i] = (remoteTaskRunner.run(tasks[i]));
}
waitForBothWorkersToHaveUnackedTasks();
// 3 more tasks, all of which get queued up
for (int i = 2; i < 5; i++) {
tasks[i] = TestTasks.unending("task" + i);
results[i] = (remoteTaskRunner.run(tasks[i]));
}
// simulate completion of task0 and task1
mockWorkerRunningAndCompletionSuccessfulTasks(tasks[0], tasks[1]);
Assert.assertEquals(TaskState.SUCCESS, results[0].get().getStatusCode());
Assert.assertEquals(TaskState.SUCCESS, results[1].get().getStatusCode());
// now both threads race to run the last 3 tasks. task2 and task3 are being assigned
waitForBothWorkersToHaveUnackedTasks();
// Which pair of tasks won the race is nondeterministic; branch on the observed pair,
// shut down the loser, and drive the two winners to completion.
if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[2].getId()) && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[3].getId())) {
remoteTaskRunner.shutdown("task4", "test");
mockWorkerRunningAndCompletionSuccessfulTasks(tasks[3], tasks[2]);
Assert.assertEquals(TaskState.SUCCESS, results[3].get().getStatusCode());
Assert.assertEquals(TaskState.SUCCESS, results[2].get().getStatusCode());
} else if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[3].getId()) && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[4].getId())) {
remoteTaskRunner.shutdown("task2", "test");
mockWorkerRunningAndCompletionSuccessfulTasks(tasks[4], tasks[3]);
Assert.assertEquals(TaskState.SUCCESS, results[4].get().getStatusCode());
Assert.assertEquals(TaskState.SUCCESS, results[3].get().getStatusCode());
} else if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[4].getId()) && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[2].getId())) {
remoteTaskRunner.shutdown("task3", "test");
mockWorkerRunningAndCompletionSuccessfulTasks(tasks[4], tasks[2]);
Assert.assertEquals(TaskState.SUCCESS, results[4].get().getStatusCode());
Assert.assertEquals(TaskState.SUCCESS, results[2].get().getStatusCode());
} else {
// Exactly two of tasks 2/3/4 must be awaiting ack; anything else means the runner is broken.
throw new ISE("two out of three tasks 2,3 and 4 must be waiting for ack.");
}
// ensure that RTR is doing OK and still making progress
tasks[5] = TestTasks.unending("task5");
results[5] = remoteTaskRunner.run(tasks[5]);
waitForOneWorkerToHaveUnackedTasks();
// task5 may land on either worker; complete it wherever it was announced.
if (rtrTestUtils.taskAnnounced("worker0", tasks[5].getId())) {
rtrTestUtils.mockWorkerRunningTask("worker0", tasks[5]);
rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker0", tasks[5]);
} else {
rtrTestUtils.mockWorkerRunningTask("worker1", tasks[5]);
rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker1", tasks[5]);
}
Assert.assertEquals(TaskState.SUCCESS, results[5].get().getStatusCode());
}
Aggregations