use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.
the class RemoteTaskRunner method cleanup.
/**
* Removes a task from the complete queue and clears out the ZK status path of the task.
*
* @param taskId - the task to cleanup
*/
private void cleanup(final String taskId) {
if (!lifecycleLock.awaitStarted(1, TimeUnit.SECONDS)) {
return;
}
final RemoteTaskRunnerWorkItem removed = completeTasks.remove(taskId);
final Worker worker;
if (removed == null || (worker = removed.getWorker()) == null) {
log.makeAlert("Asked to cleanup nonexistent task").addData("taskId", taskId).emit();
} else {
final String workerId = worker.getHost();
log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId);
final String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId, taskId);
try {
cf.delete().guaranteed().forPath(statusPath);
} catch (KeeperException.NoNodeException e) {
log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.
the class ZkWorker method setWorker.
public void setWorker(Worker newWorker) {
final Worker oldWorker = worker.get();
Preconditions.checkArgument(newWorker.getHost().equals(oldWorker.getHost()), "Cannot change Worker host");
Preconditions.checkArgument(newWorker.getIp().equals(oldWorker.getIp()), "Cannot change Worker ip");
worker.set(newWorker);
}
use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.
the class HttpRemoteTaskRunnerTest method testOneStuckTaskAssignmentDoesntBlockOthers.
/*
Simulates one task not getting acknowledged to be running after assigning it to a worker. But, other tasks are
successfully assigned to other worker and get completed.
*/
@Test(timeout = 60_000L)
public void testOneStuckTaskAssignmentDoesntBlockOthers() throws Exception {
TestDruidNodeDiscovery druidNodeDiscovery = new TestDruidNodeDiscovery();
DruidNodeDiscoveryProvider druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
EasyMock.expect(druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(druidNodeDiscovery);
EasyMock.replay(druidNodeDiscoveryProvider);
Task task1 = NoopTask.create("task-id-1", 0);
Task task2 = NoopTask.create("task-id-2", 0);
Task task3 = NoopTask.create("task-id-3", 0);
HttpRemoteTaskRunner taskRunner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), new HttpRemoteTaskRunnerConfig() {
@Override
public int getPendingTasksRunnerNumThreads() {
return 3;
}
}, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {
@Override
protected WorkerHolder createWorkerHolder(ObjectMapper smileMapper, HttpClient httpClient, HttpRemoteTaskRunnerConfig config, ScheduledExecutorService workersSyncExec, WorkerHolder.Listener listener, Worker worker, List<TaskAnnouncement> knownAnnouncements) {
return HttpRemoteTaskRunnerTest.createWorkerHolder(smileMapper, httpClient, config, workersSyncExec, listener, worker, ImmutableList.of(), ImmutableList.of(), // no announcements would be received for task1
ImmutableMap.of(task1, ImmutableList.of()), new AtomicInteger(), ImmutableSet.of());
}
};
taskRunner.start();
DiscoveryDruidNode druidNode1 = new DiscoveryDruidNode(new DruidNode("service", "host1", false, 8080, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
DiscoveryDruidNode druidNode2 = new DiscoveryDruidNode(new DruidNode("service", "host2", false, 8080, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip2", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode1, druidNode2));
taskRunner.run(task1);
Future<TaskStatus> future2 = taskRunner.run(task2);
Future<TaskStatus> future3 = taskRunner.run(task3);
Assert.assertTrue(future2.get().isSuccess());
Assert.assertTrue(future3.get().isSuccess());
Assert.assertEquals(task1.getId(), Iterables.getOnlyElement(taskRunner.getPendingTasks()).getTaskId());
}
use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.
the class HttpRemoteTaskRunnerTest method testWorkerDisapperAndReappearBeforeItsCleanup.
@Test(timeout = 60_000L)
public void testWorkerDisapperAndReappearBeforeItsCleanup() throws Exception {
TestDruidNodeDiscovery druidNodeDiscovery = new TestDruidNodeDiscovery();
DruidNodeDiscoveryProvider druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
EasyMock.expect(druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(druidNodeDiscovery);
EasyMock.replay(druidNodeDiscoveryProvider);
ConcurrentMap<String, CustomFunction> workerHolders = new ConcurrentHashMap<>();
HttpRemoteTaskRunner taskRunner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), new HttpRemoteTaskRunnerConfig() {
@Override
public int getPendingTasksRunnerNumThreads() {
return 3;
}
}, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {
@Override
protected WorkerHolder createWorkerHolder(ObjectMapper smileMapper, HttpClient httpClient, HttpRemoteTaskRunnerConfig config, ScheduledExecutorService workersSyncExec, WorkerHolder.Listener listener, Worker worker, List<TaskAnnouncement> knownAnnouncements) {
if (workerHolders.containsKey(worker.getHost())) {
return workerHolders.get(worker.getHost()).apply(smileMapper, httpClient, config, workersSyncExec, listener, worker, knownAnnouncements);
} else {
throw new ISE("No WorkerHolder for [%s].", worker.getHost());
}
}
};
taskRunner.start();
Task task1 = NoopTask.create("task-id-1", 0);
Task task2 = NoopTask.create("task-id-2", 0);
DiscoveryDruidNode druidNode = new DiscoveryDruidNode(new DruidNode("service", "host", false, 1234, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(), ImmutableMap.of(task1, ImmutableList.of(TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.unknown()), TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("host", 1234, 1235))), task2, ImmutableList.of(TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.unknown()), TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235)))), new AtomicInteger(), ImmutableSet.of()));
druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
Future<TaskStatus> future1 = taskRunner.run(task1);
Future<TaskStatus> future2 = taskRunner.run(task2);
while (taskRunner.getPendingTasks().size() > 0) {
Thread.sleep(100);
}
druidNodeDiscovery.getListeners().get(0).nodesRemoved(ImmutableList.of(druidNode));
workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task2, TaskStatus.success(task2.getId()), TaskLocation.create("host", 1234, 1235))), ImmutableMap.of(), new AtomicInteger(), ImmutableSet.of()));
druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
Assert.assertTrue(future1.get().isSuccess());
Assert.assertTrue(future2.get().isSuccess());
}
use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.
the class HttpRemoteTaskRunnerTest method testWorkerDisapperAndReappearAfterItsCleanup.
@Test(timeout = 60_000L)
public void testWorkerDisapperAndReappearAfterItsCleanup() throws Exception {
TestDruidNodeDiscovery druidNodeDiscovery = new TestDruidNodeDiscovery();
DruidNodeDiscoveryProvider druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
EasyMock.expect(druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(druidNodeDiscovery);
EasyMock.replay(druidNodeDiscoveryProvider);
ConcurrentMap<String, CustomFunction> workerHolders = new ConcurrentHashMap<>();
HttpRemoteTaskRunner taskRunner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), new HttpRemoteTaskRunnerConfig() {
@Override
public Period getTaskCleanupTimeout() {
return Period.millis(1);
}
}, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {
@Override
protected WorkerHolder createWorkerHolder(ObjectMapper smileMapper, HttpClient httpClient, HttpRemoteTaskRunnerConfig config, ScheduledExecutorService workersSyncExec, WorkerHolder.Listener listener, Worker worker, List<TaskAnnouncement> knownAnnouncements) {
if (workerHolders.containsKey(worker.getHost())) {
return workerHolders.get(worker.getHost()).apply(smileMapper, httpClient, config, workersSyncExec, listener, worker, knownAnnouncements);
} else {
throw new ISE("No WorkerHolder for [%s].", worker.getHost());
}
}
};
taskRunner.start();
Task task1 = NoopTask.create("task-id-1", 0);
Task task2 = NoopTask.create("task-id-2", 0);
DiscoveryDruidNode druidNode = new DiscoveryDruidNode(new DruidNode("service", "host", false, 1234, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(), ImmutableMap.of(task1, ImmutableList.of(TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.unknown()), TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.create("host", 1234, 1235))), task2, ImmutableList.of(TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.unknown()), TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235)))), new AtomicInteger(), ImmutableSet.of()));
druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
Future<TaskStatus> future1 = taskRunner.run(task1);
Future<TaskStatus> future2 = taskRunner.run(task2);
while (taskRunner.getPendingTasks().size() > 0) {
Thread.sleep(100);
}
druidNodeDiscovery.getListeners().get(0).nodesRemoved(ImmutableList.of(druidNode));
Assert.assertTrue(future1.get().isFailure());
Assert.assertTrue(future2.get().isFailure());
Assert.assertNotNull(future1.get().getErrorMsg());
Assert.assertNotNull(future2.get().getErrorMsg());
Assert.assertTrue(future1.get().getErrorMsg().startsWith("The worker that this task was assigned disappeared and did not report cleanup within timeout"));
Assert.assertTrue(future2.get().getErrorMsg().startsWith("The worker that this task was assigned disappeared and did not report cleanup within timeout"));
AtomicInteger ticks = new AtomicInteger();
Set<String> actualShutdowns = new ConcurrentHashSet<>();
workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235))), ImmutableMap.of(), ticks, actualShutdowns));
druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
while (ticks.get() < 1) {
Thread.sleep(100);
}
Assert.assertEquals(ImmutableSet.of(task2.getId()), actualShutdowns);
Assert.assertTrue(taskRunner.run(task1).get().isFailure());
Assert.assertTrue(taskRunner.run(task2).get().isFailure());
}
Aggregations