Search in sources :

Example 31 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class RemoteTaskRunner method cleanup.

/**
 * Drops a task from the completed-task map and deletes the status path that was recorded for it under
 * the worker it ran on.
 *
 * <p>Returns without doing anything when {@code lifecycleLock.awaitStarted(1, TimeUnit.SECONDS)} reports
 * {@code false}. Emits an alert when no completed work item exists for the id, or when that item has no
 * worker attached. A missing status node is only logged; any other failure while deleting is rethrown
 * wrapped in a {@link RuntimeException}.
 *
 * @param taskId - the task to cleanup
 */
private void cleanup(final String taskId) {
    final boolean started = lifecycleLock.awaitStarted(1, TimeUnit.SECONDS);
    if (!started) {
        return;
    }

    // The work item is removed from completeTasks even if it later turns out to have no worker.
    final RemoteTaskRunnerWorkItem completedItem = completeTasks.remove(taskId);
    final Worker assignedWorker = completedItem == null ? null : completedItem.getWorker();
    if (assignedWorker == null) {
        log.makeAlert("Asked to cleanup nonexistent task").addData("taskId", taskId).emit();
        return;
    }

    final String host = assignedWorker.getHost();
    log.info("Cleaning up task[%s] on worker[%s]", taskId, host);
    final String zkStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), host, taskId);
    try {
        cf.delete().guaranteed().forPath(zkStatusPath);
    } catch (KeeperException.NoNodeException ignored) {
        // Nothing left to delete; treated as already cleaned up.
        log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", zkStatusPath);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : Worker(org.apache.druid.indexing.worker.Worker) KeeperException(org.apache.zookeeper.KeeperException) TimeoutException(java.util.concurrent.TimeoutException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 32 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class ZkWorker method setWorker.

/**
 * Replaces the currently held {@link Worker} with {@code newWorker}.
 *
 * @param newWorker the replacement worker; must report the same host and ip as the current one
 * @throws IllegalArgumentException if the host or the ip differs from the current worker's
 */
public void setWorker(Worker newWorker) {
    final Worker current = worker.get();

    // The host is validated (and may throw) before the ip is even read.
    final boolean sameHost = newWorker.getHost().equals(current.getHost());
    Preconditions.checkArgument(sameHost, "Cannot change Worker host");

    final boolean sameIp = newWorker.getIp().equals(current.getIp());
    Preconditions.checkArgument(sameIp, "Cannot change Worker ip");

    worker.set(newWorker);
}
Also used : Worker(org.apache.druid.indexing.worker.Worker)

Example 33 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class HttpRemoteTaskRunnerTest method testOneStuckTaskAssignmentDoesntBlockOthers.

/*
  Scenario: one task is handed to a worker but is never acknowledged as running. The remaining tasks must
  still be assigned to the other worker and finish successfully, leaving only the stuck task pending.
   */
@Test(timeout = 60_000L)
public void testOneStuckTaskAssignmentDoesntBlockOthers() throws Exception {
    TestDruidNodeDiscovery nodeDiscovery = new TestDruidNodeDiscovery();
    DruidNodeDiscoveryProvider discoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
    EasyMock.expect(discoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY))
            .andReturn(nodeDiscovery);
    EasyMock.replay(discoveryProvider);

    Task stuckTask = NoopTask.create("task-id-1", 0);
    Task secondTask = NoopTask.create("task-id-2", 0);
    Task thirdTask = NoopTask.create("task-id-3", 0);

    HttpRemoteTaskRunner runner = new HttpRemoteTaskRunner(
            TestHelper.makeJsonMapper(),
            new HttpRemoteTaskRunnerConfig() {

                @Override
                public int getPendingTasksRunnerNumThreads() {
                    return 3;
                }
            },
            EasyMock.createNiceMock(HttpClient.class),
            DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())),
            new NoopProvisioningStrategy<>(),
            discoveryProvider,
            EasyMock.createNiceMock(TaskStorage.class),
            EasyMock.createNiceMock(CuratorFramework.class),
            new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {

        @Override
        protected WorkerHolder createWorkerHolder(
                ObjectMapper smileMapper,
                HttpClient httpClient,
                HttpRemoteTaskRunnerConfig config,
                ScheduledExecutorService workersSyncExec,
                WorkerHolder.Listener listener,
                Worker worker,
                List<TaskAnnouncement> knownAnnouncements) {
            return HttpRemoteTaskRunnerTest.createWorkerHolder(
                    smileMapper,
                    httpClient,
                    config,
                    workersSyncExec,
                    listener,
                    worker,
                    ImmutableList.of(),
                    ImmutableList.of(),
                    // no announcements would be received for task1
                    ImmutableMap.of(stuckTask, ImmutableList.of()),
                    new AtomicInteger(),
                    ImmutableSet.of());
        }
    };
    runner.start();

    // Two middle-manager nodes, each advertising a worker service, become visible at the same time.
    DiscoveryDruidNode firstWorkerNode = new DiscoveryDruidNode(
            new DruidNode("service", "host1", false, 8080, null, true, false),
            NodeRole.MIDDLE_MANAGER,
            ImmutableMap.of(
                    WorkerNodeService.DISCOVERY_SERVICE_KEY,
                    new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
    DiscoveryDruidNode secondWorkerNode = new DiscoveryDruidNode(
            new DruidNode("service", "host2", false, 8080, null, true, false),
            NodeRole.MIDDLE_MANAGER,
            ImmutableMap.of(
                    WorkerNodeService.DISCOVERY_SERVICE_KEY,
                    new WorkerNodeService("ip2", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
    nodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(firstWorkerNode, secondWorkerNode));

    // The stuck task's future is deliberately not awaited.
    runner.run(stuckTask);
    Future<TaskStatus> secondResult = runner.run(secondTask);
    Future<TaskStatus> thirdResult = runner.run(thirdTask);

    Assert.assertTrue(secondResult.get().isSuccess());
    Assert.assertTrue(thirdResult.get().isSuccess());
    Assert.assertEquals(
            stuckTask.getId(),
            Iterables.getOnlyElement(runner.getPendingTasks()).getTaskId());
}
Also used : IndexerZkConfig(org.apache.druid.server.initialization.IndexerZkConfig) Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskStatus(org.apache.druid.indexer.TaskStatus) HttpRemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig) WorkerNodeService(org.apache.druid.discovery.WorkerNodeService) CuratorFramework(org.apache.curator.framework.CuratorFramework) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DruidNodeDiscoveryProvider(org.apache.druid.discovery.DruidNodeDiscoveryProvider) ZkPathsConfig(org.apache.druid.server.initialization.ZkPathsConfig) HttpClient(org.apache.druid.java.util.http.client.HttpClient) Worker(org.apache.druid.indexing.worker.Worker) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) DruidNode(org.apache.druid.server.DruidNode) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 34 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class HttpRemoteTaskRunnerTest method testWorkerDisapperAndReappearBeforeItsCleanup.

/**
 * Removes a worker from discovery and adds it straight back (no wait between {@code nodesRemoved} and the
 * second {@code nodesAdded}), then asserts that both previously submitted tasks still end in success.
 *
 * <p>NOTE(review): worker behaviour is scripted through the test-local {@code createWorkerHolder} helper,
 * whose definition is not visible here; the meaning attributed below to its list/map arguments is inferred
 * from how they are populated and should be confirmed against the helper.
 */
@Test(timeout = 60_000L)
public void testWorkerDisapperAndReappearBeforeItsCleanup() throws Exception {
    // Discovery provider mock that hands the runner the test-controlled node discovery.
    TestDruidNodeDiscovery druidNodeDiscovery = new TestDruidNodeDiscovery();
    DruidNodeDiscoveryProvider druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
    EasyMock.expect(druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(druidNodeDiscovery);
    EasyMock.replay(druidNodeDiscoveryProvider);
    // WorkerHolder factories keyed by worker host; the test swaps the entry to change how the worker behaves
    // the next time the runner creates a holder for that host.
    ConcurrentMap<String, CustomFunction> workerHolders = new ConcurrentHashMap<>();
    HttpRemoteTaskRunner taskRunner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), new HttpRemoteTaskRunnerConfig() {

        @Override
        public int getPendingTasksRunnerNumThreads() {
            return 3;
        }
    }, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {

        // Delegates holder creation to the factory registered for the worker's host; an unregistered host
        // is a test-setup error and fails with an ISE.
        @Override
        protected WorkerHolder createWorkerHolder(ObjectMapper smileMapper, HttpClient httpClient, HttpRemoteTaskRunnerConfig config, ScheduledExecutorService workersSyncExec, WorkerHolder.Listener listener, Worker worker, List<TaskAnnouncement> knownAnnouncements) {
            if (workerHolders.containsKey(worker.getHost())) {
                return workerHolders.get(worker.getHost()).apply(smileMapper, httpClient, config, workersSyncExec, listener, worker, knownAnnouncements);
            } else {
                throw new ISE("No WorkerHolder for [%s].", worker.getHost());
            }
        }
    };
    taskRunner.start();
    Task task1 = NoopTask.create("task-id-1", 0);
    Task task2 = NoopTask.create("task-id-2", 0);
    DiscoveryDruidNode druidNode = new DiscoveryDruidNode(new DruidNode("service", "host", false, 1234, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
    // First incarnation of worker "host:1234". The per-task map scripts task1 as running (unknown location),
    // running (host:1234/1235), then success, and task2 as running twice with no terminal status.
    // Presumably these are the announcements emitted once each task is assigned - TODO confirm.
    workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(), ImmutableMap.of(task1, ImmutableList.of(TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.unknown()), TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("host", 1234, 1235))), task2, ImmutableList.of(TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.unknown()), TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235)))), new AtomicInteger(), ImmutableSet.of()));
    druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
    Future<TaskStatus> future1 = taskRunner.run(task1);
    Future<TaskStatus> future2 = taskRunner.run(task2);
    // Poll until the runner no longer reports any pending task before taking the worker away.
    while (taskRunner.getPendingTasks().size() > 0) {
        Thread.sleep(100);
    }
    druidNodeDiscovery.getListeners().get(0).nodesRemoved(ImmutableList.of(druidNode));
    // Second incarnation of the same host: the list argument now carries task2 running followed by task2
    // success, and the per-task map is empty. Presumably the list is what the worker reports on (re)sync -
    // verify against the helper.
    workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task2, TaskStatus.success(task2.getId()), TaskLocation.create("host", 1234, 1235))), ImmutableMap.of(), new AtomicInteger(), ImmutableSet.of()));
    druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
    // Both tasks are expected to finish successfully despite the worker having been removed and re-added.
    Assert.assertTrue(future1.get().isSuccess());
    Assert.assertTrue(future2.get().isSuccess());
}
Also used : IndexerZkConfig(org.apache.druid.server.initialization.IndexerZkConfig) Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) WorkerNodeService(org.apache.druid.discovery.WorkerNodeService) CuratorFramework(org.apache.curator.framework.CuratorFramework) ZkPathsConfig(org.apache.druid.server.initialization.ZkPathsConfig) Worker(org.apache.druid.indexing.worker.Worker) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ISE(org.apache.druid.java.util.common.ISE) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskStatus(org.apache.druid.indexer.TaskStatus) HttpRemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DruidNodeDiscoveryProvider(org.apache.druid.discovery.DruidNodeDiscoveryProvider) HttpClient(org.apache.druid.java.util.http.client.HttpClient) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) DruidNode(org.apache.druid.server.DruidNode) Test(org.junit.Test)

Example 35 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class HttpRemoteTaskRunnerTest method testWorkerDisapperAndReappearAfterItsCleanup.

/**
 * Removes a worker while two tasks are in flight, with the runner's task cleanup timeout overridden to one
 * millisecond, and asserts that both tasks fail with the "worker disappeared" error. The worker is then
 * added back and the test asserts that only task2's id was recorded in {@code actualShutdowns} and that
 * re-submitting either task yields a failure status.
 *
 * <p>NOTE(review): worker behaviour is scripted through the test-local {@code createWorkerHolder} helper,
 * whose definition is not visible here; the roles of {@code ticks} and {@code actualShutdowns} are inferred
 * from their names and from the assertions below - confirm against the helper.
 */
@Test(timeout = 60_000L)
public void testWorkerDisapperAndReappearAfterItsCleanup() throws Exception {
    // Discovery provider mock that hands the runner the test-controlled node discovery.
    TestDruidNodeDiscovery druidNodeDiscovery = new TestDruidNodeDiscovery();
    DruidNodeDiscoveryProvider druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
    EasyMock.expect(druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(druidNodeDiscovery);
    EasyMock.replay(druidNodeDiscoveryProvider);
    // WorkerHolder factories keyed by worker host; the entry is replaced later to script the worker's return.
    ConcurrentMap<String, CustomFunction> workerHolders = new ConcurrentHashMap<>();
    HttpRemoteTaskRunner taskRunner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), new HttpRemoteTaskRunnerConfig() {

        // A 1 ms cleanup timeout, so the futures below can fail soon after the worker is removed.
        @Override
        public Period getTaskCleanupTimeout() {
            return Period.millis(1);
        }
    }, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {

        // Delegates holder creation to the factory registered for the worker's host; an unregistered host
        // is a test-setup error and fails with an ISE.
        @Override
        protected WorkerHolder createWorkerHolder(ObjectMapper smileMapper, HttpClient httpClient, HttpRemoteTaskRunnerConfig config, ScheduledExecutorService workersSyncExec, WorkerHolder.Listener listener, Worker worker, List<TaskAnnouncement> knownAnnouncements) {
            if (workerHolders.containsKey(worker.getHost())) {
                return workerHolders.get(worker.getHost()).apply(smileMapper, httpClient, config, workersSyncExec, listener, worker, knownAnnouncements);
            } else {
                throw new ISE("No WorkerHolder for [%s].", worker.getHost());
            }
        }
    };
    taskRunner.start();
    Task task1 = NoopTask.create("task-id-1", 0);
    Task task2 = NoopTask.create("task-id-2", 0);
    DiscoveryDruidNode druidNode = new DiscoveryDruidNode(new DruidNode("service", "host", false, 1234, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
    // First incarnation of worker "host:1234": the per-task map scripts both tasks as running (unknown
    // location, then host:1234/1235) with no terminal status for either.
    workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(), ImmutableMap.of(task1, ImmutableList.of(TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.unknown()), TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.create("host", 1234, 1235))), task2, ImmutableList.of(TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.unknown()), TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235)))), new AtomicInteger(), ImmutableSet.of()));
    druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
    Future<TaskStatus> future1 = taskRunner.run(task1);
    Future<TaskStatus> future2 = taskRunner.run(task2);
    // Poll until the runner no longer reports any pending task before taking the worker away.
    while (taskRunner.getPendingTasks().size() > 0) {
        Thread.sleep(100);
    }
    druidNodeDiscovery.getListeners().get(0).nodesRemoved(ImmutableList.of(druidNode));
    // With the worker gone and never re-added at this point, both tasks must resolve as failures carrying
    // the "worker disappeared" error message.
    Assert.assertTrue(future1.get().isFailure());
    Assert.assertTrue(future2.get().isFailure());
    Assert.assertNotNull(future1.get().getErrorMsg());
    Assert.assertNotNull(future2.get().getErrorMsg());
    Assert.assertTrue(future1.get().getErrorMsg().startsWith("The worker that this task was assigned disappeared and did not report cleanup within timeout"));
    Assert.assertTrue(future2.get().getErrorMsg().startsWith("The worker that this task was assigned disappeared and did not report cleanup within timeout"));
    // Shared with the scripted holder so the test can observe it; presumably ticks counts sync rounds and
    // actualShutdowns collects task ids the runner asked the worker to shut down - TODO confirm.
    AtomicInteger ticks = new AtomicInteger();
    Set<String> actualShutdowns = new ConcurrentHashSet<>();
    // Second incarnation of the same host: the list argument carries task1 as success and task2 as still
    // running, and the per-task map is empty.
    workerHolders.put("host:1234", (mapper, httpClient, config, exec, listener, worker, knownAnnouncements) -> createWorkerHolder(mapper, httpClient, config, exec, listener, worker, knownAnnouncements, ImmutableList.of(TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("host", 1234, 1235)), TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("host", 1234, 1235))), ImmutableMap.of(), ticks, actualShutdowns));
    druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode));
    // Wait until the holder has ticked at least once before inspecting what it recorded.
    while (ticks.get() < 1) {
        Thread.sleep(100);
    }
    // Only task2 (reported as running) is expected in the shutdown set; task1 (reported as success) is not.
    Assert.assertEquals(ImmutableSet.of(task2.getId()), actualShutdowns);
    // Submitting the same tasks again still yields failure statuses.
    Assert.assertTrue(taskRunner.run(task1).get().isFailure());
    Assert.assertTrue(taskRunner.run(task2).get().isFailure());
}
Also used : IndexerZkConfig(org.apache.druid.server.initialization.IndexerZkConfig) Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) WorkerNodeService(org.apache.druid.discovery.WorkerNodeService) CuratorFramework(org.apache.curator.framework.CuratorFramework) ZkPathsConfig(org.apache.druid.server.initialization.ZkPathsConfig) ConcurrentHashSet(org.eclipse.jetty.util.ConcurrentHashSet) Worker(org.apache.druid.indexing.worker.Worker) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ISE(org.apache.druid.java.util.common.ISE) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Period(org.joda.time.Period) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskStatus(org.apache.druid.indexer.TaskStatus) HttpRemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DruidNodeDiscoveryProvider(org.apache.druid.discovery.DruidNodeDiscoveryProvider) HttpClient(org.apache.druid.java.util.http.client.HttpClient) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) DruidNode(org.apache.druid.server.DruidNode) Test(org.junit.Test)

Aggregations

Worker (org.apache.druid.indexing.worker.Worker)46 Test (org.junit.Test)32 NoopTask (org.apache.druid.indexing.common.task.NoopTask)21 ImmutableWorkerInfo (org.apache.druid.indexing.overlord.ImmutableWorkerInfo)15 ArrayList (java.util.ArrayList)14 Task (org.apache.druid.indexing.common.task.Task)13 TaskStorage (org.apache.druid.indexing.overlord.TaskStorage)12 RemoteTaskRunnerConfig (org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig)11 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)10 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)10 AtomicReference (java.util.concurrent.atomic.AtomicReference)10 HttpRemoteTaskRunnerConfig (org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig)10 HttpClient (org.apache.druid.java.util.http.client.HttpClient)10 IndexerZkConfig (org.apache.druid.server.initialization.IndexerZkConfig)10 ImmutableList (com.google.common.collect.ImmutableList)9 List (java.util.List)9 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)9 CuratorFramework (org.apache.curator.framework.CuratorFramework)9 DruidNodeDiscoveryProvider (org.apache.druid.discovery.DruidNodeDiscoveryProvider)9 TaskStatus (org.apache.druid.indexer.TaskStatus)9