Search in sources :

Example 41 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class HttpRemoteTaskRunner method startWorkersHandling.

/**
 * Registers a discovery listener for worker nodes, blocks until the initial worker view has been
 * reported, and then waits for every currently known worker holder to finish initializing.
 *
 * @throws InterruptedException if the calling thread is interrupted while waiting
 * @throws ISE if the worker view has still not been initialized after more than five minutes
 */
private void startWorkersHandling() throws InterruptedException {
    final CountDownLatch viewReady = new CountDownLatch(1);
    final DruidNodeDiscovery discovery = druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY);

    this.nodeDiscoveryListener = new DruidNodeDiscovery.Listener() {

        @Override
        public void nodesAdded(Collection<DiscoveryDruidNode> nodes) {
            for (DiscoveryDruidNode added : nodes) {
                addWorker(toWorker(added));
            }
        }

        @Override
        public void nodesRemoved(Collection<DiscoveryDruidNode> nodes) {
            for (DiscoveryDruidNode removed : nodes) {
                removeWorker(toWorker(removed));
            }
        }

        @Override
        public void nodeViewInitialized() {
            // Safe to invoke repeatedly: counting down a latch that is already at zero is a no-op.
            viewReady.countDown();
        }
    };
    discovery.registerListener(nodeDiscoveryListener);

    // Poll the latch in 30-second slices so progress is logged, giving up once 5 minutes have passed.
    final long maxWaitMillis = TimeUnit.MINUTES.toMillis(5);
    final long startedAtMillis = System.currentTimeMillis();
    while (!viewReady.await(30, TimeUnit.SECONDS)) {
        final long elapsedMillis = System.currentTimeMillis() - startedAtMillis;
        if (elapsedMillis > maxWaitMillis) {
            throw new ISE("Couldn't discover workers.");
        }
        log.info("Waiting for worker discovery...");
    }
    log.info("[%s] Workers are discovered.", workers.size());

    // Wait for each worker to sync before returning: task assignment starts soon after, and the
    // discovered workers are about to report tasks of their own.
    for (WorkerHolder holder : workers.values()) {
        log.info("Waiting for worker[%s] to sync state...", holder.getWorker().getHost());
        holder.waitForInitialization();
    }
    log.info("Workers have sync'd state successfully.");
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) DruidNodeDiscoveryProvider(org.apache.druid.discovery.DruidNodeDiscoveryProvider) ScheduledFuture(java.util.concurrent.ScheduledFuture) RemoteTaskRunnerWorkItem(org.apache.druid.indexing.overlord.RemoteTaskRunnerWorkItem) TaskLogStreamer(org.apache.druid.tasklogs.TaskLogStreamer) URL(java.net.URL) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) Collections2(com.google.common.collect.Collections2) LifecycleStart(org.apache.druid.java.util.common.lifecycle.LifecycleStart) Pair(org.apache.druid.java.util.common.Pair) Optional(com.google.common.base.Optional) Task(org.apache.druid.indexing.common.task.Task) Map(java.util.Map) TaskAnnouncement(org.apache.druid.indexing.worker.TaskAnnouncement) WorkerBehaviorConfig(org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig) DateTimes(org.apache.druid.java.util.common.DateTimes) Execs(org.apache.druid.java.util.common.concurrent.Execs) ImmutableMap(com.google.common.collect.ImmutableMap) GuardedBy(com.google.errorprone.annotations.concurrent.GuardedBy) InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) TaskState(org.apache.druid.indexer.TaskState) WorkerTaskRunnerConfig(org.apache.druid.indexing.overlord.config.WorkerTaskRunnerConfig) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) CuratorFramework(org.apache.curator.framework.CuratorFramework) Predicate(com.google.common.base.Predicate) WorkerNodeService(org.apache.druid.discovery.WorkerNodeService) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) WorkerSelectStrategy(org.apache.druid.indexing.overlord.setup.WorkerSelectStrategy) 
Joiner(com.google.common.base.Joiner) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ScheduledExecutors(org.apache.druid.java.util.common.concurrent.ScheduledExecutors) HttpClient(org.apache.druid.java.util.http.client.HttpClient) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Supplier(com.google.common.base.Supplier) HashMap(java.util.HashMap) HttpRemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig) TaskStatus(org.apache.druid.indexer.TaskStatus) ScalingStats(org.apache.druid.indexing.overlord.autoscaling.ScalingStats) Worker(org.apache.druid.indexing.worker.Worker) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashSet(java.util.HashSet) DruidNodeDiscovery(org.apache.druid.discovery.DruidNodeDiscovery) ImmutableList(com.google.common.collect.ImmutableList) LifecycleStop(org.apache.druid.java.util.common.lifecycle.LifecycleStop) Request(org.apache.druid.java.util.http.client.Request) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) ByteSource(com.google.common.io.ByteSource) ProvisioningService(org.apache.druid.indexing.overlord.autoscaling.ProvisioningService) ListeningScheduledExecutorService(com.google.common.util.concurrent.ListeningScheduledExecutorService) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) ImmutableWorkerInfo(org.apache.druid.indexing.overlord.ImmutableWorkerInfo) Period(org.joda.time.Period) TaskRunnerUtils(org.apache.druid.indexing.overlord.TaskRunnerUtils) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) KeeperException(org.apache.zookeeper.KeeperException) Iterator(java.util.Iterator) Executor(java.util.concurrent.Executor) 
IndexerZkConfig(org.apache.druid.server.initialization.IndexerZkConfig) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Throwables(com.google.common.base.Throwables) TaskLocation(org.apache.druid.indexer.TaskLocation) WorkerTaskRunner(org.apache.druid.indexing.overlord.WorkerTaskRunner) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) FutureCallback(com.google.common.util.concurrent.FutureCallback) RunnerTaskState(org.apache.druid.indexer.RunnerTaskState) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) ProvisioningStrategy(org.apache.druid.indexing.overlord.autoscaling.ProvisioningStrategy) Futures(com.google.common.util.concurrent.Futures) LifecycleLock(org.apache.druid.concurrent.LifecycleLock) ListenableScheduledFuture(com.google.common.util.concurrent.ListenableScheduledFuture) TaskRunnerWorkItem(org.apache.druid.indexing.overlord.TaskRunnerWorkItem) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) InputStream(java.io.InputStream) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) DruidNodeDiscovery(org.apache.druid.discovery.DruidNodeDiscovery) ISE(org.apache.druid.java.util.common.ISE) CountDownLatch(java.util.concurrent.CountDownLatch)

Example 42 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class HttpRemoteTaskRunner method getLazyTaskSlotCount.

/**
 * Sums the capacity of all lazy workers, grouped by worker category.
 *
 * @return a new mutable map from worker category to the total capacity of the lazy workers in
 *         that category; empty when there are no lazy workers
 */
@Override
public Map<String, Long> getLazyTaskSlotCount() {
    final Map<String, Long> slotsByCategory = new HashMap<>();
    for (Worker lazyWorker : getLazyWorkers()) {
        // merge() stores the capacity on first sight of a category and adds to it afterwards.
        slotsByCategory.merge(lazyWorker.getCategory(), (long) lazyWorker.getCapacity(), Long::sum);
    }
    return slotsByCategory;
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Worker(org.apache.druid.indexing.worker.Worker)

Example 43 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class HttpRemoteTaskRunner method addWorker.

/**
 * Starts tracking the given worker. If no holder is registered for the worker's host, a new one is
 * created, started and stored; otherwise the existing holder is kept. Any pending cleanup for the
 * host is cancelled first, and threads waiting on {@code statusLock} are notified at the end.
 *
 * @param worker the worker that has just been reported
 */
private void addWorker(final Worker worker) {
    synchronized (workers) {
        final String host = worker.getHost();
        log.info("Worker[%s] reportin' for duty!", host);
        cancelWorkerCleanup(host);

        final WorkerHolder existing = workers.get(host);
        if (existing != null) {
            log.info("Worker[%s] already exists.", host);
        } else {
            // Collect the tasks currently recorded as RUNNING on this host so they can be handed
            // to the new holder, which manages the task syncing with that worker.
            final List<TaskAnnouncement> knownRunning = new ArrayList<>();
            synchronized (statusLock) {
                for (Map.Entry<String, HttpRemoteTaskRunnerWorkItem> entry : tasks.entrySet()) {
                    final HttpRemoteTaskRunnerWorkItem item = entry.getValue();
                    if (item.getState() != HttpRemoteTaskRunnerWorkItem.State.RUNNING) {
                        continue;
                    }
                    final Worker assigned = item.getWorker();
                    if (assigned == null || !assigned.getHost().equals(host)) {
                        continue;
                    }
                    knownRunning.add(TaskAnnouncement.create(item.getTask(), TaskStatus.running(entry.getKey()), item.getLocation()));
                }
            }

            final WorkerHolder created = createWorkerHolder(smileMapper, httpClient, config, workersSyncExec, this::taskAddedOrUpdated, worker, knownRunning);
            created.start();
            workers.put(host, created);
        }
    }

    // Wake up any thread waiting on statusLock now that the worker has been handled.
    synchronized (statusLock) {
        statusLock.notifyAll();
    }
}
Also used : TaskAnnouncement(org.apache.druid.indexing.worker.TaskAnnouncement) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) Worker(org.apache.druid.indexing.worker.Worker) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Example 44 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class WorkerHolderTest method testSyncListener.

/**
 * Drives the sync listener of a {@link WorkerHolder} through a full sync, a delta sync and a second
 * full sync, checking after each one the holder's disabled flag and the announcements forwarded
 * to the update callback.
 */
@Test
public void testSyncListener() {
    final String disappearedPrefix = "This task disappeared on the worker where it was assigned";
    final List<TaskAnnouncement> received = new ArrayList<>();

    final Task task0 = NoopTask.create("task0", 0);
    final Task task1 = NoopTask.create("task1", 0);
    final Task task2 = NoopTask.create("task2", 0);
    final Task task3 = NoopTask.create("task3", 0);

    // The holder is created already expecting task0 and task1 to be running on the worker.
    final WorkerHolder workerHolder = new WorkerHolder(
        TestHelper.makeJsonMapper(),
        EasyMock.createNiceMock(HttpClient.class),
        new HttpRemoteTaskRunnerConfig(),
        EasyMock.createNiceMock(ScheduledExecutorService.class),
        (taskAnnouncement, holder) -> received.add(taskAnnouncement),
        new Worker("http", "localhost", "127.0.0.1", 5, "v0", WorkerConfig.DEFAULT_CATEGORY),
        ImmutableList.of(
            TaskAnnouncement.create(task0, TaskStatus.running(task0.getId()), TaskLocation.unknown()),
            TaskAnnouncement.create(task1, TaskStatus.running(task1.getId()), TaskLocation.unknown())
        )
    );
    final ChangeRequestHttpSyncer.Listener<WorkerHistoryItem> syncListener = workerHolder.createSyncListener();

    // Before any sync has happened the holder reports itself as disabled.
    Assert.assertTrue(workerHolder.disabled.get());

    // --- First full sync: metadata says "not disabled"; task1..task3 are reported, task0 is not.
    syncListener.fullSync(
        ImmutableList.of(
            new WorkerHistoryItem.Metadata(false),
            new WorkerHistoryItem.TaskUpdate(TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("w1", 1, -1))),
            new WorkerHistoryItem.TaskUpdate(TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("w1", 2, -1))),
            new WorkerHistoryItem.TaskUpdate(TaskAnnouncement.create(task3, TaskStatus.running(task3.getId()), TaskLocation.create("w1", 2, -1)))
        )
    );
    Assert.assertFalse(workerHolder.disabled.get());
    Assert.assertEquals(4, received.size());

    Assert.assertEquals(task1.getId(), received.get(0).getTaskId());
    Assert.assertTrue(received.get(0).getTaskStatus().isSuccess());

    Assert.assertEquals(task2.getId(), received.get(1).getTaskId());
    Assert.assertTrue(received.get(1).getTaskStatus().isRunnable());

    Assert.assertEquals(task3.getId(), received.get(2).getTaskId());
    Assert.assertTrue(received.get(2).getTaskStatus().isRunnable());

    // task0 was expected but missing from the sync, so it comes back last as a failure.
    final TaskAnnouncement missingAfterFullSync = received.get(3);
    Assert.assertEquals(task0.getId(), missingAfterFullSync.getTaskId());
    Assert.assertTrue(missingAfterFullSync.getTaskStatus().isFailure());
    Assert.assertNotNull(missingAfterFullSync.getTaskStatus().getErrorMsg());
    Assert.assertTrue(missingAfterFullSync.getTaskStatus().getErrorMsg().startsWith(disappearedPrefix));
    received.clear();

    // --- Delta sync: metadata flips false -> true -> false; task1 and task2 are removed and
    // task3 is updated. Only task2 (failure) and task3 (runnable) are expected as updates.
    syncListener.deltaSync(
        ImmutableList.of(
            new WorkerHistoryItem.Metadata(false),
            new WorkerHistoryItem.TaskRemoval(task1.getId()),
            new WorkerHistoryItem.Metadata(true),
            new WorkerHistoryItem.TaskRemoval(task2.getId()),
            new WorkerHistoryItem.Metadata(false),
            new WorkerHistoryItem.TaskUpdate(TaskAnnouncement.create(task3, TaskStatus.running(task3.getId()), TaskLocation.create("w1", 3, -1)))
        )
    );
    Assert.assertFalse(workerHolder.disabled.get());
    Assert.assertEquals(2, received.size());

    final TaskAnnouncement removedWhileRunning = received.get(0);
    Assert.assertEquals(task2.getId(), removedWhileRunning.getTaskId());
    Assert.assertTrue(removedWhileRunning.getTaskStatus().isFailure());
    Assert.assertNotNull(removedWhileRunning.getTaskStatus().getErrorMsg());
    Assert.assertTrue(removedWhileRunning.getTaskStatus().getErrorMsg().startsWith(disappearedPrefix));

    Assert.assertEquals(task3.getId(), received.get(1).getTaskId());
    Assert.assertTrue(received.get(1).getTaskStatus().isRunnable());
    received.clear();

    // --- Second full sync: metadata says "disabled"; the same three tasks are reported again.
    syncListener.fullSync(
        ImmutableList.of(
            new WorkerHistoryItem.Metadata(true),
            new WorkerHistoryItem.TaskUpdate(TaskAnnouncement.create(task1, TaskStatus.success(task1.getId()), TaskLocation.create("w1", 1, -1))),
            new WorkerHistoryItem.TaskUpdate(TaskAnnouncement.create(task2, TaskStatus.running(task2.getId()), TaskLocation.create("w1", 2, -1))),
            new WorkerHistoryItem.TaskUpdate(TaskAnnouncement.create(task3, TaskStatus.running(task3.getId()), TaskLocation.create("w1", 2, -1)))
        )
    );
    Assert.assertTrue(workerHolder.disabled.get());
    Assert.assertEquals(3, received.size());

    Assert.assertEquals(task1.getId(), received.get(0).getTaskId());
    Assert.assertTrue(received.get(0).getTaskStatus().isSuccess());

    Assert.assertEquals(task2.getId(), received.get(1).getTaskId());
    Assert.assertTrue(received.get(1).getTaskStatus().isRunnable());

    Assert.assertEquals(task3.getId(), received.get(2).getTaskId());
    Assert.assertTrue(received.get(2).getTaskStatus().isRunnable());
    received.clear();
}
Also used : NoopTask(org.apache.druid.indexing.common.task.NoopTask) Task(org.apache.druid.indexing.common.task.Task) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TaskAnnouncement(org.apache.druid.indexing.worker.TaskAnnouncement) WorkerHistoryItem(org.apache.druid.indexing.worker.WorkerHistoryItem) ArrayList(java.util.ArrayList) ChangeRequestHttpSyncer(org.apache.druid.server.coordination.ChangeRequestHttpSyncer) HttpRemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig) HttpClient(org.apache.druid.java.util.http.client.HttpClient) Worker(org.apache.druid.indexing.worker.Worker) Test(org.junit.Test)

Example 45 with Worker

use of org.apache.druid.indexing.worker.Worker in project druid by druid-io.

the class OverlordResourceTest method testGetTotalWorkerCapacityWithAutoScaleConfiguredAndProvisioningStrategyNotSupportExpectedWorkerCapacity.

/**
 * With an auto-scaler configured but a provisioning strategy that returns -1 as the expected
 * worker capacity, the endpoint must answer OK, report the single worker's capacity as the current
 * cluster capacity, and pass the -1 through as the maximum capacity with auto-scale.
 */
@Test
public void testGetTotalWorkerCapacityWithAutoScaleConfiguredAndProvisioningStrategyNotSupportExpectedWorkerCapacity() {
    final int invalidExpectedCapacity = -1;
    final int maxNumWorkers = 2;

    // One worker with capacity 3 is the whole cluster as seen by the task runner.
    final WorkerTaskRunner workerTaskRunner = EasyMock.createMock(WorkerTaskRunner.class);
    final Worker onlyWorker = new Worker("http", "testWorker", "192.0.0.1", 3, "v1", WorkerConfig.DEFAULT_CATEGORY);
    final Collection<ImmutableWorkerInfo> workerInfos = ImmutableList.of(
        new ImmutableWorkerInfo(
            onlyWorker,
            2,
            ImmutableSet.of("grp1", "grp2"),
            ImmutableSet.of("task1", "task2"),
            DateTimes.of("2015-01-01T01:01:01Z")
        )
    );
    EasyMock.expect(workerTaskRunner.getWorkers()).andReturn(workerInfos);

    // taskMaster is reset before its expectation is recorded for this test.
    EasyMock.reset(taskMaster);
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(workerTaskRunner)).anyTimes();
    EasyMock.expect(provisioningStrategy.getExpectedWorkerCapacity(workerInfos)).andReturn(invalidExpectedCapacity).anyTimes();

    final AutoScaler autoScaler = EasyMock.createMock(AutoScaler.class);
    EasyMock.expect(autoScaler.getMinNumWorkers()).andReturn(0);
    EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(maxNumWorkers);

    final DefaultWorkerBehaviorConfig behaviorConfig = new DefaultWorkerBehaviorConfig(null, autoScaler);
    final AtomicReference<WorkerBehaviorConfig> behaviorConfigRef = new AtomicReference<>(behaviorConfig);
    EasyMock.expect(configManager.watch(WorkerBehaviorConfig.CONFIG_KEY, WorkerBehaviorConfig.class)).andReturn(behaviorConfigRef);

    EasyMock.replay(
        workerTaskRunner,
        autoScaler,
        taskRunner,
        taskMaster,
        taskStorageQueryAdapter,
        indexerMetadataStorageAdapter,
        req,
        workerTaskRunnerQueryAdapter,
        configManager,
        provisioningStrategy
    );

    final Response response = overlordResource.getTotalWorkerCapacity();
    Assert.assertEquals(HttpResponseStatus.OK.getCode(), response.getStatus());

    final int firstWorkerCapacity = workerInfos.iterator().next().getWorker().getCapacity();
    final TotalWorkerCapacityResponse capacity = (TotalWorkerCapacityResponse) response.getEntity();
    Assert.assertEquals(firstWorkerCapacity, capacity.getCurrentClusterCapacity());
    Assert.assertEquals(invalidExpectedCapacity, capacity.getMaximumCapacityWithAutoScale());
}
Also used : Response(javax.ws.rs.core.Response) WorkerBehaviorConfig(org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig) DefaultWorkerBehaviorConfig(org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig) WorkerTaskRunner(org.apache.druid.indexing.overlord.WorkerTaskRunner) DefaultWorkerBehaviorConfig(org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig) Worker(org.apache.druid.indexing.worker.Worker) AtomicReference(java.util.concurrent.atomic.AtomicReference) AutoScaler(org.apache.druid.indexing.overlord.autoscaling.AutoScaler) ImmutableWorkerInfo(org.apache.druid.indexing.overlord.ImmutableWorkerInfo) Test(org.junit.Test)

Aggregations

Worker (org.apache.druid.indexing.worker.Worker)46 Test (org.junit.Test)32 NoopTask (org.apache.druid.indexing.common.task.NoopTask)21 ImmutableWorkerInfo (org.apache.druid.indexing.overlord.ImmutableWorkerInfo)15 ArrayList (java.util.ArrayList)14 Task (org.apache.druid.indexing.common.task.Task)13 TaskStorage (org.apache.druid.indexing.overlord.TaskStorage)12 RemoteTaskRunnerConfig (org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig)11 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)10 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)10 AtomicReference (java.util.concurrent.atomic.AtomicReference)10 HttpRemoteTaskRunnerConfig (org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig)10 HttpClient (org.apache.druid.java.util.http.client.HttpClient)10 IndexerZkConfig (org.apache.druid.server.initialization.IndexerZkConfig)10 ImmutableList (com.google.common.collect.ImmutableList)9 List (java.util.List)9 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)9 CuratorFramework (org.apache.curator.framework.CuratorFramework)9 DruidNodeDiscoveryProvider (org.apache.druid.discovery.DruidNodeDiscoveryProvider)9 TaskStatus (org.apache.druid.indexer.TaskStatus)9