Search in sources:

Example 1 with Worker

use of io.druid.indexing.worker.Worker in project druid by druid-io.

the class RemoteTaskRunner method start.

/**
 * Starts this runner: registers a listener on the worker path cache, starts the cache, blocks
 * until the initial set of workers has been processed, and then kicks off blacklist cleanup and
 * resource management.
 *
 * <p>Does nothing if {@code started} is already true. Start-up synchronization uses a counter
 * ({@code waitingFor}) guarded by {@code waitingForMonitor}: it begins at 1 (released by the
 * INITIALIZED event), is incremented for every CHILD_ADDED event, and is decremented when the
 * corresponding {@code addWorker} future completes (successfully or not). This method waits until
 * the counter is no longer positive.
 *
 * <p>Any exception raised in this method's own body is rethrown via {@code Throwables.propagate}.
 */
@Override
@LifecycleStart
public void start() {
    try {
        if (started) {
            return;
        }
        // Starts at 1: that unit is released by the INITIALIZED case below.
        final MutableInt waitingFor = new MutableInt(1);
        // Monitor guarding waitingFor; also used for wait/notifyAll signalling.
        final Object waitingForMonitor = new Object();
        // Add listener for creation/deletion of workers
        workerPathCache.getListenable().addListener(new PathChildrenCacheListener() {

            @Override
            public void childEvent(CuratorFramework client, final PathChildrenCacheEvent event) throws Exception {
                final Worker worker;
                switch(event.getType()) {
                    case CHILD_ADDED:
                        worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
                        // One more pending unit of work; released by the callback below.
                        synchronized (waitingForMonitor) {
                            waitingFor.increment();
                        }
                        // NOTE(review): if addWorker(...) throws synchronously, nothing here undoes the
                        // increment above — confirm whether addWorker can throw before returning a future.
                        Futures.addCallback(addWorker(worker), new FutureCallback<ZkWorker>() {

                            // Success and failure are treated identically for start-up purposes:
                            // both release the pending unit and wake the waiter.
                            @Override
                            public void onSuccess(ZkWorker zkWorker) {
                                synchronized (waitingForMonitor) {
                                    waitingFor.decrement();
                                    waitingForMonitor.notifyAll();
                                }
                            }

                            @Override
                            public void onFailure(Throwable throwable) {
                                synchronized (waitingForMonitor) {
                                    waitingFor.decrement();
                                    waitingForMonitor.notifyAll();
                                }
                            }
                        });
                        break;
                    case CHILD_UPDATED:
                        worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
                        updateWorker(worker);
                        break;
                    case CHILD_REMOVED:
                        worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
                        removeWorker(worker);
                        break;
                    case INITIALIZED:
                        // Schedule cleanup for task status of the workers that might have disconnected while overlord was not running
                        List<String> workers;
                        try {
                            workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath());
                        } catch (KeeperException.NoNodeException e) {
                            // statusPath doesn't exist yet; can occur if no middleManagers have started.
                            workers = ImmutableList.of();
                        }
                        for (String workerId : workers) {
                            final String workerAnnouncePath = JOINER.join(indexerZkConfig.getAnnouncementsPath(), workerId);
                            final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId);
                            // Only workers that are neither tracked in zkWorkers nor announced in ZK get a cleanup scheduled.
                            // NOTE(review): checkExists() sits outside the inner try, so an exception from it
                            // (or a non-NoNode exception from getChildren above) leaves this case before the
                            // decrement below — start() would presumably keep waiting; confirm.
                            if (!zkWorkers.containsKey(workerId) && cf.checkExists().forPath(workerAnnouncePath) == null) {
                                try {
                                    scheduleTasksCleanupForWorker(workerId, cf.getChildren().forPath(workerStatusPath));
                                } catch (Exception e) {
                                    // Best effort: a failure for one worker is logged and does not abort the loop.
                                    log.warn(e, "Could not schedule cleanup for worker[%s] during startup (maybe someone removed the status znode[%s]?). Skipping.", workerId, workerStatusPath);
                                }
                            }
                        }
                        // Release the initial unit that waitingFor was created with.
                        synchronized (waitingForMonitor) {
                            waitingFor.decrement();
                            waitingForMonitor.notifyAll();
                        }
                    // NOTE(review): no break above, so INITIALIZED falls through into default. Harmless
                    // today because default only breaks, but fragile if a case is ever added in between.
                    default:
                        break;
                }
            }
        });
        // The INITIALIZED case above relies on this start mode delivering an INITIALIZED event
        // (see Curator's PathChildrenCache.StartMode documentation).
        workerPathCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT);
        // Block until the INITIALIZED event and every pending addWorker future have reported in.
        // The counter is re-checked after every wake-up.
        synchronized (waitingForMonitor) {
            while (waitingFor.intValue() > 0) {
                waitingForMonitor.wait();
            }
        }
        scheduleBlackListedNodesCleanUp();
        resourceManagement.startManagement(this);
        // Only flagged as started once everything above has completed without throwing.
        started = true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : PathChildrenCacheListener(org.apache.curator.framework.recipes.cache.PathChildrenCacheListener) PathChildrenCacheEvent(org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent) KeeperException(org.apache.zookeeper.KeeperException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CuratorFramework(org.apache.curator.framework.CuratorFramework) MutableInt(org.apache.commons.lang.mutable.MutableInt) Worker(io.druid.indexing.worker.Worker) List(java.util.List) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ImmutableList(com.google.common.collect.ImmutableList) FutureCallback(com.google.common.util.concurrent.FutureCallback) LifecycleStart(io.druid.java.util.common.lifecycle.LifecycleStart)

Example 2 with Worker

use of io.druid.indexing.worker.Worker in project druid by druid-io.

the class RemoteTaskRunner method cleanup.

/**
 * Removes a task from the complete queue and clears out the ZK status path of the task.
 *
 * <p>Does nothing when the runner has not been started. If no completed work item exists for
 * {@code taskId}, or the work item has no worker attached, an alert is emitted and nothing is
 * deleted. A status path that is already gone is logged and otherwise ignored; any other failure
 * while deleting is rethrown via {@code Throwables.propagate}.
 *
 * @param taskId - the task to cleanup
 */
private void cleanup(final String taskId) {
    if (!started) {
        return;
    }
    final RemoteTaskRunnerWorkItem removed = completeTasks.remove(taskId);
    // Only dereference the removed item when it exists. Calling removed.getWorker() unconditionally
    // would throw a NullPointerException for an unknown task and skip the alert below.
    final Worker worker = removed == null ? null : removed.getWorker();
    if (removed == null || worker == null) {
        log.makeAlert("WTF?! Asked to cleanup nonexistent task").addData("taskId", taskId).emit();
    } else {
        final String workerId = worker.getHost();
        log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId);
        final String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId, taskId);
        try {
            cf.delete().guaranteed().forPath(statusPath);
        } catch (KeeperException.NoNodeException e) {
            // The znode vanished before we could delete it; the goal (no status node) is met.
            log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath);
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }
}
Also used : Worker(io.druid.indexing.worker.Worker) KeeperException(org.apache.zookeeper.KeeperException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 3 with Worker

use of io.druid.indexing.worker.Worker in project druid by druid-io.

the class WorkerResource method isEnabled.

/**
 * Reports whether this worker is currently enabled.
 *
 * <p>The worker obtained from {@code curatorCoordinator} counts as enabled unless its version
 * equals {@code DISABLED_VERSION} (compared case-insensitively).
 *
 * @return an OK response whose entity is a single-entry map from the worker's host to the enabled
 *         flag, or a server-error response without an entity if anything throws
 */
@GET
@Path("/enabled")
@Produces(MediaType.APPLICATION_JSON)
@ResourceFilters(StateResourceFilter.class)
public Response isEnabled() {
    try {
        final Worker current = curatorCoordinator.getWorker();
        final String version = current.getVersion();
        final boolean disabled = version.equalsIgnoreCase(DISABLED_VERSION);
        final String host = current.getHost();
        return Response.ok(ImmutableMap.of(host, !disabled)).build();
    } catch (Exception e) {
        // Every failure is reported to the caller as a plain server error.
        return Response.serverError().build();
    }
}
Also used : Worker(io.druid.indexing.worker.Worker) IOException(java.io.IOException) Path(javax.ws.rs.Path) ResourceFilters(com.sun.jersey.spi.container.ResourceFilters) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 4 with Worker

use of io.druid.indexing.worker.Worker in project druid by druid-io.

the class PendingTaskBasedWorkerResourceManagementStrategy method doTerminate.

/**
 * Attempts to terminate lazy workers through the configured autoscaler.
 *
 * <p>Under {@code lock}: bails out if no worker config is available or if nodes are currently being
 * provisioned. Otherwise prunes {@code currentlyTerminating} down to the ids still reported for the
 * runner's lazy workers. If terminations are still pending, only logs the wait time (alerting and
 * clearing the pending set once it exceeds the configured max scaling duration). If none are
 * pending, marks up to the allowed number of workers lazy and asks the autoscaler to terminate them.
 *
 * @param runner the task runner whose workers are inspected and marked lazy
 * @return true only when the autoscaler returned a non-null termination result in this call
 */
@Override
public boolean doTerminate(WorkerTaskRunner runner) {
    // Snapshot of workers is taken before entering the lock, as in the original flow.
    final Collection<ImmutableWorkerInfo> allWorkers = runner.getWorkers();
    synchronized (lock) {
        final WorkerBehaviorConfig behaviorConfig = workerConfigRef.get();
        if (behaviorConfig == null) {
            log.warn("No workerConfig available, cannot terminate workers.");
            return false;
        }
        if (!currentlyProvisioning.isEmpty()) {
            log.debug("Already provisioning nodes, Not Terminating any nodes.");
            return false;
        }

        // Drop pending terminations whose node ids are no longer reported.
        final Collection<String> lazyNodeIds = getWorkerNodeIDs(runner.getLazyWorkers(), behaviorConfig);
        final Set<String> survivors = Sets.newHashSet();
        for (String nodeId : currentlyTerminating) {
            if (lazyNodeIds.contains(nodeId)) {
                survivors.add(nodeId);
            }
        }
        currentlyTerminating.clear();
        currentlyTerminating.addAll(survivors);

        // Terminations still in flight: report progress, and give up tracking them if too slow.
        if (!currentlyTerminating.isEmpty()) {
            final Duration elapsed = new Duration(lastTerminateTime, new DateTime());
            log.info("%s terminating. Current wait time: %s", currentlyTerminating, elapsed);
            if (elapsed.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
                log.makeAlert("Worker node termination taking too long!").addData("millisSinceLastTerminate", elapsed.getMillis()).addData("terminatingCount", currentlyTerminating.size()).emit();
                currentlyTerminating.clear();
            }
            return false;
        }

        // Nothing pending: pick lazy workers and hand their IPs to the autoscaler.
        final int terminationLimit = maxWorkersToTerminate(allWorkers, behaviorConfig);
        final Predicate<ImmutableWorkerInfo> lazyPredicate = ResourceManagementUtil.createLazyWorkerPredicate(config);
        final Function<Worker, String> toIp = new Function<Worker, String>() {

            @Override
            public String apply(Worker lazyWorker) {
                return lazyWorker.getIp();
            }
        };
        final List<String> lazyIps = Lists.newArrayList(Collections2.transform(runner.markWorkersLazy(lazyPredicate, terminationLimit), toIp));
        if (lazyIps.isEmpty()) {
            log.debug("Found no lazy workers");
            return false;
        }

        log.info("Terminating %,d lazy workers: %s", lazyIps.size(), Joiner.on(", ").join(lazyIps));
        final AutoScalingData terminated = behaviorConfig.getAutoScaler().terminate(lazyIps);
        if (terminated == null) {
            return false;
        }
        currentlyTerminating.addAll(terminated.getNodeIds());
        lastTerminateTime = new DateTime();
        scalingStats.addTerminateEvent(terminated);
        return true;
    }
}
Also used : Duration(org.joda.time.Duration) ImmutableWorkerInfo(io.druid.indexing.overlord.ImmutableWorkerInfo) DateTime(org.joda.time.DateTime) WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) Function(com.google.common.base.Function) Worker(io.druid.indexing.worker.Worker)

Example 5 with Worker

use of io.druid.indexing.worker.Worker in project druid by druid-io.

the class ImmutableWorkerInfoTest method testSerde.

/**
 * Verifies that an {@code ImmutableWorkerInfo} written to JSON and read back with a
 * {@code DefaultObjectMapper} equals the original instance.
 */
@Test
public void testSerde() throws Exception {
    final Worker worker = new Worker("testWorker", "192.0.0.1", 10, "v1");
    final ImmutableWorkerInfo expected = new ImmutableWorkerInfo(worker, 2, ImmutableSet.of("grp1", "grp2"), ImmutableSet.of("task1", "task2"), new DateTime("2015-01-01T01:01:01Z"));
    final ObjectMapper jsonMapper = new DefaultObjectMapper();

    // Serialize to a JSON string, then deserialize it again.
    final String json = jsonMapper.writeValueAsString(expected);
    final ImmutableWorkerInfo roundTripped = jsonMapper.readValue(json, ImmutableWorkerInfo.class);

    Assert.assertEquals(expected, roundTripped);
}
Also used : Worker(io.druid.indexing.worker.Worker) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) DateTime(org.joda.time.DateTime) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Aggregations

Worker (io.druid.indexing.worker.Worker)20 Test (org.junit.Test)11 ImmutableWorkerInfo (io.druid.indexing.overlord.ImmutableWorkerInfo)10 RemoteTaskRunnerConfig (io.druid.indexing.overlord.config.RemoteTaskRunnerConfig)7 NoopTask (io.druid.indexing.common.task.NoopTask)6 IOException (java.io.IOException)4 DateTime (org.joda.time.DateTime)4 List (java.util.List)3 Function (com.google.common.base.Function)2 ResourceFilters (com.sun.jersey.spi.container.ResourceFilters)2 RemoteTaskRunner (io.druid.indexing.overlord.RemoteTaskRunner)2 ZkWorker (io.druid.indexing.overlord.ZkWorker)2 WorkerBehaviorConfig (io.druid.indexing.overlord.setup.WorkerBehaviorConfig)2 MalformedURLException (java.net.MalformedURLException)2 ExecutionException (java.util.concurrent.ExecutionException)2 Path (javax.ws.rs.Path)2 Produces (javax.ws.rs.Produces)2 Response (javax.ws.rs.core.Response)2 KeeperException (org.apache.zookeeper.KeeperException)2 Duration (org.joda.time.Duration)2