use of io.druid.indexing.worker.Worker in project druid by druid-io.
the class RemoteTaskRunner method start.
/**
 * Starts the runner: registers a listener on the worker path cache, starts the cache, and blocks
 * until the initial set of workers has been processed. Returns immediately if already started.
 *
 * <p>Start-up synchronization uses the {@code waitingFor} counter guarded by
 * {@code waitingForMonitor}. The counter starts at 1 (released when the cache delivers its
 * INITIALIZED event), is incremented for every CHILD_ADDED event, and is decremented when the
 * future returned by {@code addWorker} completes, successfully or not. This method waits until
 * the counter drops to zero before scheduling blacklist cleanup and starting resource management.
 *
 * <p>Any exception is rethrown via {@code Throwables.propagate}; if the wait is interrupted, the
 * thread's interrupt flag is restored first.
 */
@Override
@LifecycleStart
public void start() {
  try {
    if (started) {
      return;
    }
    // Starts at 1: that unit is released by the INITIALIZED event below.
    final MutableInt waitingFor = new MutableInt(1);
    final Object waitingForMonitor = new Object();
    // Add listener for creation/deletion of workers
    workerPathCache.getListenable().addListener(new PathChildrenCacheListener() {
      @Override
      public void childEvent(CuratorFramework client, final PathChildrenCacheEvent event) throws Exception {
        final Worker worker;
        switch (event.getType()) {
          case CHILD_ADDED:
            worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
            synchronized (waitingForMonitor) {
              waitingFor.increment();
            }
            // NOTE(review): if addWorker throws synchronously instead of failing its future, the
            // increment above is never undone and start() would wait forever — confirm.
            Futures.addCallback(addWorker(worker), new FutureCallback<ZkWorker>() {
              @Override
              public void onSuccess(ZkWorker zkWorker) {
                synchronized (waitingForMonitor) {
                  waitingFor.decrement();
                  waitingForMonitor.notifyAll();
                }
              }

              @Override
              public void onFailure(Throwable throwable) {
                // A failed add must still release the waiter, otherwise start() never returns.
                synchronized (waitingForMonitor) {
                  waitingFor.decrement();
                  waitingForMonitor.notifyAll();
                }
              }
            });
            break;
          case CHILD_UPDATED:
            worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
            updateWorker(worker);
            break;
          case CHILD_REMOVED:
            worker = jsonMapper.readValue(event.getData().getData(), Worker.class);
            removeWorker(worker);
            break;
          case INITIALIZED:
            // Schedule cleanup for task status of the workers that might have disconnected while overlord was not running
            List<String> workers;
            try {
              workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath());
            } catch (KeeperException.NoNodeException e) {
              // statusPath doesn't exist yet; can occur if no middleManagers have started.
              workers = ImmutableList.of();
            }
            for (String workerId : workers) {
              final String workerAnnouncePath = JOINER.join(indexerZkConfig.getAnnouncementsPath(), workerId);
              final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId);
              // Only workers that are neither tracked nor currently announced need cleanup.
              if (!zkWorkers.containsKey(workerId) && cf.checkExists().forPath(workerAnnouncePath) == null) {
                try {
                  scheduleTasksCleanupForWorker(workerId, cf.getChildren().forPath(workerStatusPath));
                } catch (Exception e) {
                  log.warn(e, "Could not schedule cleanup for worker[%s] during startup (maybe someone removed the status znode[%s]?). Skipping.", workerId, workerStatusPath);
                }
              }
            }
            // Release the initial unit the counter was created with.
            synchronized (waitingForMonitor) {
              waitingFor.decrement();
              waitingForMonitor.notifyAll();
            }
            // Explicit break: previously this case fell through into default.
            break;
          default:
            break;
        }
      }
    });
    workerPathCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT);
    synchronized (waitingForMonitor) {
      // Loop guards against spurious wake-ups and wake-ups with work still outstanding.
      while (waitingFor.intValue() > 0) {
        waitingForMonitor.wait();
      }
    }
    scheduleBlackListedNodesCleanUp();
    resourceManagement.startManagement(this);
    started = true;
  } catch (InterruptedException e) {
    // Restore the interrupt flag so callers up the stack can still observe the interruption.
    Thread.currentThread().interrupt();
    throw Throwables.propagate(e);
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
use of io.druid.indexing.worker.Worker in project druid by druid-io.
the class RemoteTaskRunner method cleanup.
/**
 * Removes a task from the complete queue and clears out the ZK status path of the task.
 *
 * <p>Does nothing if the runner has not been started. If the task is not in the complete queue,
 * or has no worker assigned, an alert is emitted and no ZK path is touched.
 *
 * @param taskId - the task to cleanup
 */
private void cleanup(final String taskId) {
  if (!started) {
    return;
  }
  final RemoteTaskRunnerWorkItem removed = completeTasks.remove(taskId);
  // Guard the dereference: remove() yields null for an unknown task id, and calling
  // getWorker() on it would throw a NullPointerException before the alert below could fire.
  final Worker worker = removed == null ? null : removed.getWorker();
  if (removed == null || worker == null) {
    log.makeAlert("WTF?! Asked to cleanup nonexistent task").addData("taskId", taskId).emit();
  } else {
    final String workerId = worker.getHost();
    log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId);
    final String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId, taskId);
    try {
      cf.delete().guaranteed().forPath(statusPath);
    } catch (KeeperException.NoNodeException e) {
      // A missing node means the status is already gone, which is the desired end state.
      log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath);
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
  }
}
use of io.druid.indexing.worker.Worker in project druid by druid-io.
the class WorkerResource method isEnabled.
/**
 * Reports whether this worker is enabled, i.e. whether its version differs (ignoring case) from
 * {@code DISABLED_VERSION}.
 *
 * @return a 200 response whose body maps the worker's host to the enabled flag, or a bare
 *         server-error response if anything throws while looking the worker up
 */
@GET
@Path("/enabled")
@Produces(MediaType.APPLICATION_JSON)
@ResourceFilters(StateResourceFilter.class)
public Response isEnabled() {
  try {
    final Worker self = curatorCoordinator.getWorker();
    final String currentVersion = self.getVersion();
    final boolean disabled = currentVersion.equalsIgnoreCase(DISABLED_VERSION);
    return Response.ok(ImmutableMap.of(self.getHost(), !disabled)).build();
  } catch (Exception e) {
    // Any failure is reported to the client as a plain server error with no body.
    return Response.serverError().build();
  }
}
use of io.druid.indexing.worker.Worker in project druid by druid-io.
the class PendingTaskBasedWorkerResourceManagementStrategy method doTerminate.
/**
 * Attempts to terminate lazy workers through the configured auto-scaler.
 *
 * <p>Nothing is terminated when no worker config is available, when nodes are currently being
 * provisioned, or when a previous termination is still in progress. In the last case the elapsed
 * time is logged and, once it exceeds the configured maximum scaling duration, an alert is
 * emitted and the in-progress set is cleared.
 *
 * @param runner the task runner whose workers are inspected and marked lazy
 * @return {@code true} if the auto-scaler reported terminated nodes during this call
 */
@Override
public boolean doTerminate(WorkerTaskRunner runner) {
  // Snapshot taken before acquiring the lock, as in the original ordering.
  final Collection<ImmutableWorkerInfo> allWorkers = runner.getWorkers();
  synchronized (lock) {
    final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
    if (workerConfig == null) {
      log.warn("No workerConfig available, cannot terminate workers.");
      return false;
    }
    if (!currentlyProvisioning.isEmpty()) {
      log.debug("Already provisioning nodes, Not Terminating any nodes.");
      return false;
    }

    // Keep only those terminating ids that still show up among the lazy workers' node ids.
    final Collection<String> lazyNodeIds = getWorkerNodeIDs(runner.getLazyWorkers(), workerConfig);
    final Set<String> survivors = Sets.newHashSet();
    for (String nodeId : currentlyTerminating) {
      if (lazyNodeIds.contains(nodeId)) {
        survivors.add(nodeId);
      }
    }
    currentlyTerminating.clear();
    currentlyTerminating.addAll(survivors);

    if (!currentlyTerminating.isEmpty()) {
      // A previous termination is still pending: report progress, alert if it is overdue.
      final Duration elapsed = new Duration(lastTerminateTime, new DateTime());
      log.info("%s terminating. Current wait time: %s", currentlyTerminating, elapsed);
      if (elapsed.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
        log.makeAlert("Worker node termination taking too long!").addData("millisSinceLastTerminate", elapsed.getMillis()).addData("terminatingCount", currentlyTerminating.size()).emit();
        currentlyTerminating.clear();
      }
      return false;
    }

    final Function<Worker, String> toIp = new Function<Worker, String>() {
      @Override
      public String apply(Worker lazyWorker) {
        return lazyWorker.getIp();
      }
    };
    final int terminateLimit = maxWorkersToTerminate(allWorkers, workerConfig);
    final Predicate<ImmutableWorkerInfo> lazyPredicate = ResourceManagementUtil.createLazyWorkerPredicate(config);
    final List<String> lazyIps = Lists.newArrayList(
        Collections2.transform(runner.markWorkersLazy(lazyPredicate, terminateLimit), toIp)
    );

    if (lazyIps.isEmpty()) {
      log.debug("Found no lazy workers");
      return false;
    }

    log.info("Terminating %,d lazy workers: %s", lazyIps.size(), Joiner.on(", ").join(lazyIps));
    final AutoScalingData terminated = workerConfig.getAutoScaler().terminate(lazyIps);
    if (terminated == null) {
      return false;
    }
    currentlyTerminating.addAll(terminated.getNodeIds());
    lastTerminateTime = new DateTime();
    scalingStats.addTerminateEvent(terminated);
    return true;
  }
}
use of io.druid.indexing.worker.Worker in project druid by druid-io.
the class ImmutableWorkerInfoTest method testSerde.
/**
 * Verifies that an {@code ImmutableWorkerInfo} written to JSON and read back compares equal to
 * the original instance.
 */
@Test
public void testSerde() throws Exception {
  final Worker worker = new Worker("testWorker", "192.0.0.1", 10, "v1");
  final ImmutableWorkerInfo expected = new ImmutableWorkerInfo(
      worker,
      2,
      ImmutableSet.of("grp1", "grp2"),
      ImmutableSet.of("task1", "task2"),
      new DateTime("2015-01-01T01:01:01Z")
  );
  final ObjectMapper jsonMapper = new DefaultObjectMapper();

  // Round-trip through the JSON string form.
  final String json = jsonMapper.writeValueAsString(expected);
  final ImmutableWorkerInfo roundTripped = jsonMapper.readValue(json, ImmutableWorkerInfo.class);

  Assert.assertEquals(expected, roundTripped);
}
Aggregations