Usage of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid (druid-io):
class PendingTaskBasedWorkerResourceManagementStrategy, method doProvision.
/**
 * Attempts to scale the worker pool up based on the currently pending task payloads.
 *
 * @param runner the task runner whose pending tasks and workers drive the scaling decision
 * @return true if at least one provision request was issued to the autoscaler
 */
@Override
public boolean doProvision(WorkerTaskRunner runner)
{
  Collection<Task> pendingTaskPayloads = runner.getPendingTaskPayloads();
  Collection<ImmutableWorkerInfo> currentWorkers = runner.getWorkers();
  synchronized (lock) {
    final WorkerBehaviorConfig behaviorConfig = workerConfigRef.get();
    if (behaviorConfig == null || behaviorConfig.getAutoScaler() == null) {
      log.error("No workerConfig available, cannot provision new workers.");
      return false;
    }

    // Any node id that now shows up in the live worker list has finished provisioning,
    // so drop it from the in-flight set.
    final Collection<String> knownNodeIds = getWorkerNodeIDs(
        Collections2.transform(
            currentWorkers,
            new Function<ImmutableWorkerInfo, Worker>()
            {
              @Override
              public Worker apply(ImmutableWorkerInfo info)
              {
                return info.getWorker();
              }
            }
        ),
        behaviorConfig
    );
    currentlyProvisioning.removeAll(knownNodeIds);

    if (!currentlyProvisioning.isEmpty()) {
      // A previous provision request is still outstanding; do not stack another one.
      // If it has been pending longer than the configured maximum, alert and abandon it.
      final Duration waitedSoFar = new Duration(lastProvisionTime, new DateTime());
      log.info("%s provisioning. Current wait time: %s", currentlyProvisioning, waitedSoFar);
      if (waitedSoFar.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
        log.makeAlert("Worker node provisioning taking too long!")
           .addData("millisSinceLastProvision", waitedSoFar.getMillis())
           .addData("provisioningCount", currentlyProvisioning.size())
           .emit();
        behaviorConfig.getAutoScaler().terminateWithIds(Lists.newArrayList(currentlyProvisioning));
        currentlyProvisioning.clear();
      }
      return false;
    }

    // Keep asking the autoscaler for nodes until the demand is met or it stops delivering.
    boolean provisionedAny = false;
    int remaining = getScaleUpNodeCount(runner.getConfig(), behaviorConfig, pendingTaskPayloads, currentWorkers);
    while (remaining > 0) {
      final AutoScalingData scaled = behaviorConfig.getAutoScaler().provision();
      final List<String> freshNodeIds = scaled == null ? ImmutableList.<String>of() : scaled.getNodeIds();
      if (freshNodeIds.isEmpty()) {
        log.warn("NewNodes is empty, returning from provision loop");
        break;
      }
      currentlyProvisioning.addAll(freshNodeIds);
      lastProvisionTime = new DateTime();
      scalingStats.addProvisionEvent(scaled);
      remaining -= freshNodeIds.size();
      provisionedAny = true;
    }
    return provisionedAny;
  }
}
Usage of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid (druid-io):
class SimpleWorkerResourceManagementStrategy, method doTerminate.
/**
 * Attempts to scale the worker pool down by terminating lazy (idle) workers when the
 * current pool exceeds the target worker count.
 *
 * @param runner the task runner providing pending tasks, lazy workers, and the worker list
 * @return true if at least one terminate request was issued to the autoscaler
 */
boolean doTerminate(WorkerTaskRunner runner)
{
  Collection<? extends TaskRunnerWorkItem> pendingTasks = runner.getPendingTasks();
  synchronized (lock) {
    final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
    // BUGFIX: also guard against a null autoScaler. doProvision performs this same
    // two-part check; without it, the ipToIdLookup call below throws an NPE when the
    // config exists but no autoscaler is configured.
    if (workerConfig == null || workerConfig.getAutoScaler() == null) {
      log.warn("No workerConfig available, cannot terminate workers.");
      return false;
    }

    boolean didTerminate = false;
    // Resolve the autoscaler node ids for workers currently marked lazy.
    final Set<String> workerNodeIds = Sets.newHashSet(
        workerConfig.getAutoScaler().ipToIdLookup(
            Lists.newArrayList(
                Iterables.transform(
                    runner.getLazyWorkers(),
                    new Function<Worker, String>()
                    {
                      @Override
                      public String apply(Worker input)
                      {
                        return input.getIp();
                      }
                    }
                )
            )
        )
    );

    // Keep only the in-flight terminations that still correspond to known lazy nodes;
    // anything else has already disappeared from the cluster.
    final Set<String> stillExisting = Sets.newHashSet();
    for (String s : currentlyTerminating) {
      if (workerNodeIds.contains(s)) {
        stillExisting.add(s);
      }
    }
    currentlyTerminating.clear();
    currentlyTerminating.addAll(stillExisting);

    Collection<ImmutableWorkerInfo> workers = getWorkers(runner);
    updateTargetWorkerCount(workerConfig, pendingTasks, workers);

    if (currentlyTerminating.isEmpty()) {
      // Nodes still being provisioned count toward the pool when computing the excess.
      final int excessWorkers = (workers.size() + currentlyProvisioning.size()) - targetWorkerCount;
      if (excessWorkers > 0) {
        final Predicate<ImmutableWorkerInfo> isLazyWorker = ResourceManagementUtil.createLazyWorkerPredicate(config);
        final Collection<String> laziestWorkerIps = Collections2.transform(
            runner.markWorkersLazy(isLazyWorker, excessWorkers),
            new Function<Worker, String>()
            {
              @Override
              public String apply(Worker worker)
              {
                return worker.getIp();
              }
            }
        );
        if (laziestWorkerIps.isEmpty()) {
          log.info("Wanted to terminate %,d workers, but couldn't find any lazy ones!", excessWorkers);
        } else {
          log.info(
              "Terminating %,d workers (wanted %,d): %s",
              laziestWorkerIps.size(),
              excessWorkers,
              Joiner.on(", ").join(laziestWorkerIps)
          );
          final AutoScalingData terminated = workerConfig.getAutoScaler().terminate(ImmutableList.copyOf(laziestWorkerIps));
          if (terminated != null) {
            currentlyTerminating.addAll(terminated.getNodeIds());
            lastTerminateTime = new DateTime();
            scalingStats.addTerminateEvent(terminated);
            didTerminate = true;
          }
        }
      }
    } else {
      // A previous terminate request is still outstanding; alert and abandon it if
      // it has been pending longer than the configured maximum.
      Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime());
      log.info("%s terminating. Current wait time: %s", currentlyTerminating, durSinceLastTerminate);
      if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
        log.makeAlert("Worker node termination taking too long!")
           .addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis())
           .addData("terminatingCount", currentlyTerminating.size())
           .emit();
        currentlyTerminating.clear();
      }
    }
    return didTerminate;
  }
}
Usage of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid (druid-io):
class SimpleResourceManagementStrategyTest, method setUp.
/**
 * Builds the fixtures shared by every test: a mocked autoscaler, an immediately-successful
 * task, and a SimpleWorkerResourceManagementStrategy wired with zero-timeout config so
 * scaling decisions trigger without waiting.
 */
@Before
public void setUp() throws Exception
{
  autoScaler = EasyMock.createMock(AutoScaler.class);
  testTask = TestTasks.immediateSuccess("task1");

  // Zero idle/pending timeouts make workers eligible for scaling immediately.
  final SimpleWorkerResourceManagementConfig resourceConfig =
      new SimpleWorkerResourceManagementConfig()
          .setWorkerIdleTimeout(new Period(0))
          .setMaxScalingDuration(new Period(1000))
          .setNumEventsToTrack(1)
          .setPendingTaskTimeout(new Period(0))
          .setWorkerVersion("");

  workerConfig = new AtomicReference<>(new WorkerBehaviorConfig(null, autoScaler));

  simpleResourceManagementStrategy = new SimpleWorkerResourceManagementStrategy(
      resourceConfig,
      DSuppliers.of(workerConfig),
      new ResourceManagementSchedulerConfig(),
      executorService
  );
}
Aggregations