Use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.
The class RemoteTaskRunner, method tryAssignTask.
/**
 * Ensures no workers are already running a task before assigning the task to a worker.
 * It is possible that a worker is running a task that the RTR has no knowledge of. This occurs when the RTR
 * needs to bootstrap after a restart.
 *
 * @param task               the task to assign
 * @param taskRunnerWorkItem the work item tracking the task
 *
 * @return true iff the task is now assigned
 */
private boolean tryAssignTask(final Task task, final RemoteTaskRunnerWorkItem taskRunnerWorkItem) throws Exception {
  Preconditions.checkNotNull(task, "task");
  Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem");
  Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id");
  if (runningTasks.containsKey(task.getId()) || findWorkerRunningTask(task.getId()) != null) {
    log.info("Task[%s] already running.", task.getId());
    return true;
  } else {
    // Nothing is running this task; announce it in ZK for a worker to pick up.
    WorkerBehaviorConfig workerConfig = workerConfigRef.get();
    WorkerSelectStrategy strategy;
    if (workerConfig == null || workerConfig.getSelectStrategy() == null) {
      log.warn("No worker selection strategy set. Using default.");
      strategy = WorkerBehaviorConfig.DEFAULT_STRATEGY;
    } else {
      strategy = workerConfig.getSelectStrategy();
    }
    ZkWorker assignedWorker = null;
    Optional<ImmutableWorkerInfo> immutableZkWorker = null;
    try {
      synchronized (workersWithUnacknowledgedTask) {
        // Offer the strategy only eligible workers: not lazy, not blacklisted,
        // and not already reserved for a task that has yet to be acknowledged.
        immutableZkWorker = strategy.findWorkerForTask(
            config,
            ImmutableMap.copyOf(
                Maps.transformEntries(
                    Maps.filterEntries(
                        zkWorkers,
                        new Predicate<Map.Entry<String, ZkWorker>>() {
                          @Override
                          public boolean apply(Map.Entry<String, ZkWorker> input) {
                            return !lazyWorkers.containsKey(input.getKey())
                                   && !workersWithUnacknowledgedTask.containsKey(input.getKey())
                                   && !blackListedWorkers.contains(input.getValue());
                          }
                        }
                    ),
                    new Maps.EntryTransformer<String, ZkWorker, ImmutableWorkerInfo>() {
                      @Override
                      public ImmutableWorkerInfo transformEntry(String key, ZkWorker value) {
                        return value.toImmutable();
                      }
                    }
                )
            ),
            task
        );
        if (immutableZkWorker.isPresent()
            && workersWithUnacknowledgedTask.putIfAbsent(immutableZkWorker.get().getWorker().getHost(), task.getId()) == null) {
          assignedWorker = zkWorkers.get(immutableZkWorker.get().getWorker().getHost());
        }
      }
      if (assignedWorker != null) {
        return announceTask(task, assignedWorker, taskRunnerWorkItem);
      } else {
        log.debug(
            "Unsuccessful task-assign attempt for task [%s] on workers [%s]. Workers to ack tasks are [%s].",
            task.getId(),
            zkWorkers.values(),
            workersWithUnacknowledgedTask
        );
      }
      return false;
    } finally {
      if (assignedWorker != null) {
        workersWithUnacknowledgedTask.remove(assignedWorker.getWorker().getHost());
        // If this attempt won the race to run the task, another task may now be
        // able to use this worker once the task has been acknowledged.
        runPendingTasks();
      }
    }
  }
}
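The heart of the assignment path is the reserve-then-release handshake on workersWithUnacknowledgedTask: putIfAbsent atomically claims a worker for exactly one pending task, and the finally block frees the claim once the assignment attempt finishes. Below is a minimal JDK-only sketch of that pattern; all names are hypothetical stand-ins for the Druid types.

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class WorkerClaimSketch {
  private final ConcurrentMap<String, String> workersWithUnacknowledgedTask = new ConcurrentHashMap<>();

  boolean tryClaimAndAnnounce(String workerHost, String taskId) {
    // putIfAbsent is atomic: only one thread can win the claim for a given worker.
    if (workersWithUnacknowledgedTask.putIfAbsent(workerHost, taskId) != null) {
      return false; // another task already holds this worker
    }
    try {
      return announce(workerHost, taskId); // stand-in for announceTask(...)
    } finally {
      workersWithUnacknowledgedTask.remove(workerHost); // release the claim either way
    }
  }

  private boolean announce(String workerHost, String taskId) {
    System.out.printf("announcing %s on %s%n", taskId, workerHost);
    return true;
  }

  public static void main(String[] args) {
    WorkerClaimSketch sketch = new WorkerClaimSketch();
    System.out.println(sketch.tryClaimAndAnnounce("worker-1", "task-1")); // true
  }
}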
Use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.
The class PendingTaskBasedWorkerResourceManagementStrategy, method doTerminate.
@Override
public boolean doTerminate(WorkerTaskRunner runner) {
  Collection<ImmutableWorkerInfo> zkWorkers = runner.getWorkers();
  synchronized (lock) {
    final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
    if (workerConfig == null) {
      log.warn("No workerConfig available, cannot terminate workers.");
      return false;
    }
    if (!currentlyProvisioning.isEmpty()) {
      log.debug("Already provisioning nodes, not terminating any nodes.");
      return false;
    }
    boolean didTerminate = false;
    final Collection<String> workerNodeIds = getWorkerNodeIDs(runner.getLazyWorkers(), workerConfig);
    // Keep only in-flight terminations whose node IDs the autoscaler still reports.
    final Set<String> stillExisting = Sets.newHashSet();
    for (String s : currentlyTerminating) {
      if (workerNodeIds.contains(s)) {
        stillExisting.add(s);
      }
    }
    currentlyTerminating.clear();
    currentlyTerminating.addAll(stillExisting);
    if (currentlyTerminating.isEmpty()) {
      final int maxWorkersToTerminate = maxWorkersToTerminate(zkWorkers, workerConfig);
      final Predicate<ImmutableWorkerInfo> isLazyWorker = ResourceManagementUtil.createLazyWorkerPredicate(config);
      final List<String> laziestWorkerIps = Lists.newArrayList(
          Collections2.transform(
              runner.markWorkersLazy(isLazyWorker, maxWorkersToTerminate),
              new Function<Worker, String>() {
                @Override
                public String apply(Worker zkWorker) {
                  return zkWorker.getIp();
                }
              }
          )
      );
      if (laziestWorkerIps.isEmpty()) {
        log.debug("Found no lazy workers");
      } else {
        log.info("Terminating %,d lazy workers: %s", laziestWorkerIps.size(), Joiner.on(", ").join(laziestWorkerIps));
        final AutoScalingData terminated = workerConfig.getAutoScaler().terminate(laziestWorkerIps);
        if (terminated != null) {
          currentlyTerminating.addAll(terminated.getNodeIds());
          lastTerminateTime = new DateTime();
          scalingStats.addTerminateEvent(terminated);
          didTerminate = true;
        }
      }
    } else {
      Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime());
      log.info("%s terminating. Current wait time: %s", currentlyTerminating, durSinceLastTerminate);
      if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
        log.makeAlert("Worker node termination taking too long!")
           .addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis())
           .addData("terminatingCount", currentlyTerminating.size())
           .emit();
        currentlyTerminating.clear();
      }
    }
    return didTerminate;
  }
}
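Before deciding whether to terminate, the method prunes currentlyTerminating down to node IDs the autoscaler still reports; the stillExisting copy loop is effectively a set intersection. A JDK-only sketch of that bookkeeping step, with hypothetical values:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class TerminatePruneSketch {
  public static void main(String[] args) {
    Set<String> currentlyTerminating = new HashSet<>(Arrays.asList("node-a", "node-b"));
    Set<String> workerNodeIds = new HashSet<>(Arrays.asList("node-b", "node-c")); // IDs the autoscaler still sees

    // Same effect as the stillExisting loop: keep only IDs that still exist.
    currentlyTerminating.retainAll(workerNodeIds);
    System.out.println(currentlyTerminating); // prints [node-b]
  }
}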
Use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.
The class SimpleWorkerResourceManagementStrategy, method doProvision.
protected boolean doProvision(WorkerTaskRunner runner) {
  Collection<? extends TaskRunnerWorkItem> pendingTasks = runner.getPendingTasks();
  Collection<ImmutableWorkerInfo> workers = getWorkers(runner);
  synchronized (lock) {
    boolean didProvision = false;
    final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
    if (workerConfig == null || workerConfig.getAutoScaler() == null) {
      log.warn("No workerConfig available, cannot provision new workers.");
      return false;
    }
    final Predicate<ImmutableWorkerInfo> isValidWorker = ResourceManagementUtil.createValidWorkerPredicate(config);
    final int currValidWorkers = Collections2.filter(workers, isValidWorker).size();
    final List<String> workerNodeIds = workerConfig.getAutoScaler().ipToIdLookup(
        Lists.newArrayList(
            Iterables.transform(
                workers,
                new Function<ImmutableWorkerInfo, String>() {
                  @Override
                  public String apply(ImmutableWorkerInfo input) {
                    return input.getWorker().getIp();
                  }
                }
            )
        )
    );
    // Nodes that now show up as live workers are no longer "provisioning".
    currentlyProvisioning.removeAll(workerNodeIds);
    updateTargetWorkerCount(workerConfig, pendingTasks, workers);
    int want = targetWorkerCount - (currValidWorkers + currentlyProvisioning.size());
    while (want > 0) {
      final AutoScalingData provisioned = workerConfig.getAutoScaler().provision();
      final List<String> newNodes;
      if (provisioned == null || (newNodes = provisioned.getNodeIds()).isEmpty()) {
        break;
      } else {
        currentlyProvisioning.addAll(newNodes);
        lastProvisionTime = new DateTime();
        scalingStats.addProvisionEvent(provisioned);
        want -= provisioned.getNodeIds().size();
        didProvision = true;
      }
    }
    if (!currentlyProvisioning.isEmpty()) {
      Duration durSinceLastProvision = new Duration(lastProvisionTime, new DateTime());
      log.info("%s provisioning. Current wait time: %s", currentlyProvisioning, durSinceLastProvision);
      if (durSinceLastProvision.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
        log.makeAlert("Worker node provisioning taking too long!")
           .addData("millisSinceLastProvision", durSinceLastProvision.getMillis())
           .addData("provisioningCount", currentlyProvisioning.size())
           .emit();
        workerConfig.getAutoScaler().terminateWithIds(Lists.newArrayList(currentlyProvisioning));
        currentlyProvisioning.clear();
      }
    }
    return didProvision;
  }
}
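The shortfall driving the provisioning loop is the target worker count minus the workers that are already valid or already coming up; the loop keeps asking the autoscaler for nodes until the shortfall is covered or the autoscaler returns nothing. A standalone sketch of that arithmetic, with a hypothetical stand-in for autoScaler.provision():

public class ProvisionLoopSketch {
  public static void main(String[] args) {
    int targetWorkerCount = 5;
    int currValidWorkers = 2;
    int currentlyProvisioningCount = 1;

    int want = targetWorkerCount - (currValidWorkers + currentlyProvisioningCount);
    System.out.println("shortfall = " + want); // 2

    while (want > 0) {
      int provisioned = provisionOnce(); // stand-in for autoScaler.provision()
      if (provisioned == 0) {
        break; // autoscaler could not add nodes; give up and retry on the next cycle
      }
      want -= provisioned;
    }
  }

  private static int provisionOnce() {
    return 1; // pretend one node comes up per call
  }
}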
Use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.
The class RemoteTaskRunnerFactoryTest, method testExecNotSharedBetweenRunners.
@Test
public void testExecNotSharedBetweenRunners() {
  final AtomicInteger executorCount = new AtomicInteger(0);
  RemoteTaskRunnerConfig config = new RemoteTaskRunnerConfig();
  IndexerZkConfig indexerZkConfig = new IndexerZkConfig(
      new ZkPathsConfig() {
        @Override
        public String getBase() {
          return basePath;
        }
      },
      null, null, null, null, null
  );
  HttpClient httpClient = EasyMock.createMock(HttpClient.class);
  Supplier<WorkerBehaviorConfig> workerBehaviorConfig = EasyMock.createMock(Supplier.class);
  ScheduledExecutorFactory executorFactory = new ScheduledExecutorFactory() {
    @Override
    public ScheduledExecutorService create(int i, String s) {
      executorCount.incrementAndGet();
      return ScheduledExecutors.fixed(i, s);
    }
  };
  SimpleWorkerResourceManagementConfig resourceManagementConfig = new SimpleWorkerResourceManagementConfig();
  ResourceManagementSchedulerConfig resourceManagementSchedulerConfig = new ResourceManagementSchedulerConfig() {
    @Override
    public boolean isDoAutoscale() {
      return true;
    }
  };
  RemoteTaskRunnerFactory factory = new RemoteTaskRunnerFactory(
      cf,
      config,
      indexerZkConfig,
      jsonMapper,
      httpClient,
      workerBehaviorConfig,
      executorFactory,
      resourceManagementSchedulerConfig,
      new SimpleWorkerResourceManagementStrategy(
          resourceManagementConfig,
          workerBehaviorConfig,
          resourceManagementSchedulerConfig,
          executorFactory
      )
  );
  // The factory itself creates one executor; each build() must create its own.
  Assert.assertEquals(1, executorCount.get());
  RemoteTaskRunner remoteTaskRunner1 = factory.build();
  Assert.assertEquals(2, executorCount.get());
  RemoteTaskRunner remoteTaskRunner2 = factory.build();
  Assert.assertEquals(3, executorCount.get());
}
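The mocked Supplier<WorkerBehaviorConfig> is never replayed here because this test only counts executor creations. A variant that needed the runner to actually read the config could stub the supplier with standard EasyMock calls; a hedged sketch, reusing the WorkerBehaviorConfig constructor shown in the next example:

// Hypothetical: stub the config supplier so runner code can read a real config.
EasyMock.expect(workerBehaviorConfig.get())
        .andReturn(new WorkerBehaviorConfig(new FillCapacityWorkerSelectStrategy(), null))
        .anyTimes();
EasyMock.replay(workerBehaviorConfig);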
Use of io.druid.indexing.overlord.setup.WorkerBehaviorConfig in project druid by druid-io.
The class PendingTaskBasedResourceManagementStrategyTest, method setUp.
@Before
public void setUp() throws Exception {
  autoScaler = EasyMock.createMock(AutoScaler.class);
  testTask = TestTasks.immediateSuccess("task1");
  config = new PendingTaskBasedWorkerResourceManagementConfig()
      .setMaxScalingDuration(new Period(1000))
      .setNumEventsToTrack(10)
      .setPendingTaskTimeout(new Period(0))
      .setWorkerVersion(MIN_VERSION)
      .setMaxScalingStep(2);
  workerConfig = new AtomicReference<>(new WorkerBehaviorConfig(new FillCapacityWorkerSelectStrategy(), autoScaler));
  strategy = new PendingTaskBasedWorkerResourceManagementStrategy(
      config,
      DSuppliers.of(workerConfig),
      new ResourceManagementSchedulerConfig(),
      executorService
  );
}
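With this setUp in place, a test that exercises provisioning would typically record expectations on the mocked autoScaler before replaying it. A hedged sketch; the specific methods and values are illustrative assumptions, not taken from this test:

// Hypothetical expectations for a provisioning test.
EasyMock.expect(autoScaler.getMinNumWorkers()).andReturn(0);
EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(2);
EasyMock.expect(autoScaler.provision())
        .andReturn(new AutoScalingData(Lists.newArrayList("aNode")));
EasyMock.replay(autoScaler);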