Use of com.google.common.base.Function in project druid by druid-io.
The class KafkaSupervisor, method discoverTasks:
private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
  int taskCount = 0;
  List<String> futureTaskIds = Lists.newArrayList();
  List<ListenableFuture<Boolean>> futures = Lists.newArrayList();
  List<Task> tasks = taskStorage.getActiveTasks();
  for (Task task : tasks) {
    if (!(task instanceof KafkaIndexTask) || !dataSource.equals(task.getDataSource())) {
      continue;
    }
    taskCount++;
    final KafkaIndexTask kafkaTask = (KafkaIndexTask) task;
    final String taskId = task.getId();
    // Determine which task group this task belongs to based on one of the partitions handled by this task. If we
    // later determine that this task is actively reading, we will make sure that it matches our current partition
    // allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
    // by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
    // state, we will permit it to complete even if it doesn't match our current partition allocation to support
    // seamless schema migration.
    Iterator<Integer> it = kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet().iterator();
    final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
    if (taskGroupId != null) {
      // check to see if we already know about this task, either in [taskGroups] or in [pendingCompletionTaskGroups]
      // and if not add it to taskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
      TaskGroup taskGroup = taskGroups.get(taskGroupId);
      if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
        futureTaskIds.add(taskId);
        futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<KafkaIndexTask.Status, Boolean>() {
          @Override
          public Boolean apply(KafkaIndexTask.Status status) {
            if (status == KafkaIndexTask.Status.PUBLISHING) {
              addDiscoveredTaskToPendingCompletionTaskGroups(taskGroupId, taskId, kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap());
              // update partitionGroups with the publishing task's offsets (if they are greater than what is
              // existing) so that the next tasks will start reading from where this task left off
              Map<Integer, Long> publishingTaskCurrentOffsets = taskClient.getCurrentOffsets(taskId, true);
              for (Map.Entry<Integer, Long> entry : publishingTaskCurrentOffsets.entrySet()) {
                Integer partition = entry.getKey();
                Long offset = entry.getValue();
                ConcurrentHashMap<Integer, Long> partitionOffsets = partitionGroups.get(getTaskGroupIdForPartition(partition));
                boolean succeeded;
                do {
                  succeeded = true;
                  Long previousOffset = partitionOffsets.putIfAbsent(partition, offset);
                  if (previousOffset != null && previousOffset < offset) {
                    succeeded = partitionOffsets.replace(partition, previousOffset, offset);
                  }
                } while (!succeeded);
              }
            } else {
              for (Integer partition : kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet()) {
                if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
                  log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
                  try {
                    stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                  } catch (InterruptedException | ExecutionException | TimeoutException e) {
                    log.warn(e, "Exception while stopping task");
                  }
                  return false;
                }
              }
              if (taskGroups.putIfAbsent(taskGroupId, new TaskGroup(ImmutableMap.copyOf(kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap()), kafkaTask.getIOConfig().getMinimumMessageTime())) == null) {
                log.debug("Created new task group [%d]", taskGroupId);
              }
              if (!isTaskCurrent(taskGroupId, taskId)) {
                log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
                try {
                  stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                } catch (InterruptedException | ExecutionException | TimeoutException e) {
                  log.warn(e, "Exception while stopping task");
                }
                return false;
              } else {
                taskGroups.get(taskGroupId).tasks.putIfAbsent(taskId, new TaskData());
              }
            }
            return true;
          }
        }, workerExec));
      }
    }
  }
  List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
  for (int i = 0; i < results.size(); i++) {
    if (results.get(i) == null) {
      String taskId = futureTaskIds.get(i);
      log.warn("Task [%s] failed to return status, killing task", taskId);
      killTask(taskId);
    }
  }
  log.debug("Found [%d] Kafka indexing tasks for dataSource [%s]", taskCount, dataSource);
}
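
The anonymous Function<KafkaIndexTask.Status, Boolean> passed to Futures.transform is the Guava usage this example illustrates: it turns the asynchronous status lookup into a boolean "task handled" result. Because com.google.common.base.Function has the single abstract method apply, the same callback can be written as a lambda on Java 8+. A minimal sketch, assuming the body of the anonymous class is extracted into a hypothetical helper handleDiscoveredStatus; the cast disambiguates the Function overload from the AsyncFunction overload present in older Guava versions:

futures.add(Futures.transform(
    taskClient.getStatusAsync(taskId),
    // handleDiscoveredStatus is a hypothetical helper holding the body shown above
    (Function<KafkaIndexTask.Status, Boolean>) status -> handleDiscoveredStatus(status, taskGroupId, taskId, kafkaTask),
    workerExec
));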
Use of com.google.common.base.Function in project druid by druid-io.
The class IngestSegmentFirehoseFactory, method connect:
@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
  log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
  if (taskToolbox == null) {
    // Noop Task is just used to create the toolbox and list segments.
    taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
  }
  try {
    final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
    final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
    VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
    for (DataSegment segment : usedSegments) {
      timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
    final List<String> dims;
    if (dimensions != null) {
      dims = dimensions;
    } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
      dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
    } else {
      Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {
        @Override
        public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
          return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {
            @Override
            public Iterable<String> apply(PartitionChunk<DataSegment> input) {
              return input.getObject().getDimensions();
            }
          }));
        }
      })));
      dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
    }
    final List<String> metricsList;
    if (metrics != null) {
      metricsList = metrics;
    } else {
      Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {
        @Override
        public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
          return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {
            @Override
            public Iterable<String> apply(PartitionChunk<DataSegment> input) {
              return input.getObject().getMetrics();
            }
          }));
        }
      })));
      metricsList = Lists.newArrayList(metricsSet);
    }
    final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {
      @Override
      public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
        return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {
          @Override
          public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
            final DataSegment segment = input.getObject();
            try {
              return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
            } catch (IOException e) {
              throw Throwables.propagate(e);
            }
          }
        });
      }
    })));
    return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
  } catch (IOException e) {
    throw Throwables.propagate(e);
  } catch (SegmentLoadingException e) {
    throw Throwables.propagate(e);
  }
}
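
Each of the three anonymous Functions above flattens the timeline holders into their partition chunks and extracts, respectively, dimension names, metric names, and storage adapters. On Java 8+ the same traversal can be written with lambdas; a minimal sketch for the dimension set, assuming type inference resolves the nested generics (explicit type arguments may otherwise be needed):

Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(
    timeLineSegments,
    // for each timeline holder, walk its partition chunks and collect segment dimensions
    holder -> Iterables.concat(Iterables.transform(
        holder.getObject(),
        chunk -> chunk.getObject().getDimensions()
    ))
)));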
Use of com.google.common.base.Function in project druid by druid-io.
The class TaskLockbox, method findLockPossesForInterval:
/**
 * Return all locks that overlap some search interval.
 */
private List<TaskLockPosse> findLockPossesForInterval(final String dataSource, final Interval interval) {
  giant.lock();
  try {
    final NavigableMap<Interval, TaskLockPosse> dsRunning = running.get(dataSource);
    if (dsRunning == null) {
      // No locks at all
      return Collections.emptyList();
    } else {
      // Tasks are indexed by locked interval, which are sorted by interval start. Intervals are non-overlapping, so:
      final NavigableSet<Interval> dsLockbox = dsRunning.navigableKeySet();
      final Iterable<Interval> searchIntervals = Iterables.concat(
          // Single interval that starts at or before ours
          Collections.singletonList(dsLockbox.floor(new Interval(interval.getStart(), new DateTime(JodaUtils.MAX_INSTANT)))),
          // All intervals that start somewhere between our start instant (exclusive) and end instant (exclusive)
          dsLockbox.subSet(new Interval(interval.getStart(), new DateTime(JodaUtils.MAX_INSTANT)), false, new Interval(interval.getEnd(), interval.getEnd()), false)
      );
      return Lists.newArrayList(FunctionalIterable.create(searchIntervals).filter(new Predicate<Interval>() {
        @Override
        public boolean apply(@Nullable Interval searchInterval) {
          return searchInterval != null && searchInterval.overlaps(interval);
        }
      }).transform(new Function<Interval, TaskLockPosse>() {
        @Override
        public TaskLockPosse apply(Interval interval) {
          return dsRunning.get(interval);
        }
      }));
    }
  } finally {
    giant.unlock();
  }
}
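
The Predicate keeps only the candidate intervals that actually overlap the search interval, and the Function then maps each surviving interval to its lock posse. With a lambda and a method reference the same chain becomes shorter; a minimal sketch, assuming FunctionalIterable accepts them as ordinary Guava Predicate/Function instances:

return Lists.newArrayList(
    FunctionalIterable.create(searchIntervals)
        // keep only intervals that overlap the search interval
        .filter(searchInterval -> searchInterval != null && searchInterval.overlaps(interval))
        // look up the lock posse registered under each interval
        .transform(dsRunning::get)
);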
Use of com.google.common.base.Function in project druid by druid-io.
The class PendingTaskBasedWorkerResourceManagementStrategy, method doTerminate:
@Override
public boolean doTerminate(WorkerTaskRunner runner) {
  Collection<ImmutableWorkerInfo> zkWorkers = runner.getWorkers();
  synchronized (lock) {
    final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
    if (workerConfig == null) {
      log.warn("No workerConfig available, cannot terminate workers.");
      return false;
    }
    if (!currentlyProvisioning.isEmpty()) {
      log.debug("Already provisioning nodes, Not Terminating any nodes.");
      return false;
    }
    boolean didTerminate = false;
    final Collection<String> workerNodeIds = getWorkerNodeIDs(runner.getLazyWorkers(), workerConfig);
    final Set<String> stillExisting = Sets.newHashSet();
    for (String s : currentlyTerminating) {
      if (workerNodeIds.contains(s)) {
        stillExisting.add(s);
      }
    }
    currentlyTerminating.clear();
    currentlyTerminating.addAll(stillExisting);
    if (currentlyTerminating.isEmpty()) {
      final int maxWorkersToTerminate = maxWorkersToTerminate(zkWorkers, workerConfig);
      final Predicate<ImmutableWorkerInfo> isLazyWorker = ResourceManagementUtil.createLazyWorkerPredicate(config);
      final List<String> laziestWorkerIps = Lists.newArrayList(Collections2.transform(runner.markWorkersLazy(isLazyWorker, maxWorkersToTerminate), new Function<Worker, String>() {
        @Override
        public String apply(Worker zkWorker) {
          return zkWorker.getIp();
        }
      }));
      if (laziestWorkerIps.isEmpty()) {
        log.debug("Found no lazy workers");
      } else {
        log.info("Terminating %,d lazy workers: %s", laziestWorkerIps.size(), Joiner.on(", ").join(laziestWorkerIps));
        final AutoScalingData terminated = workerConfig.getAutoScaler().terminate(laziestWorkerIps);
        if (terminated != null) {
          currentlyTerminating.addAll(terminated.getNodeIds());
          lastTerminateTime = new DateTime();
          scalingStats.addTerminateEvent(terminated);
          didTerminate = true;
        }
      }
    } else {
      Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime());
      log.info("%s terminating. Current wait time: %s", currentlyTerminating, durSinceLastTerminate);
      if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
        log.makeAlert("Worker node termination taking too long!").addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis()).addData("terminatingCount", currentlyTerminating.size()).emit();
        currentlyTerminating.clear();
      }
    }
    return didTerminate;
  }
}
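
Here the Function only projects each lazy Worker to its IP address so the autoscaler can be asked to terminate those nodes. On Java 8+ a method reference expresses the same projection; a minimal sketch:

final List<String> laziestWorkerIps = Lists.newArrayList(
    // project each lazy worker to the IP address the autoscaler terminates by
    Collections2.transform(runner.markWorkersLazy(isLazyWorker, maxWorkersToTerminate), Worker::getIp)
);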
Use of com.google.common.base.Function in project druid by druid-io.
The class EC2AutoScaler, method terminate:
@Override
public AutoScalingData terminate(List<String> ips) {
  if (ips.isEmpty()) {
    return new AutoScalingData(Lists.<String>newArrayList());
  }
  DescribeInstancesResult result = amazonEC2Client.describeInstances(new DescribeInstancesRequest().withFilters(new Filter("private-ip-address", ips)));
  List<Instance> instances = Lists.newArrayList();
  for (Reservation reservation : result.getReservations()) {
    instances.addAll(reservation.getInstances());
  }
  try {
    return terminateWithIds(Lists.transform(instances, new Function<Instance, String>() {
      @Override
      public String apply(Instance input) {
        return input.getInstanceId();
      }
    }));
  } catch (Exception e) {
    log.error(e, "Unable to terminate any instances.");
  }
  return null;
}
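
The Function maps each matched EC2 Instance to its instance ID before delegating to terminateWithIds. A method reference gives the same transformation on Java 8+; a minimal sketch (note that Lists.transform returns a lazy view, so the IDs are resolved when terminateWithIds iterates it):

// map each discovered instance to its EC2 instance ID and terminate by ID
return terminateWithIds(Lists.transform(instances, Instance::getInstanceId));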