
Example 21 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class KafkaSupervisor, method discoverTasks.

private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
    int taskCount = 0;
    List<String> futureTaskIds = Lists.newArrayList();
    List<ListenableFuture<Boolean>> futures = Lists.newArrayList();
    List<Task> tasks = taskStorage.getActiveTasks();
    for (Task task : tasks) {
        if (!(task instanceof KafkaIndexTask) || !dataSource.equals(task.getDataSource())) {
            continue;
        }
        taskCount++;
        final KafkaIndexTask kafkaTask = (KafkaIndexTask) task;
        final String taskId = task.getId();
        // Determine which task group this task belongs to based on one of the partitions handled by this task. If we
        // later determine that this task is actively reading, we will make sure that it matches our current partition
        // allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
        // by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
        // state, we will permit it to complete even if it doesn't match our current partition allocation to support
        // seamless schema migration.
        Iterator<Integer> it = kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet().iterator();
        final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
        if (taskGroupId != null) {
            // check to see if we already know about this task, either in [taskGroups] or in [pendingCompletionTaskGroups]
            // and if not add it to taskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
            TaskGroup taskGroup = taskGroups.get(taskGroupId);
            if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
                futureTaskIds.add(taskId);
                futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<KafkaIndexTask.Status, Boolean>() {

                    @Override
                    public Boolean apply(KafkaIndexTask.Status status) {
                        if (status == KafkaIndexTask.Status.PUBLISHING) {
                            addDiscoveredTaskToPendingCompletionTaskGroups(taskGroupId, taskId, kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap());
                            // update partitionGroups with the publishing task's offsets (if they are greater than what is
                            // existing) so that the next tasks will start reading from where this task left off
                            Map<Integer, Long> publishingTaskCurrentOffsets = taskClient.getCurrentOffsets(taskId, true);
                            for (Map.Entry<Integer, Long> entry : publishingTaskCurrentOffsets.entrySet()) {
                                Integer partition = entry.getKey();
                                Long offset = entry.getValue();
                                ConcurrentHashMap<Integer, Long> partitionOffsets = partitionGroups.get(getTaskGroupIdForPartition(partition));
                                boolean succeeded;
                                do {
                                    succeeded = true;
                                    Long previousOffset = partitionOffsets.putIfAbsent(partition, offset);
                                    if (previousOffset != null && previousOffset < offset) {
                                        succeeded = partitionOffsets.replace(partition, previousOffset, offset);
                                    }
                                } while (!succeeded);
                            }
                        } else {
                            for (Integer partition : kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet()) {
                                if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
                                    log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
                                    try {
                                        stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                    } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                        log.warn(e, "Exception while stopping task");
                                    }
                                    return false;
                                }
                            }
                            if (taskGroups.putIfAbsent(taskGroupId, new TaskGroup(ImmutableMap.copyOf(kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap()), kafkaTask.getIOConfig().getMinimumMessageTime())) == null) {
                                log.debug("Created new task group [%d]", taskGroupId);
                            }
                            if (!isTaskCurrent(taskGroupId, taskId)) {
                                log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
                                try {
                                    stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                    log.warn(e, "Exception while stopping task");
                                }
                                return false;
                            } else {
                                taskGroups.get(taskGroupId).tasks.putIfAbsent(taskId, new TaskData());
                            }
                        }
                        return true;
                    }
                }, workerExec));
            }
        }
    }
    List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
    for (int i = 0; i < results.size(); i++) {
        if (results.get(i) == null) {
            String taskId = futureTaskIds.get(i);
            log.warn("Task [%s] failed to return status, killing task", taskId);
            killTask(taskId);
        }
    }
    log.debug("Found [%d] Kafka indexing tasks for dataSource [%s]", taskCount, dataSource);
}
Also used : Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) Function(com.google.common.base.Function) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) TaskStatus(io.druid.indexing.common.TaskStatus) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)
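
The interesting part of this callback is how it combines Futures.transform with a compare-and-swap loop on a ConcurrentHashMap so publishing offsets only ever move forward. Below is a minimal, self-contained sketch of that shape using only Guava and the JDK; the class name, the plain String status, and the single-thread executor are stand-ins for illustration, not Druid's actual types.

import com.google.common.base.Function;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;

public class OffsetCallbackSketch {

    // Keep the largest offset seen for a partition without holding a lock: the same
    // putIfAbsent/replace retry loop the supervisor's callback uses above.
    static void advanceOffset(ConcurrentHashMap<Integer, Long> offsets, int partition, long offset) {
        boolean succeeded;
        do {
            succeeded = true;
            Long previous = offsets.putIfAbsent(partition, offset);
            if (previous != null && previous < offset) {
                // replace() only wins if the value is still what we read; otherwise another thread moved it, so retry.
                succeeded = offsets.replace(partition, previous, offset);
            }
        } while (!succeeded);
    }

    public static void main(String[] args) throws Exception {
        final ConcurrentHashMap<Integer, Long> offsets = new ConcurrentHashMap<>();
        final ListeningExecutorService exec = MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());
        // Stand-in for taskClient.getStatusAsync(taskId): any async lookup that yields a status.
        ListenableFuture<String> statusFuture = exec.submit(new Callable<String>() {

            @Override
            public String call() {
                return "PUBLISHING";
            }
        });
        // Same shape as the supervisor's callback: transform the status future into a Boolean outcome.
        ListenableFuture<Boolean> handled = Futures.transform(statusFuture, new Function<String, Boolean>() {

            @Override
            public Boolean apply(String status) {
                if ("PUBLISHING".equals(status)) {
                    advanceOffset(offsets, 0, 42L);
                }
                return true;
            }
        }, exec);
        // Prints "true {0=42}".
        System.out.println(handled.get() + " " + offsets);
        exec.shutdown();
    }
}

The executor is passed explicitly to Futures.transform, which keeps the sketch valid on newer Guava releases where the two-argument overload has been deprecated or removed.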

Example 22 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class IngestSegmentFirehoseFactory, method connect.

@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    if (taskToolbox == null) {
        // Noop Task is just used to create the toolbox and list segments.
        taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
    }
    try {
        final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
        final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
        for (DataSegment segment : usedSegments) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
        final List<String> dims;
        if (dimensions != null) {
            dims = dimensions;
        } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
            dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
        } else {
            Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
                    return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getDimensions();
                        }
                    }));
                }
            })));
            dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
        }
        final List<String> metricsList;
        if (metrics != null) {
            metricsList = metrics;
        } else {
            Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
                    return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getMetrics();
                        }
                    }));
                }
            })));
            metricsList = Lists.newArrayList(metricsSet);
        }
        final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {

            @Override
            public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {

                    @Override
                    public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                        final DataSegment segment = input.getObject();
                        try {
                            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });
            }
        })));
        return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } catch (SegmentLoadingException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) Set(java.util.Set) NoopTask(io.druid.indexing.common.task.NoopTask) DataSegment(io.druid.timeline.DataSegment) Function(com.google.common.base.Function) PartitionChunk(io.druid.timeline.partition.PartitionChunk) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) File(java.io.File)
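
Stripped of the Druid types, the nested anonymous Functions above are a flat-map: collect every dimension (or metric) name from every segment in every timeline holder, then de-duplicate. Below is a standalone sketch of the same transform/concat idiom; the Segment class is a hypothetical stand-in for TimelineObjectHolder and PartitionChunk, not a Druid type.

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import java.util.List;
import java.util.Set;

public class DimensionUnionSketch {

    // Stand-in for a segment that knows its dimension names (the real code reads DataSegment.getDimensions()).
    static class Segment {

        final List<String> dimensions;

        Segment(String... dimensions) {
            this.dimensions = ImmutableList.copyOf(dimensions);
        }
    }

    public static void main(String[] args) {
        List<List<Segment>> holders = ImmutableList.<List<Segment>>of(
            ImmutableList.<Segment>of(new Segment("host", "service")),
            ImmutableList.<Segment>of(new Segment("service", "status")));
        // The outer Function maps one holder to all dimension names of its segments; Iterables.concat flattens
        // across holders and the Set removes duplicates, just as the firehose factory does.
        Set<String> dims = Sets.newHashSet(Iterables.concat(Iterables.transform(holders, new Function<List<Segment>, Iterable<String>>() {

            @Override
            public Iterable<String> apply(List<Segment> holder) {
                return Iterables.concat(Iterables.transform(holder, new Function<Segment, Iterable<String>>() {

                    @Override
                    public Iterable<String> apply(Segment segment) {
                        return segment.dimensions;
                    }
                }));
            }
        })));
        // Prints the union, e.g. [host, service, status] in hash order.
        System.out.println(dims);
    }
}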

Example 23 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class TaskLockbox, method findLockPossesForInterval.

/**
   * Return all locks that overlap some search interval.
   */
private List<TaskLockPosse> findLockPossesForInterval(final String dataSource, final Interval interval) {
    giant.lock();
    try {
        final NavigableMap<Interval, TaskLockPosse> dsRunning = running.get(dataSource);
        if (dsRunning == null) {
            // No locks at all
            return Collections.emptyList();
        } else {
            // Tasks are indexed by locked interval, which are sorted by interval start. Intervals are non-overlapping, so:
            final NavigableSet<Interval> dsLockbox = dsRunning.navigableKeySet();
            final Iterable<Interval> searchIntervals = Iterables.concat(// Single interval that starts at or before ours
            Collections.singletonList(dsLockbox.floor(new Interval(interval.getStart(), new DateTime(JodaUtils.MAX_INSTANT)))), // All intervals that start somewhere between our start instant (exclusive) and end instant (exclusive)
            dsLockbox.subSet(new Interval(interval.getStart(), new DateTime(JodaUtils.MAX_INSTANT)), false, new Interval(interval.getEnd(), interval.getEnd()), false));
            return Lists.newArrayList(FunctionalIterable.create(searchIntervals).filter(new Predicate<Interval>() {

                @Override
                public boolean apply(@Nullable Interval searchInterval) {
                    return searchInterval != null && searchInterval.overlaps(interval);
                }
            }).transform(new Function<Interval, TaskLockPosse>() {

                @Override
                public TaskLockPosse apply(Interval interval) {
                    return dsRunning.get(interval);
                }
            }));
        }
    } finally {
        giant.unlock();
    }
}
Also used : Function(com.google.common.base.Function) DateTime(org.joda.time.DateTime) Interval(org.joda.time.Interval)
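
FunctionalIterable is a Druid convenience wrapper, but the filter-then-transform chain it builds is what Guava exposes directly through Iterables.filter and Iterables.transform. The sketch below shows just that chain over a toy NavigableMap of intervals; it deliberately omits the floor/subSet narrowing the real method performs before filtering, and the interval data and posse names are made up.

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import java.util.Comparator;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import org.joda.time.Interval;

public class IntervalLookupSketch {

    public static void main(String[] args) {
        // Toy stand-in for running.get(dataSource); the real values would be TaskLockPosse objects.
        final NavigableMap<Interval, String> running = new TreeMap<>(new Comparator<Interval>() {

            @Override
            public int compare(Interval a, Interval b) {
                // Joda's Interval is not Comparable; order by start instant, as the lockbox's map does.
                return Long.compare(a.getStartMillis(), b.getStartMillis());
            }
        });
        running.put(Interval.parse("2017-01-01/2017-01-02"), "posse-A");
        running.put(Interval.parse("2017-01-03/2017-01-04"), "posse-B");
        final Interval search = Interval.parse("2017-01-01T12:00/2017-01-03T12:00");
        // Keep only intervals overlapping the search interval, then map each one to its posse.
        List<String> posses = Lists.newArrayList(Iterables.transform(Iterables.filter(running.keySet(), new Predicate<Interval>() {

            @Override
            public boolean apply(Interval candidate) {
                return candidate.overlaps(search);
            }
        }), new Function<Interval, String>() {

            @Override
            public String apply(Interval interval) {
                return running.get(interval);
            }
        }));
        // Prints [posse-A, posse-B].
        System.out.println(posses);
    }
}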

Example 24 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class PendingTaskBasedWorkerResourceManagementStrategy, method doTerminate.

@Override
public boolean doTerminate(WorkerTaskRunner runner) {
    Collection<ImmutableWorkerInfo> zkWorkers = runner.getWorkers();
    synchronized (lock) {
        final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
        if (workerConfig == null) {
            log.warn("No workerConfig available, cannot terminate workers.");
            return false;
        }
        if (!currentlyProvisioning.isEmpty()) {
            log.debug("Already provisioning nodes, Not Terminating any nodes.");
            return false;
        }
        boolean didTerminate = false;
        final Collection<String> workerNodeIds = getWorkerNodeIDs(runner.getLazyWorkers(), workerConfig);
        final Set<String> stillExisting = Sets.newHashSet();
        for (String s : currentlyTerminating) {
            if (workerNodeIds.contains(s)) {
                stillExisting.add(s);
            }
        }
        currentlyTerminating.clear();
        currentlyTerminating.addAll(stillExisting);
        if (currentlyTerminating.isEmpty()) {
            final int maxWorkersToTerminate = maxWorkersToTerminate(zkWorkers, workerConfig);
            final Predicate<ImmutableWorkerInfo> isLazyWorker = ResourceManagementUtil.createLazyWorkerPredicate(config);
            final List<String> laziestWorkerIps = Lists.newArrayList(Collections2.transform(runner.markWorkersLazy(isLazyWorker, maxWorkersToTerminate), new Function<Worker, String>() {

                @Override
                public String apply(Worker zkWorker) {
                    return zkWorker.getIp();
                }
            }));
            if (laziestWorkerIps.isEmpty()) {
                log.debug("Found no lazy workers");
            } else {
                log.info("Terminating %,d lazy workers: %s", laziestWorkerIps.size(), Joiner.on(", ").join(laziestWorkerIps));
                final AutoScalingData terminated = workerConfig.getAutoScaler().terminate(laziestWorkerIps);
                if (terminated != null) {
                    currentlyTerminating.addAll(terminated.getNodeIds());
                    lastTerminateTime = new DateTime();
                    scalingStats.addTerminateEvent(terminated);
                    didTerminate = true;
                }
            }
        } else {
            Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime());
            log.info("%s terminating. Current wait time: %s", currentlyTerminating, durSinceLastTerminate);
            if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
                log.makeAlert("Worker node termination taking too long!").addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis()).addData("terminatingCount", currentlyTerminating.size()).emit();
                currentlyTerminating.clear();
            }
        }
        return didTerminate;
    }
}
Also used : Duration(org.joda.time.Duration) ImmutableWorkerInfo(io.druid.indexing.overlord.ImmutableWorkerInfo) DateTime(org.joda.time.DateTime) WorkerBehaviorConfig(io.druid.indexing.overlord.setup.WorkerBehaviorConfig) Function(com.google.common.base.Function) Worker(io.druid.indexing.worker.Worker)
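
Here the Function is a plain field extractor: turn each lazy Worker into its IP so the autoscaler can be asked to terminate those nodes. A minimal sketch of that extractor shape with Collections2.transform follows, using a stand-in Worker class that carries only an IP rather than Druid's Worker type.

import com.google.common.base.Function;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.util.Collection;
import java.util.List;

public class WorkerIpSketch {

    // Stand-in for io.druid.indexing.worker.Worker, reduced to the one field the Function reads.
    static class Worker {

        final String ip;

        Worker(String ip) {
            this.ip = ip;
        }

        String getIp() {
            return ip;
        }
    }

    public static void main(String[] args) {
        Collection<Worker> lazyWorkers = ImmutableList.of(new Worker("10.0.0.1"), new Worker("10.0.0.2"));
        // Collections2.transform returns a live view; copying it into an ArrayList materializes the IPs,
        // which is what the strategy does before logging them and handing them to the autoscaler.
        List<String> ips = Lists.newArrayList(Collections2.transform(lazyWorkers, new Function<Worker, String>() {

            @Override
            public String apply(Worker worker) {
                return worker.getIp();
            }
        }));
        // Prints [10.0.0.1, 10.0.0.2].
        System.out.println(ips);
    }
}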

Example 25 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class EC2AutoScaler, method terminate.

@Override
public AutoScalingData terminate(List<String> ips) {
    if (ips.isEmpty()) {
        return new AutoScalingData(Lists.<String>newArrayList());
    }
    DescribeInstancesResult result = amazonEC2Client.describeInstances(new DescribeInstancesRequest().withFilters(new Filter("private-ip-address", ips)));
    List<Instance> instances = Lists.newArrayList();
    for (Reservation reservation : result.getReservations()) {
        instances.addAll(reservation.getInstances());
    }
    try {
        return terminateWithIds(Lists.transform(instances, new Function<Instance, String>() {

            @Override
            public String apply(Instance input) {
                return input.getInstanceId();
            }
        }));
    } catch (Exception e) {
        log.error(e, "Unable to terminate any instances.");
    }
    return null;
}
Also used : DescribeInstancesResult(com.amazonaws.services.ec2.model.DescribeInstancesResult) Function(com.google.common.base.Function) Reservation(com.amazonaws.services.ec2.model.Reservation) AutoScalingData(io.druid.indexing.overlord.autoscaling.AutoScalingData) Filter(com.amazonaws.services.ec2.model.Filter) Instance(com.amazonaws.services.ec2.model.Instance) DescribeInstancesRequest(com.amazonaws.services.ec2.model.DescribeInstancesRequest)
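
Example 25 is the same extractor shape again, this time with Lists.transform, which returns a lazy view: the Function runs on each read rather than once up front. A tiny sketch with a stand-in Instance class (not the AWS SDK type) follows.

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.util.List;

public class InstanceIdSketch {

    // Stand-in for com.amazonaws.services.ec2.model.Instance, reduced to the instance id.
    static class Instance {

        final String instanceId;

        Instance(String instanceId) {
            this.instanceId = instanceId;
        }

        String getInstanceId() {
            return instanceId;
        }
    }

    public static void main(String[] args) {
        List<Instance> instances = ImmutableList.of(new Instance("i-0abc"), new Instance("i-0def"));
        // Lists.transform is a lazy view: apply() runs on each read, so the ids always reflect the backing list.
        List<String> ids = Lists.transform(instances, new Function<Instance, String>() {

            @Override
            public String apply(Instance input) {
                return input.getInstanceId();
            }
        });
        // Prints [i-0abc, i-0def].
        System.out.println(ids);
    }
}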

Aggregations

Function (com.google.common.base.Function): 315
Nullable (javax.annotation.Nullable): 57
ArrayList (java.util.ArrayList): 51
List (java.util.List): 49
Test (org.junit.Test): 49
IOException (java.io.IOException): 48
Map (java.util.Map): 45
File (java.io.File): 29
HashMap (java.util.HashMap): 29
ImmutableList (com.google.common.collect.ImmutableList): 23
ImmutableMap (com.google.common.collect.ImmutableMap): 19
DateTime (org.joda.time.DateTime): 19
Optional (com.google.common.base.Optional): 18
ISE (io.druid.java.util.common.ISE): 14
Iterator (java.util.Iterator): 14
ImmutableSet (com.google.common.collect.ImmutableSet): 13
Result (io.druid.query.Result): 13
Feature (org.opengis.feature.Feature): 13
BuildTarget (com.facebook.buck.model.BuildTarget): 12
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 12