Search in sources :

Example 51 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class CachingClusteredClient method run.

@Override
public Sequence<T> run(final Query<T> query, final Map<String, Object> responseContext) {
    final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
    final CacheStrategy<T, Object, Query<T>> strategy = toolChest.getCacheStrategy(query);
    final Map<DruidServer, List<SegmentDescriptor>> serverSegments = Maps.newTreeMap();
    final List<Pair<Interval, byte[]>> cachedResults = Lists.newArrayList();
    final Map<String, CachePopulator> cachePopulatorMap = Maps.newHashMap();
    final boolean useCache = CacheUtil.useCacheOnBrokers(query, strategy, cacheConfig);
    final boolean populateCache = CacheUtil.populateCacheOnBrokers(query, strategy, cacheConfig);
    final boolean isBySegment = BaseQuery.getContextBySegment(query, false);
    final ImmutableMap.Builder<String, Object> contextBuilder = new ImmutableMap.Builder<>();
    final int priority = BaseQuery.getContextPriority(query, 0);
    contextBuilder.put("priority", priority);
    if (populateCache) {
        // prevent down-stream nodes from caching results as well if we are populating the cache
        contextBuilder.put(CacheConfig.POPULATE_CACHE, false);
        contextBuilder.put("bySegment", true);
    }
    TimelineLookup<String, ServerSelector> timeline = serverView.getTimeline(query.getDataSource());
    if (timeline == null) {
        return Sequences.empty();
    }
    // build set of segments to query
    Set<Pair<ServerSelector, SegmentDescriptor>> segments = Sets.newLinkedHashSet();
    List<TimelineObjectHolder<String, ServerSelector>> serversLookup = Lists.newLinkedList();
    // Note that enabling this leads to putting uncovered intervals information in the response headers
    // and might blow up in some cases https://github.com/druid-io/druid/issues/2108
    int uncoveredIntervalsLimit = BaseQuery.getContextUncoveredIntervalsLimit(query, 0);
    if (uncoveredIntervalsLimit > 0) {
        List<Interval> uncoveredIntervals = Lists.newArrayListWithCapacity(uncoveredIntervalsLimit);
        boolean uncoveredIntervalsOverflowed = false;
        for (Interval interval : query.getIntervals()) {
            Iterable<TimelineObjectHolder<String, ServerSelector>> lookup = timeline.lookup(interval);
            long startMillis = interval.getStartMillis();
            long endMillis = interval.getEndMillis();
            for (TimelineObjectHolder<String, ServerSelector> holder : lookup) {
                Interval holderInterval = holder.getInterval();
                long intervalStart = holderInterval.getStartMillis();
                if (!uncoveredIntervalsOverflowed && startMillis != intervalStart) {
                    if (uncoveredIntervalsLimit > uncoveredIntervals.size()) {
                        uncoveredIntervals.add(new Interval(startMillis, intervalStart));
                    } else {
                        uncoveredIntervalsOverflowed = true;
                    }
                }
                startMillis = holderInterval.getEndMillis();
                serversLookup.add(holder);
            }
            if (!uncoveredIntervalsOverflowed && startMillis < endMillis) {
                if (uncoveredIntervalsLimit > uncoveredIntervals.size()) {
                    uncoveredIntervals.add(new Interval(startMillis, endMillis));
                } else {
                    uncoveredIntervalsOverflowed = true;
                }
            }
        }
        if (!uncoveredIntervals.isEmpty()) {
            // This returns intervals for which NO segment is present.
            // Which is not necessarily an indication that the data doesn't exist or is
            // incomplete. The data could exist and just not be loaded yet.  In either
            // case, though, this query will not include any data from the identified intervals.
            responseContext.put("uncoveredIntervals", uncoveredIntervals);
            responseContext.put("uncoveredIntervalsOverflowed", uncoveredIntervalsOverflowed);
        }
    } else {
        for (Interval interval : query.getIntervals()) {
            Iterables.addAll(serversLookup, timeline.lookup(interval));
        }
    }
    // Let tool chest filter out unneeded segments
    final List<TimelineObjectHolder<String, ServerSelector>> filteredServersLookup = toolChest.filterSegments(query, serversLookup);
    Map<String, Optional<RangeSet<String>>> dimensionRangeCache = Maps.newHashMap();
    // Filter unneeded chunks based on partition dimension
    for (TimelineObjectHolder<String, ServerSelector> holder : filteredServersLookup) {
        final Set<PartitionChunk<ServerSelector>> filteredChunks = DimFilterUtils.filterShards(query.getFilter(), holder.getObject(), new Function<PartitionChunk<ServerSelector>, ShardSpec>() {

            @Override
            public ShardSpec apply(PartitionChunk<ServerSelector> input) {
                return input.getObject().getSegment().getShardSpec();
            }
        }, dimensionRangeCache);
        for (PartitionChunk<ServerSelector> chunk : filteredChunks) {
            ServerSelector selector = chunk.getObject();
            final SegmentDescriptor descriptor = new SegmentDescriptor(holder.getInterval(), holder.getVersion(), chunk.getChunkNumber());
            segments.add(Pair.of(selector, descriptor));
        }
    }
    final byte[] queryCacheKey;
    if (// implies strategy != null
    (populateCache || useCache) && // explicit bySegment queries are never cached
    !isBySegment) {
        queryCacheKey = strategy.computeCacheKey(query);
    } else {
        queryCacheKey = null;
    }
    if (query.getContext().get(QueryResource.HDR_IF_NONE_MATCH) != null) {
        String prevEtag = (String) query.getContext().get(QueryResource.HDR_IF_NONE_MATCH);
        //compute current Etag
        Hasher hasher = Hashing.sha1().newHasher();
        boolean hasOnlyHistoricalSegments = true;
        for (Pair<ServerSelector, SegmentDescriptor> p : segments) {
            if (!p.lhs.pick().getServer().isAssignable()) {
                hasOnlyHistoricalSegments = false;
                break;
            }
            hasher.putString(p.lhs.getSegment().getIdentifier(), Charsets.UTF_8);
        }
        if (hasOnlyHistoricalSegments) {
            hasher.putBytes(queryCacheKey == null ? strategy.computeCacheKey(query) : queryCacheKey);
            String currEtag = Base64.encodeBase64String(hasher.hash().asBytes());
            responseContext.put(QueryResource.HDR_ETAG, currEtag);
            if (prevEtag.equals(currEtag)) {
                return Sequences.empty();
            }
        }
    }
    if (queryCacheKey != null) {
        // cachKeys map must preserve segment ordering, in order for shards to always be combined in the same order
        Map<Pair<ServerSelector, SegmentDescriptor>, Cache.NamedKey> cacheKeys = Maps.newLinkedHashMap();
        for (Pair<ServerSelector, SegmentDescriptor> segment : segments) {
            final Cache.NamedKey segmentCacheKey = CacheUtil.computeSegmentCacheKey(segment.lhs.getSegment().getIdentifier(), segment.rhs, queryCacheKey);
            cacheKeys.put(segment, segmentCacheKey);
        }
        // Pull cached segments from cache and remove from set of segments to query
        final Map<Cache.NamedKey, byte[]> cachedValues;
        if (useCache) {
            cachedValues = cache.getBulk(Iterables.limit(cacheKeys.values(), cacheConfig.getCacheBulkMergeLimit()));
        } else {
            cachedValues = ImmutableMap.of();
        }
        for (Map.Entry<Pair<ServerSelector, SegmentDescriptor>, Cache.NamedKey> entry : cacheKeys.entrySet()) {
            Pair<ServerSelector, SegmentDescriptor> segment = entry.getKey();
            Cache.NamedKey segmentCacheKey = entry.getValue();
            final Interval segmentQueryInterval = segment.rhs.getInterval();
            final byte[] cachedValue = cachedValues.get(segmentCacheKey);
            if (cachedValue != null) {
                // remove cached segment from set of segments to query
                segments.remove(segment);
                cachedResults.add(Pair.of(segmentQueryInterval, cachedValue));
            } else if (populateCache) {
                // otherwise, if populating cache, add segment to list of segments to cache
                final String segmentIdentifier = segment.lhs.getSegment().getIdentifier();
                cachePopulatorMap.put(String.format("%s_%s", segmentIdentifier, segmentQueryInterval), new CachePopulator(cache, objectMapper, segmentCacheKey));
            }
        }
    }
    // Compile list of all segments not pulled from cache
    for (Pair<ServerSelector, SegmentDescriptor> segment : segments) {
        final QueryableDruidServer queryableDruidServer = segment.lhs.pick();
        if (queryableDruidServer == null) {
            log.makeAlert("No servers found for SegmentDescriptor[%s] for DataSource[%s]?! How can this be?!", segment.rhs, query.getDataSource()).emit();
        } else {
            final DruidServer server = queryableDruidServer.getServer();
            List<SegmentDescriptor> descriptors = serverSegments.get(server);
            if (descriptors == null) {
                descriptors = Lists.newArrayList();
                serverSegments.put(server, descriptors);
            }
            descriptors.add(segment.rhs);
        }
    }
    return new LazySequence<>(new Supplier<Sequence<T>>() {

        @Override
        public Sequence<T> get() {
            ArrayList<Sequence<T>> sequencesByInterval = Lists.newArrayList();
            addSequencesFromCache(sequencesByInterval);
            addSequencesFromServer(sequencesByInterval);
            return mergeCachedAndUncachedSequences(query, sequencesByInterval);
        }

        private void addSequencesFromCache(ArrayList<Sequence<T>> listOfSequences) {
            if (strategy == null) {
                return;
            }
            final Function<Object, T> pullFromCacheFunction = strategy.pullFromCache();
            final TypeReference<Object> cacheObjectClazz = strategy.getCacheObjectClazz();
            for (Pair<Interval, byte[]> cachedResultPair : cachedResults) {
                final byte[] cachedResult = cachedResultPair.rhs;
                Sequence<Object> cachedSequence = new BaseSequence<>(new BaseSequence.IteratorMaker<Object, Iterator<Object>>() {

                    @Override
                    public Iterator<Object> make() {
                        try {
                            if (cachedResult.length == 0) {
                                return Iterators.emptyIterator();
                            }
                            return objectMapper.readValues(objectMapper.getFactory().createParser(cachedResult), cacheObjectClazz);
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }

                    @Override
                    public void cleanup(Iterator<Object> iterFromMake) {
                    }
                });
                listOfSequences.add(Sequences.map(cachedSequence, pullFromCacheFunction));
            }
        }

        private void addSequencesFromServer(ArrayList<Sequence<T>> listOfSequences) {
            listOfSequences.ensureCapacity(listOfSequences.size() + serverSegments.size());
            final Query<T> rewrittenQuery = query.withOverriddenContext(contextBuilder.build());
            // The data gets handled as a Future and parsed in the long Sequence chain in the resultSeqToAdd setter.
            for (Map.Entry<DruidServer, List<SegmentDescriptor>> entry : serverSegments.entrySet()) {
                final DruidServer server = entry.getKey();
                final List<SegmentDescriptor> descriptors = entry.getValue();
                final QueryRunner clientQueryable = serverView.getQueryRunner(server);
                if (clientQueryable == null) {
                    log.error("WTF!? server[%s] doesn't have a client Queryable?", server);
                    continue;
                }
                final MultipleSpecificSegmentSpec segmentSpec = new MultipleSpecificSegmentSpec(descriptors);
                final Sequence<T> resultSeqToAdd;
                if (!server.isAssignable() || !populateCache || isBySegment) {
                    // Direct server queryable
                    if (!isBySegment) {
                        resultSeqToAdd = clientQueryable.run(query.withQuerySegmentSpec(segmentSpec), responseContext);
                    } else {
                        // bySegment queries need to be de-serialized, see DirectDruidClient.run()
                        @SuppressWarnings("unchecked") final Query<Result<BySegmentResultValueClass<T>>> bySegmentQuery = (Query<Result<BySegmentResultValueClass<T>>>) ((Query) query);
                        @SuppressWarnings("unchecked") final Sequence<Result<BySegmentResultValueClass<T>>> resultSequence = clientQueryable.run(bySegmentQuery.withQuerySegmentSpec(segmentSpec), responseContext);
                        resultSeqToAdd = (Sequence) Sequences.map(resultSequence, new Function<Result<BySegmentResultValueClass<T>>, Result<BySegmentResultValueClass<T>>>() {

                            @Override
                            public Result<BySegmentResultValueClass<T>> apply(Result<BySegmentResultValueClass<T>> input) {
                                final BySegmentResultValueClass<T> bySegmentValue = input.getValue();
                                return new Result<>(input.getTimestamp(), new BySegmentResultValueClass<T>(Lists.transform(bySegmentValue.getResults(), toolChest.makePreComputeManipulatorFn(query, MetricManipulatorFns.deserializing())), bySegmentValue.getSegmentId(), bySegmentValue.getInterval()));
                            }
                        });
                    }
                } else {
                    // Requires some manipulation on broker side
                    @SuppressWarnings("unchecked") final Sequence<Result<BySegmentResultValueClass<T>>> runningSequence = clientQueryable.run(rewrittenQuery.withQuerySegmentSpec(segmentSpec), responseContext);
                    resultSeqToAdd = new MergeSequence(query.getResultOrdering(), Sequences.<Result<BySegmentResultValueClass<T>>, Sequence<T>>map(runningSequence, new Function<Result<BySegmentResultValueClass<T>>, Sequence<T>>() {

                        private final Function<T, Object> cacheFn = strategy.prepareForCache();

                        // Acctually do something with the results
                        @Override
                        public Sequence<T> apply(Result<BySegmentResultValueClass<T>> input) {
                            final BySegmentResultValueClass<T> value = input.getValue();
                            final CachePopulator cachePopulator = cachePopulatorMap.get(String.format("%s_%s", value.getSegmentId(), value.getInterval()));
                            final Queue<ListenableFuture<Object>> cacheFutures = new ConcurrentLinkedQueue<>();
                            return Sequences.<T>withEffect(Sequences.<T, T>map(Sequences.<T, T>map(Sequences.<T>simple(value.getResults()), new Function<T, T>() {

                                @Override
                                public T apply(final T input) {
                                    if (cachePopulator != null) {
                                        // only compute cache data if populating cache
                                        cacheFutures.add(backgroundExecutorService.submit(new Callable<Object>() {

                                            @Override
                                            public Object call() {
                                                return cacheFn.apply(input);
                                            }
                                        }));
                                    }
                                    return input;
                                }
                            }), toolChest.makePreComputeManipulatorFn(// This casting is sub-optimal, but hasn't caused any major problems yet...
                            (Query) rewrittenQuery, MetricManipulatorFns.deserializing())), new Runnable() {

                                @Override
                                public void run() {
                                    if (cachePopulator != null) {
                                        Futures.addCallback(Futures.allAsList(cacheFutures), new FutureCallback<List<Object>>() {

                                            @Override
                                            public void onSuccess(List<Object> cacheData) {
                                                cachePopulator.populate(cacheData);
                                                // Help out GC by making sure all references are gone
                                                cacheFutures.clear();
                                            }

                                            @Override
                                            public void onFailure(Throwable throwable) {
                                                log.error(throwable, "Background caching failed");
                                            }
                                        }, backgroundExecutorService);
                                    }
                                }
                            }, MoreExecutors.sameThreadExecutor());
                        // End withEffect
                        }
                    }));
                }
                listOfSequences.add(resultSeqToAdd);
            }
        }
    });
}
Also used : BaseQuery(io.druid.query.BaseQuery) Query(io.druid.query.Query) ArrayList(java.util.ArrayList) ShardSpec(io.druid.timeline.partition.ShardSpec) QueryableDruidServer(io.druid.client.selector.QueryableDruidServer) Result(io.druid.query.Result) ServerSelector(io.druid.client.selector.ServerSelector) List(java.util.List) ArrayList(java.util.ArrayList) FutureCallback(com.google.common.util.concurrent.FutureCallback) Optional(com.google.common.base.Optional) QueryableDruidServer(io.druid.client.selector.QueryableDruidServer) BySegmentResultValueClass(io.druid.query.BySegmentResultValueClass) LazySequence(io.druid.java.util.common.guava.LazySequence) BaseSequence(io.druid.java.util.common.guava.BaseSequence) Sequence(io.druid.java.util.common.guava.Sequence) MergeSequence(io.druid.java.util.common.guava.MergeSequence) ImmutableMap(com.google.common.collect.ImmutableMap) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) Hasher(com.google.common.hash.Hasher) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Interval(org.joda.time.Interval) Cache(io.druid.client.cache.Cache) MultipleSpecificSegmentSpec(io.druid.query.spec.MultipleSpecificSegmentSpec) Function(com.google.common.base.Function) MergeSequence(io.druid.java.util.common.guava.MergeSequence) SegmentDescriptor(io.druid.query.SegmentDescriptor) Iterator(java.util.Iterator) PartitionChunk(io.druid.timeline.partition.PartitionChunk) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Pair(io.druid.java.util.common.Pair) IOException(java.io.IOException) QueryRunner(io.druid.query.QueryRunner) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) LazySequence(io.druid.java.util.common.guava.LazySequence)

Example 52 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class CachingQueryRunner method run.

@Override
public Sequence<T> run(Query<T> query, Map<String, Object> responseContext) {
    final CacheStrategy strategy = toolChest.getCacheStrategy(query);
    final boolean populateCache = CacheUtil.populateCacheOnDataNodes(query, strategy, cacheConfig);
    final boolean useCache = CacheUtil.useCacheOnDataNodes(query, strategy, cacheConfig);
    final Cache.NamedKey key;
    if (strategy != null && (useCache || populateCache)) {
        key = CacheUtil.computeSegmentCacheKey(segmentIdentifier, segmentDescriptor, strategy.computeCacheKey(query));
    } else {
        key = null;
    }
    if (useCache) {
        final Function cacheFn = strategy.pullFromCache();
        final byte[] cachedResult = cache.get(key);
        if (cachedResult != null) {
            final TypeReference cacheObjectClazz = strategy.getCacheObjectClazz();
            return Sequences.map(new BaseSequence<>(new BaseSequence.IteratorMaker<T, Iterator<T>>() {

                @Override
                public Iterator<T> make() {
                    try {
                        if (cachedResult.length == 0) {
                            return Iterators.emptyIterator();
                        }
                        return mapper.readValues(mapper.getFactory().createParser(cachedResult), cacheObjectClazz);
                    } catch (IOException e) {
                        throw Throwables.propagate(e);
                    }
                }

                @Override
                public void cleanup(Iterator<T> iterFromMake) {
                }
            }), cacheFn);
        }
    }
    final Collection<ListenableFuture<?>> cacheFutures = Collections.synchronizedList(Lists.<ListenableFuture<?>>newLinkedList());
    if (populateCache) {
        final Function cacheFn = strategy.prepareForCache();
        return Sequences.withEffect(Sequences.map(base.run(query, responseContext), new Function<T, T>() {

            @Override
            public T apply(final T input) {
                final SettableFuture<Object> future = SettableFuture.create();
                cacheFutures.add(future);
                backgroundExecutorService.submit(new Runnable() {

                    @Override
                    public void run() {
                        try {
                            future.set(cacheFn.apply(input));
                        } catch (Exception e) {
                            // if there is exception, should setException to quit the caching processing
                            future.setException(e);
                        }
                    }
                });
                return input;
            }
        }), new Runnable() {

            @Override
            public void run() {
                try {
                    CacheUtil.populate(cache, mapper, key, Futures.allAsList(cacheFutures).get());
                } catch (Exception e) {
                    log.error(e, "Error while getting future for cache task");
                    throw Throwables.propagate(e);
                }
            }
        }, backgroundExecutorService);
    } else {
        return base.run(query, responseContext);
    }
}
Also used : IOException(java.io.IOException) IOException(java.io.IOException) Function(com.google.common.base.Function) Iterator(java.util.Iterator) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) TypeReference(com.fasterxml.jackson.core.type.TypeReference) CacheStrategy(io.druid.query.CacheStrategy) Cache(io.druid.client.cache.Cache)

Example 53 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class AppenderatorPlumber method mergeAndPush.

private void mergeAndPush() {
    final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
    final Period windowPeriod = config.getWindowPeriod();
    final long windowMillis = windowPeriod.toStandardDuration().getMillis();
    log.info("Starting merge and push.");
    DateTime minTimestampAsDate = segmentGranularity.bucketStart(new DateTime(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis));
    long minTimestamp = minTimestampAsDate.getMillis();
    final List<SegmentIdentifier> appenderatorSegments = appenderator.getSegments();
    final List<SegmentIdentifier> segmentsToPush = Lists.newArrayList();
    if (shuttingDown) {
        log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size());
        segmentsToPush.addAll(appenderatorSegments);
    } else {
        log.info("Found [%,d] segments. Attempting to hand off segments that start before [%s].", appenderatorSegments.size(), minTimestampAsDate);
        for (SegmentIdentifier segment : appenderatorSegments) {
            final Long intervalStart = segment.getInterval().getStartMillis();
            if (intervalStart < minTimestamp) {
                log.info("Adding entry [%s] for merge and push.", segment);
                segmentsToPush.add(segment);
            } else {
                log.info("Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.", segment, new DateTime(intervalStart), minTimestampAsDate);
            }
        }
    }
    log.info("Found [%,d] segments to persist and merge", segmentsToPush.size());
    final Function<Throwable, Void> errorHandler = new Function<Throwable, Void>() {

        @Override
        public Void apply(Throwable throwable) {
            final List<String> segmentIdentifierStrings = Lists.transform(segmentsToPush, new Function<SegmentIdentifier, String>() {

                @Override
                public String apply(SegmentIdentifier input) {
                    return input.getIdentifierAsString();
                }
            });
            log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource()).addData("segments", segmentIdentifierStrings).emit();
            if (shuttingDown) {
                // We're trying to shut down, and these segments failed to push. Let's just get rid of them.
                // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                cleanShutdown = false;
                for (SegmentIdentifier identifier : segmentsToPush) {
                    dropSegment(identifier);
                }
            }
            return null;
        }
    };
    // WARNING: Committers.nil() here means that on-disk data can get out of sync with committing.
    Futures.addCallback(appenderator.push(segmentsToPush, Committers.nil()), new FutureCallback<SegmentsAndMetadata>() {

        @Override
        public void onSuccess(SegmentsAndMetadata result) {
            // Immediately publish after pushing
            for (DataSegment pushedSegment : result.getSegments()) {
                try {
                    segmentPublisher.publishSegment(pushedSegment);
                } catch (Exception e) {
                    errorHandler.apply(e);
                }
            }
            log.info("Published [%,d] sinks.", segmentsToPush.size());
        }

        @Override
        public void onFailure(Throwable e) {
            log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size());
            errorHandler.apply(e);
        }
    });
}
Also used : Period(org.joda.time.Period) Granularity(io.druid.java.util.common.granularity.Granularity) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) Function(com.google.common.base.Function)

Example 54 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class RealtimeManager method getQueryRunnerForSegments.

@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
    final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
    final Map<Integer, FireChief> partitionChiefs = chiefs.get(Iterables.getOnlyElement(query.getDataSource().getNames()));
    return partitionChiefs == null ? new NoopQueryRunner<T>() : factory.getToolchest().mergeResults(factory.mergeRunners(MoreExecutors.sameThreadExecutor(), Iterables.transform(specs, new Function<SegmentDescriptor, QueryRunner<T>>() {

        @Override
        public QueryRunner<T> apply(SegmentDescriptor spec) {
            final FireChief retVal = partitionChiefs.get(spec.getPartitionNumber());
            return retVal == null ? new NoopQueryRunner<T>() : retVal.getQueryRunner(query.withQuerySegmentSpec(new SpecificSegmentSpec(spec)));
        }
    })));
}
Also used : Function(com.google.common.base.Function) Query(io.druid.query.Query) SpecificSegmentSpec(io.druid.query.spec.SpecificSegmentSpec) SegmentDescriptor(io.druid.query.SegmentDescriptor)

Example 55 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class AppenderatorImpl method bootstrapSinksFromDisk.

/**
   * Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
   *
   * @return persisted commit metadata
   */
private Object bootstrapSinksFromDisk() {
    Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
    final File baseDir = tuningConfig.getBasePersistDirectory();
    if (!baseDir.exists()) {
        return null;
    }
    final File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    }
    final File commitFile = computeCommitFile();
    final Committed committed;
    try {
        if (commitFile.exists()) {
            committed = objectMapper.readValue(commitFile, Committed.class);
        } else {
            committed = Committed.nil();
        }
    } catch (Exception e) {
        throw new ISE(e, "Failed to read commitFile: %s", commitFile);
    }
    log.info("Loading sinks from[%s]: %s", baseDir, committed.getHydrants().keySet());
    for (File sinkDir : files) {
        final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
        if (!identifierFile.isFile()) {
            // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
            continue;
        }
        try {
            final SegmentIdentifier identifier = objectMapper.readValue(new File(sinkDir, "identifier.json"), SegmentIdentifier.class);
            final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString());
            if (committedHydrants <= 0) {
                log.info("Removing uncommitted sink at [%s]", sinkDir);
                FileUtils.deleteDirectory(sinkDir);
                continue;
            }
            // To avoid reading and listing of "merged" dir and other special files
            final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {

                @Override
                public boolean accept(File dir, String fileName) {
                    return !(Ints.tryParse(fileName) == null);
                }
            });
            Arrays.sort(sinkFiles, new Comparator<File>() {

                @Override
                public int compare(File o1, File o2) {
                    return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
                }
            });
            List<FireHydrant> hydrants = Lists.newArrayList();
            for (File hydrantDir : sinkFiles) {
                final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
                if (hydrantNumber >= committedHydrants) {
                    log.info("Removing uncommitted segment at [%s]", hydrantDir);
                    FileUtils.deleteDirectory(hydrantDir);
                } else {
                    log.info("Loading previously persisted segment at [%s]", hydrantDir);
                    if (hydrantNumber != hydrants.size()) {
                        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
                    }
                    hydrants.add(new FireHydrant(new QueryableIndexSegment(identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir)), hydrantNumber));
                }
            }
            // Make sure we loaded enough hydrants.
            if (committedHydrants != hydrants.size()) {
                throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
            }
            Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions(), hydrants);
            sinks.put(identifier, currSink);
            sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
            segmentAnnouncer.announceSegment(currSink.getSegment());
        } catch (IOException e) {
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
        }
    }
    // Make sure we loaded all committed sinks.
    final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), new Function<SegmentIdentifier, String>() {

        @Override
        public String apply(SegmentIdentifier input) {
            return input.getIdentifierAsString();
        }
    }));
    final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
    if (!missingSinks.isEmpty()) {
        throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
    }
    return committed.getMetadata();
}
Also used : QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) IOException(java.io.IOException) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) FilenameFilter(java.io.FilenameFilter) Function(com.google.common.base.Function) Sink(io.druid.segment.realtime.plumber.Sink) ISE(io.druid.java.util.common.ISE) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File)

Aggregations

Function (com.google.common.base.Function)616 List (java.util.List)138 ArrayList (java.util.ArrayList)114 Nullable (javax.annotation.Nullable)103 Map (java.util.Map)97 IOException (java.io.IOException)89 HashMap (java.util.HashMap)78 Test (org.junit.Test)73 ImmutableList (com.google.common.collect.ImmutableList)49 File (java.io.File)46 Set (java.util.Set)46 Collection (java.util.Collection)35 ImmutableMap (com.google.common.collect.ImmutableMap)34 DateTime (org.joda.time.DateTime)33 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)29 HashSet (java.util.HashSet)27 Iterator (java.util.Iterator)27 LinkedList (java.util.LinkedList)26 ExecutionException (java.util.concurrent.ExecutionException)24 Collectors (java.util.stream.Collectors)15