Search in sources :

Example 1 with Accumulator

use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.

the class GroupByQueryHelper method createBySegmentAccumulatorPair.

/**
 * Creates a queue together with an {@link Accumulator} that appends every accumulated element to it.
 *
 * The queue is a {@link ConcurrentLinkedQueue} because, in the parallel query runner, multiple threads add to it
 * concurrently.
 *
 * @return pair of (initial empty queue, accumulator that offers each input to the queue it is given); the
 *         accumulator throws {@link ISE} when handed a null element
 */
public static <T> Pair<Queue, Accumulator<Queue, T>> createBySegmentAccumulatorPair() {
    // Shared sink: must tolerate concurrent offers from several runner threads.
    final Queue sharedQueue = new ConcurrentLinkedQueue<>();
    final Accumulator<Queue, T> appender = (queue, element) -> {
        // Null results are never valid; fail fast rather than enqueueing them.
        if (element == null) {
            throw new ISE("Cannot have null result");
        }
        queue.offer(element);
        return queue;
    };
    return new Pair<>(sharedQueue, appender);
}
Also used : Accumulator(org.apache.druid.java.util.common.guava.Accumulator) ISE(org.apache.druid.java.util.common.ISE) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Queue(java.util.Queue) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Pair(org.apache.druid.java.util.common.Pair)

Example 2 with Accumulator

use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.

the class RowBasedGrouperHelper method createGrouperAccumulatorPair.

/**
 * Create a {@link Grouper} that groups according to the dimensions and aggregators in "query", along with
 * an {@link Accumulator} that accepts ResultRows and forwards them to the grouper.
 *
 * The pair will operate in one of two modes:
 *
 * 1) Combining mode (used if "subquery" is null). In this mode, filters from the "query" are ignored, and
 * its aggregators are converted into combining form. The input ResultRows are assumed to be partially-grouped
 * results originating from the provided "query".
 *
 * 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters from the "query" (both intervals
 * and dim filters) are respected, and its aggregators are used in standard (not combining) form. The input
 * ResultRows are assumed to be results originating from the provided "subquery".
 *
 * The returned accumulator lazily initializes the grouper on first use, passes a non-OK prior result straight
 * through without doing more work, and returns an OK result for rows rejected by the filter.
 *
 * @param query               query that we are grouping for
 * @param subquery            optional subquery that we are receiving results from (see combining vs. subquery
 *                            mode above)
 * @param config              groupBy query config
 * @param bufferSupplier      supplier of merge buffers
 * @param combineBufferHolder holder of combine buffers. Unused if concurrencyHint = -1, and may be null in that case
 * @param concurrencyHint     -1 for single-threaded Grouper, >=1 for concurrent Grouper
 * @param temporaryStorage    temporary storage used for spilling from the Grouper
 * @param spillMapper         object mapper used for spilling from the Grouper
 * @param grouperSorter       executor service used for parallel combining. Unused if concurrencyHint = -1, and may
 *                            be null in that case
 * @param priority            query priority
 * @param hasQueryTimeout     whether or not this query has a timeout
 * @param queryTimeoutAt      when this query times out, in milliseconds since the epoch
 * @param mergeBufferSize     size of the merge buffers from "bufferSupplier"
 *
 * @return pair of (grouper, accumulator feeding rows into that grouper)
 *
 * @throws IllegalArgumentException if concurrencyHint is neither -1 nor >= 1
 * @throws NullPointerException     if concurrencyHint >= 1 and grouperSorter is null
 */
public static Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> createGrouperAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier,
    @Nullable final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder,
    final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage,
    final ObjectMapper spillMapper,
    @Nullable final ListeningExecutorService grouperSorter,
    final int priority,
    final boolean hasQueryTimeout,
    final long queryTimeoutAt,
    final int mergeBufferSize
) {
    // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
    Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");
    if (concurrencyHint >= 1) {
        Preconditions.checkNotNull(grouperSorter, "grouperSorter executor must be provided");
    }
    // See method-level javadoc; we go into combining mode if there is no subquery.
    final boolean combining = subquery == null;
    final List<ColumnType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final boolean includeTimestamp = query.getResultRowHasTimestamp();
    // Holds the row currently being aggregated so the column selectors can read from it on the calling thread.
    final ThreadLocal<ResultRow> columnSelectorRow = new ThreadLocal<>();
    ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory(combining ? query : subquery, columnSelectorRow::get, RowSignature.Finalization.UNKNOWN);
    // Apply virtual columns if we are in subquery (non-combining) mode.
    if (!combining) {
        columnSelectorFactory = query.getVirtualColumns().wrap(columnSelectorFactory);
    }
    final boolean willApplyLimitPushDown = query.isApplyLimitPushDown();
    final DefaultLimitSpec limitSpec = willApplyLimitPushDown ? (DefaultLimitSpec) query.getLimitSpec() : null;
    boolean sortHasNonGroupingFields = false;
    if (willApplyLimitPushDown) {
        sortHasNonGroupingFields = DefaultLimitSpec.sortingOrderHasNonGroupingFields(limitSpec, query.getDimensions());
    }
    final AggregatorFactory[] aggregatorFactories;
    if (combining) {
        aggregatorFactories = query.getAggregatorSpecs().stream().map(AggregatorFactory::getCombiningFactory).toArray(AggregatorFactory[]::new);
    } else {
        aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
    }
    // The merging dictionary budget is split evenly between the concurrent groupers.
    final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(includeTimestamp, query.getContextSortByDimsFirst(), query.getDimensions(), querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint), valueTypes, aggregatorFactories, limitSpec);
    final Grouper<RowBasedKey> grouper;
    if (concurrencyHint == -1) {
        grouper = new SpillingGrouper<>(bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories, querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, true, limitSpec, sortHasNonGroupingFields, mergeBufferSize);
    } else {
        final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(includeTimestamp, query.getContextSortByDimsFirst(), query.getDimensions(), // use entire dictionary space for combining key serde
        querySpecificConfig.getMaxMergingDictionarySize(), valueTypes, aggregatorFactories, limitSpec);
        grouper = new ConcurrentGrouper<>(querySpecificConfig, bufferSupplier, combineBufferHolder, keySerdeFactory, combineKeySerdeFactory, columnSelectorFactory, aggregatorFactories, temporaryStorage, spillMapper, concurrencyHint, limitSpec, sortHasNonGroupingFields, grouperSorter, priority, hasQueryTimeout, queryTimeoutAt);
    }
    // One key slot per dimension, plus one for the timestamp when result rows carry it.
    final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
    final ValueExtractFunction valueExtractFn = makeValueExtractFunction(query, combining, includeTimestamp, columnSelectorFactory, valueTypes);
    final Predicate<ResultRow> rowPredicate;
    if (combining) {
        // Filters are not applied in combining mode.
        rowPredicate = row -> true;
    } else {
        rowPredicate = getResultRowPredicate(query, subquery);
    }
    final Accumulator<AggregateResult, ResultRow> accumulator = (priorResult, row) -> {
        BaseQuery.checkInterrupted();
        if (priorResult != null && !priorResult.isOk()) {
            // Pass-through error returns without doing more work.
            return priorResult;
        }
        if (!grouper.isInitialized()) {
            grouper.init();
        }
        if (!rowPredicate.test(row)) {
            return AggregateResult.ok();
        }
        columnSelectorRow.set(row);
        try {
            final Comparable[] key = new Comparable[keySize];
            valueExtractFn.apply(row, key);
            return grouper.aggregate(new RowBasedKey(key));
        } finally {
            // Always drop the reference, even when key extraction or aggregation throws, so that the row is not
            // left pinned in this thread's ThreadLocal storage.
            columnSelectorRow.set(null);
        }
    };
    return new Pair<>(grouper, accumulator);
}
Also used : Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) IntArrayUtils(org.apache.druid.common.utils.IntArrayUtils) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) IndexedInts(org.apache.druid.segment.data.IndexedInts) ByteBuffer(java.nio.ByteBuffer) Pair(org.apache.druid.java.util.common.Pair) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnSelectorStrategyFactory(org.apache.druid.query.dimension.ColumnSelectorStrategyFactory) JsonValue(com.fasterxml.jackson.annotation.JsonValue) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) BufferComparator(org.apache.druid.query.groupby.epinephelinae.Grouper.BufferComparator) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) IAE(org.apache.druid.java.util.common.IAE) ToLongFunction(java.util.function.ToLongFunction) Longs(com.google.common.primitives.Longs) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ResultRow(org.apache.druid.query.groupby.ResultRow) Predicate(java.util.function.Predicate) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) Collectors(java.util.stream.Collectors) List(java.util.List) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) 
DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BaseDoubleColumnValueSelector(org.apache.druid.segment.BaseDoubleColumnValueSelector) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Accumulator(org.apache.druid.java.util.common.guava.Accumulator) IntStream(java.util.stream.IntStream) ColumnSelectorPlus(org.apache.druid.query.ColumnSelectorPlus) ComparableList(org.apache.druid.segment.data.ComparableList) Supplier(com.google.common.base.Supplier) BaseQuery(org.apache.druid.query.BaseQuery) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) ColumnSelectorStrategy(org.apache.druid.query.dimension.ColumnSelectorStrategy) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) DimensionSelector(org.apache.druid.segment.DimensionSelector) Nullable(javax.annotation.Nullable) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) ColumnInspector(org.apache.druid.segment.ColumnInspector) StringComparator(org.apache.druid.query.ordering.StringComparator) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) DateTime(org.joda.time.DateTime) Ints(com.google.common.primitives.Ints) BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) Closeable(java.io.Closeable) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) ColumnType(org.apache.druid.segment.column.ColumnType) 
Preconditions(com.google.common.base.Preconditions) BitSet(java.util.BitSet) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) Comparator(java.util.Comparator) Filters(org.apache.druid.segment.filter.Filters) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) Filter(org.apache.druid.query.filter.Filter) ColumnType(org.apache.druid.segment.column.ColumnType) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) Pair(org.apache.druid.java.util.common.Pair) ResultRow(org.apache.druid.query.groupby.ResultRow) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)

Example 3 with Accumulator

use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.

the class GroupByMergingQueryRunnerV2 method run.

/**
 * Runs every runner in "queryables" and merges their rows into a single {@link Grouper}, returning the grouped
 * rows as a sequence.
 *
 * For by-segment queries, or when CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is already set in the query
 * context, no merging is done here; the call is delegated to a {@link ChainedExecutionQueryRunner}.
 *
 * Otherwise all resource acquisition and aggregation happens inside the {@code make()} method of the returned
 * {@link BaseSequence}'s iterator maker, not in this method itself.
 *
 * @param queryPlus       the query to run; its query is cast to {@link GroupByQuery}
 * @param responseContext response context handed to each underlying runner
 *
 * @return sequence of merged result rows
 */
@Override
public Sequence<ResultRow> run(final QueryPlus<ResultRow> queryPlus, final ResponseContext responseContext) {
    final GroupByQuery query = (GroupByQuery) queryPlus.getQuery();
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    // CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
    // (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
    // merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
    // will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
    // I'm not sure of a better way to do this without tweaking how realtime servers do queries.
    final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
    // The runners below always see the flag set to true, so any nested merging runner takes the chained path.
    final QueryPlus<ResultRow> queryPlusForRunners = queryPlus.withQuery(query.withOverriddenContext(ImmutableMap.of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true))).withoutThreadUnsafeState();
    if (QueryContexts.isBySegment(query) || forceChainedExecution) {
        ChainedExecutionQueryRunner<ResultRow> runner = new ChainedExecutionQueryRunner<>(queryProcessingPool, queryWatcher, queryables);
        return runner.run(queryPlusForRunners, responseContext);
    }
    final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
    // A random UUID keeps the spill directory distinct per invocation, even for the same query id.
    final File temporaryStorageDirectory = new File(processingTmpDir, StringUtils.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
    final int priority = QueryContexts.getPriority(query);
    // Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
    // query processing together.
    final long queryTimeout = QueryContexts.getTimeout(query);
    final boolean hasTimeout = QueryContexts.hasTimeout(query);
    final long timeoutAt = System.currentTimeMillis() + queryTimeout;
    return new BaseSequence<>(new BaseSequence.IteratorMaker<ResultRow, CloseableGrouperIterator<RowBasedKey, ResultRow>>() {

        @Override
        public CloseableGrouperIterator<RowBasedKey, ResultRow> make() {
            // Everything acquired below is registered here, so that it is released either by the catch block
            // (setup failure) or through the iterator that "resources" is handed to on success.
            final Closer resources = Closer.create();
            try {
                final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
                final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder = ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
                resources.register(temporaryStorageHolder);
                // If parallelCombine is enabled, we need two merge buffers for parallel aggregating and parallel combining
                final int numMergeBuffers = querySpecificConfig.getNumParallelCombineThreads() > 1 ? 2 : 1;
                final List<ReferenceCountingResourceHolder<ByteBuffer>> mergeBufferHolders = getMergeBuffersHolder(numMergeBuffers, hasTimeout, timeoutAt);
                resources.registerAll(mergeBufferHolders);
                final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder = mergeBufferHolders.get(0);
                final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder = numMergeBuffers == 2 ? mergeBufferHolders.get(1) : null;
                // Subquery argument is null, so the pair is created in combining mode.
                Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, null, config, Suppliers.ofInstance(mergeBufferHolder.get()), combineBufferHolder, concurrencyHint, temporaryStorage, spillMapper, // Passed as executor service
                queryProcessingPool, priority, hasTimeout, timeoutAt, mergeBufferSize);
                final Grouper<RowBasedKey> grouper = pair.lhs;
                final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;
                // Initialize up front, before any task is submitted.
                grouper.init();
                final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder = ReferenceCountingResourceHolder.fromCloseable(grouper);
                resources.register(grouperHolder);
                // Lists.newArrayList forces the transform eagerly: one task is submitted per runner here.
                List<ListenableFuture<AggregateResult>> futures = Lists.newArrayList(Iterables.transform(queryables, new Function<QueryRunner<ResultRow>, ListenableFuture<AggregateResult>>() {

                    @Override
                    public ListenableFuture<AggregateResult> apply(final QueryRunner<ResultRow> input) {
                        if (input == null) {
                            throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                        }
                        ListenableFuture<AggregateResult> future = queryProcessingPool.submitRunnerTask(new AbstractPrioritizedQueryRunnerCallable<AggregateResult, ResultRow>(priority, input) {

                            @Override
                            public AggregateResult call() {
                                try (// These variables are used to close releasers automatically.
                                @SuppressWarnings("unused") Releaser bufferReleaser = mergeBufferHolder.increment();
                                    @SuppressWarnings("unused") Releaser grouperReleaser = grouperHolder.increment()) {
                                    // Feed this runner's rows into the shared grouper. The resulting AggregateResult
                                    // carries the outcome (see AggregateResult.isOk()) rather than a plain boolean.
                                    return input.run(queryPlusForRunners, responseContext).accumulate(AggregateResult.ok(), accumulator);
                                } catch (QueryInterruptedException | QueryTimeoutException e) {
                                    // Interruption and timeout are rethrown as-is, without being logged or wrapped.
                                    throw e;
                                } catch (Exception e) {
                                    log.error(e, "Exception with one of the sequences!");
                                    throw new RuntimeException(e);
                                }
                            }
                        });
                        if (isSingleThreaded) {
                            // Single-threaded mode: wait for this task before the next runner is submitted.
                            waitForFutureCompletion(query, ImmutableList.of(future), hasTimeout, timeoutAt - System.currentTimeMillis());
                        }
                        return future;
                    }
                }));
                if (!isSingleThreaded) {
                    // Parallel mode: all tasks are already submitted; wait for them together with the remaining time.
                    waitForFutureCompletion(query, futures, hasTimeout, timeoutAt - System.currentTimeMillis());
                }
                return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, resources);
            } catch (Throwable t) {
                // Exception caught while setting up the iterator; release resources.
                try {
                    resources.close();
                } catch (Exception ex) {
                    // Keep the original failure as the primary exception; attach the close failure to it.
                    t.addSuppressed(ex);
                }
                throw t;
            }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, ResultRow> iterFromMake) {
            iterFromMake.close();
        }
    });
}
Also used : Accumulator(org.apache.druid.java.util.common.guava.Accumulator) AbstractPrioritizedQueryRunnerCallable(org.apache.druid.query.AbstractPrioritizedQueryRunnerCallable) ChainedExecutionQueryRunner(org.apache.druid.query.ChainedExecutionQueryRunner) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Releaser(org.apache.druid.collections.Releaser) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ISE(org.apache.druid.java.util.common.ISE) Pair(org.apache.druid.java.util.common.Pair) QueryInterruptedException(org.apache.druid.query.QueryInterruptedException) ResultRow(org.apache.druid.query.groupby.ResultRow) Closer(org.apache.druid.java.util.common.io.Closer) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) RowBasedKey(org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey) ByteBuffer(java.nio.ByteBuffer) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) ChainedExecutionQueryRunner(org.apache.druid.query.ChainedExecutionQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) TimeoutException(java.util.concurrent.TimeoutException) CancellationException(java.util.concurrent.CancellationException) QueryInterruptedException(org.apache.druid.query.QueryInterruptedException) ExecutionException(java.util.concurrent.ExecutionException) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) File(java.io.File)

Example 4 with Accumulator

use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.

the class GroupByRowProcessor method process.

/**
 * Process the input of sequence "rows" (output by "subquery") based on "query" and returns a {@link ResultSupplier}.
 *
 * In addition to grouping using dimensions and metrics, it will also apply filters (both DimFilter and interval
 * filters).
 *
 * The input sequence is processed synchronously with the call to this method, and result iteration happens lazily
 * upon calls to the {@link ResultSupplier}. Make sure to close it when you're done.
 *
 * If this method fails before a {@link ResultSupplier} can be returned, every resource it has acquired so far
 * (temporary storage, merge buffers, grouper) is released before the failure is propagated, because the caller
 * has nothing to close in that case.
 *
 * @param query            outer query that defines the grouping
 * @param subquery         query that produced "rows"
 * @param rows             rows to group
 * @param config           groupBy query config; overrides from "query" are applied to it
 * @param resource         source of merge buffers
 * @param spillMapper      object mapper used for spilling from the Grouper
 * @param processingTmpDir parent directory of the temporary spill directory
 * @param mergeBufferSize  size of the merge buffers obtained from "resource"
 *
 * @return supplier of grouped results; closing it releases the resources held by this computation
 *
 * @throws ResourceLimitExceededException if accumulation finished with a non-OK {@link AggregateResult}
 */
public static ResultSupplier process(final GroupByQuery query, final GroupByQuery subquery, final Sequence<ResultRow> rows, final GroupByQueryConfig config, final GroupByQueryResource resource, final ObjectMapper spillMapper, final String processingTmpDir, final int mergeBufferSize) {
    final Closer closeOnExit = Closer.create();
    try {
        final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
        final File temporaryStorageDirectory = new File(processingTmpDir, StringUtils.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
        final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
        closeOnExit.register(temporaryStorage);
        Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, subquery, querySpecificConfig, new Supplier<ByteBuffer>() {

            @Override
            public ByteBuffer get() {
                // Each buffer handed out is tracked so that it is released together with everything else.
                final ResourceHolder<ByteBuffer> mergeBufferHolder = resource.getMergeBuffer();
                closeOnExit.register(mergeBufferHolder);
                return mergeBufferHolder.get();
            }
        }, temporaryStorage, spillMapper, mergeBufferSize);
        final Grouper<RowBasedKey> grouper = pair.lhs;
        final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;
        closeOnExit.register(grouper);
        // The whole input is consumed here, synchronously.
        final AggregateResult retVal = rows.accumulate(AggregateResult.ok(), accumulator);
        if (!retVal.isOk()) {
            throw new ResourceLimitExceededException(retVal.getReason());
        }
        return new ResultSupplier() {

            @Override
            public Sequence<ResultRow> results(@Nullable List<DimensionSpec> dimensionsToInclude) {
                return getRowsFromGrouper(query, grouper, dimensionsToInclude);
            }

            @Override
            public void close() throws IOException {
                closeOnExit.close();
            }
        };
    } catch (Throwable t) {
        // No ResultSupplier reaches the caller on this path, so nobody else can release what was acquired.
        try {
            closeOnExit.close();
        } catch (Exception ex) {
            // Keep the original failure as the primary exception.
            t.addSuppressed(ex);
        }
        throw t;
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Accumulator(org.apache.druid.java.util.common.guava.Accumulator) ResultRow(org.apache.druid.query.groupby.ResultRow) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) ResourceHolder(org.apache.druid.collections.ResourceHolder) RowBasedKey(org.apache.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey) ByteBuffer(java.nio.ByteBuffer) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) List(java.util.List) File(java.io.File) Nullable(javax.annotation.Nullable)

Example 5 with Accumulator

use of org.apache.druid.java.util.common.guava.Accumulator in project druid by druid-io.

the class SegmentAnalyzer method analyzeStringColumn.

/**
 * Builds the {@link ColumnAnalysis} of a string column.
 *
 * Cardinality, size and min/max are each computed only when the corresponding analysis is enabled
 * ({@code analyzingCardinality()}, {@code analyzingSize()}, {@code analyzingMinMax()}); otherwise they stay at
 * 0, 0 and null respectively. Size is the sum of the estimated UTF-8 lengths of all non-null, non-empty values
 * read through cursors spanning the adapter's min to max time.
 *
 * @param capabilities   capabilities of the column, used for its type, multi-value and null information
 * @param storageAdapter adapter the column is read from
 * @param columnName     name of the column to analyze
 *
 * @return analysis of the column
 */
private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final StorageAdapter storageAdapter, final String columnName) {
    final int cardinality = analyzingCardinality() ? storageAdapter.getDimensionCardinality(columnName) : 0;

    long size = 0;
    if (analyzingSize()) {
        final DateTime minTime = storageAdapter.getMinTime();
        final DateTime maxTime = storageAdapter.getMaxTime();
        final Interval fullInterval = new Interval(minTime, maxTime);
        final Sequence<Cursor> cursors = storageAdapter.makeCursors(null, fullInterval, VirtualColumns.EMPTY, Granularities.ALL, false, null);
        final Accumulator<Long, Cursor> byteCounter = (runningTotal, cursor) -> {
            final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
            if (selector == null) {
                // Nothing to read from this cursor; carry the total forward untouched.
                return runningTotal;
            }
            long total = runningTotal;
            for (; !cursor.isDone(); cursor.advance()) {
                final IndexedInts row = selector.getRow();
                final int valuesInRow = row.size();
                for (int idx = 0; idx < valuesInRow; ++idx) {
                    final String value = selector.lookupName(row.get(idx));
                    if (value == null || value.isEmpty()) {
                        // Null and empty values do not contribute to the size.
                        continue;
                    }
                    total += StringUtils.estimatedBinaryLengthAsUTF8(value);
                }
            }
            return total;
        };
        size = cursors.accumulate(0L, byteCounter);
    }

    final boolean wantMinMax = analyzingMinMax();
    final Comparable min = wantMinMax ? storageAdapter.getMinValue(columnName) : null;
    final Comparable max = wantMinMax ? storageAdapter.getMaxValue(columnName) : null;

    return new ColumnAnalysis(
        capabilities.toColumnType(),
        capabilities.getType().name(),
        capabilities.hasMultipleValues().isTrue(),
        // if we don't know for sure, then we should plan to check for nulls
        capabilities.hasNulls().isMaybeTrue(),
        size,
        cardinality,
        min,
        max,
        null
    );
}
Also used : Accumulator(org.apache.druid.java.util.common.guava.Accumulator) DimensionSelector(org.apache.druid.segment.DimensionSelector) Cursor(org.apache.druid.segment.Cursor) DateTime(org.joda.time.DateTime) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) IndexedInts(org.apache.druid.segment.data.IndexedInts) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) Interval(org.joda.time.Interval)

Aggregations

Accumulator (org.apache.druid.java.util.common.guava.Accumulator): 10
ISE (org.apache.druid.java.util.common.ISE): 5
List (java.util.List): 4
Pair (org.apache.druid.java.util.common.Pair): 4
ResourceLimitExceededException (org.apache.druid.query.ResourceLimitExceededException): 4
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 3
ByteBuffer (java.nio.ByteBuffer): 3
ArrayList (java.util.ArrayList): 3
Sequence (org.apache.druid.java.util.common.guava.Sequence): 3
Yielder (org.apache.druid.java.util.common.guava.Yielder): 3
YieldingAccumulator (org.apache.druid.java.util.common.guava.YieldingAccumulator): 3
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 3
ResultRow (org.apache.druid.query.groupby.ResultRow): 3
Function (com.google.common.base.Function): 2
ImmutableList (com.google.common.collect.ImmutableList): 2
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 2
File (java.io.File): 2
HashSet (java.util.HashSet): 2
Queue (java.util.Queue): 2
Set (java.util.Set): 2