Search in sources :

Example 6 with Accumulator

use of io.druid.java.util.common.guava.Accumulator in project druid by druid-io.

From the class GroupByMergedQueryRunner, the method run:

// Fans the group-by query out to every underlying per-segment QueryRunner and merges
// the partial results: into a shared IncrementalIndex on the normal path, or into a
// Queue of per-segment results when the bySegment context flag is set.
@Override
public Sequence<T> run(final Query<T> queryParam, final Map<String, Object> responseContext) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    // Query-context overrides take precedence over the statically configured defaults.
    final GroupByQueryConfig querySpecificConfig = configSupplier.get().withOverrides(query);
    final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
    // lhs is the merge target (an IncrementalIndex); rhs is the accumulator that folds rows into it.
    final Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> indexAccumulatorPair = GroupByQueryHelper.createIndexAccumulatorPair(query, querySpecificConfig, bufferPool, true);
    // Alternative merge target, used only when per-segment results were requested.
    final Pair<Queue, Accumulator<Queue, T>> bySegmentAccumulatorPair = GroupByQueryHelper.createBySegmentAccumulatorPair();
    final boolean bySegment = BaseQuery.getContextBySegment(query, false);
    final int priority = BaseQuery.getContextPriority(query, 0);
    // Submit one prioritized task per underlying runner; each task drains its runner's
    // sequence into the shared accumulator. The accumulators are shared across tasks,
    // so they are presumably safe for concurrent use — TODO confirm in GroupByQueryHelper.
    ListenableFuture<List<Void>> futures = Futures.allAsList(Lists.newArrayList(Iterables.transform(queryables, new Function<QueryRunner<T>, ListenableFuture<Void>>() {

        @Override
        public ListenableFuture<Void> apply(final QueryRunner<T> input) {
            if (input == null) {
                throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
            }
            ListenableFuture<Void> future = exec.submit(new AbstractPrioritizedCallable<Void>(priority) {

                @Override
                public Void call() throws Exception {
                    try {
                        if (bySegment) {
                            input.run(queryParam, responseContext).accumulate(bySegmentAccumulatorPair.lhs, bySegmentAccumulatorPair.rhs);
                        } else {
                            input.run(queryParam, responseContext).accumulate(indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);
                        }
                        return null;
                    } catch (QueryInterruptedException e) {
                        // Query cancellation/interruption: rethrow without logging as an error.
                        throw Throwables.propagate(e);
                    } catch (Exception e) {
                        log.error(e, "Exception with one of the sequences!");
                        throw Throwables.propagate(e);
                    }
                }
            });
            if (isSingleThreaded) {
                // Single-threaded mode: wait for each task before the next one is submitted,
                // effectively serializing the per-segment work.
                waitForFutureCompletion(query, future, indexAccumulatorPair.lhs);
            }
            return future;
        }
    })));
    if (!isSingleThreaded) {
        // Concurrent mode: all tasks run in parallel; block until every one finishes.
        waitForFutureCompletion(query, futures, indexAccumulatorPair.lhs);
    }
    if (bySegment) {
        return Sequences.simple(bySegmentAccumulatorPair.lhs);
    }
    // Iterate the merged index with post-aggregations applied; withBaggage closes the
    // index (releasing its buffers) once the returned sequence is fully consumed.
    return Sequences.withBaggage(Sequences.simple(Iterables.transform(indexAccumulatorPair.lhs.iterableWithPostAggregations(null, query.isDescending()), new Function<Row, T>() {

        @Override
        public T apply(Row input) {
            // Unchecked cast from Row to T — presumably T is Row for group-by
            // queries; TODO confirm against this runner's type declaration.
            return (T) input;
        }
    })), indexAccumulatorPair.lhs);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) TimeoutException(java.util.concurrent.TimeoutException) CancellationException(java.util.concurrent.CancellationException) ExecutionException(java.util.concurrent.ExecutionException) GroupByQuery(io.druid.query.groupby.GroupByQuery) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ISE(io.druid.java.util.common.ISE) List(java.util.List) Row(io.druid.data.input.Row) Queue(java.util.Queue)

Example 7 with Accumulator

use of io.druid.java.util.common.guava.Accumulator in project druid by druid-io.

From the class RowBasedGrouperHelper, the method createGrouperAccumulatorPair:

/**
 * Creates a {@link Grouper} (spilling for single-threaded use, concurrent otherwise)
 * together with an Accumulator that feeds Rows into it.
 *
 * If isInputRaw is true, transformations such as timestamp truncation and extraction functions have not
 * been applied to the input rows yet, for example, in a nested query, if an extraction function is being
 * applied in the outer query to a field of the inner query. This method must apply those transformations.
 */
public static Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> createGrouperAccumulatorPair(final GroupByQuery query, final boolean isInputRaw, final Map<String, ValueType> rawInputRowSignature, final GroupByQueryConfig config, final Supplier<ByteBuffer> bufferSupplier, final int concurrencyHint, final LimitedTemporaryStorage temporaryStorage, final ObjectMapper spillMapper, final AggregatorFactory[] aggregatorFactories) {
    // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
    Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");
    final List<ValueType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    // No single universal timestamp means the row timestamp must be carried in the key.
    final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null;
    // Split the merging-dictionary budget evenly across concurrent groupers.
    final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(includeTimestamp, query.getContextSortByDimsFirst(), query.getDimensions().size(), querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint), valueTypes);
    // The accumulator publishes the row being aggregated through this thread-local so
    // the column selector factory can read values from it during aggregate().
    final ThreadLocal<Row> columnSelectorRow = new ThreadLocal<>();
    final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(RowBasedColumnSelectorFactory.create(columnSelectorRow, rawInputRowSignature));
    final Grouper<RowBasedKey> grouper;
    if (concurrencyHint == -1) {
        grouper = new SpillingGrouper<>(bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories, querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, true);
    } else {
        grouper = new ConcurrentGrouper<>(bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories, querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, concurrencyHint);
    }
    // One extra key slot when the timestamp is part of the grouping key.
    final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
    final ValueExtractFunction valueExtractFn = makeValueExtractFunction(query, isInputRaw, includeTimestamp, columnSelectorFactory, rawInputRowSignature, valueTypes);
    final Accumulator<Grouper<RowBasedKey>, Row> accumulator = new Accumulator<Grouper<RowBasedKey>, Row>() {

        @Override
        public Grouper<RowBasedKey> accumulate(final Grouper<RowBasedKey> theGrouper, final Row row) {
            BaseQuery.checkInterrupted();
            if (theGrouper == null) {
                // Pass-through null returns without doing more work.
                return null;
            }
            if (!theGrouper.isInitialized()) {
                theGrouper.init();
            }
            columnSelectorRow.set(row);
            try {
                final Comparable[] key = new Comparable[keySize];
                valueExtractFn.apply(row, key);
                // A false return from aggregate() means grouping resources were
                // exhausted; signal the caller by returning null.
                return theGrouper.aggregate(new RowBasedKey(key)) ? theGrouper : null;
            } finally {
                // BUGFIX: always clear the thread-local. The previous code skipped the
                // clear on the resource-exhausted path, leaving the last Row reachable
                // from this thread after the aborted query.
                columnSelectorRow.set(null);
            }
        }
    };
    return new Pair<>(grouper, accumulator);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) RowBasedColumnSelectorFactory(io.druid.query.groupby.RowBasedColumnSelectorFactory) ColumnSelectorFactory(io.druid.segment.ColumnSelectorFactory) ValueType(io.druid.segment.column.ValueType) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) Pair(io.druid.java.util.common.Pair)

Example 8 with Accumulator

use of io.druid.java.util.common.guava.Accumulator in project druid by druid-io.

From the class SegmentAnalyzer, the method analyzeStringColumn:

// Analyzes a string-typed column: cardinality, estimated UTF-8 size of all values,
// and min/max value. Each piece is computed only if the analyzer is configured for it.
private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final StorageAdapter storageAdapter, final String columnName) {
    final int cardinality = analyzingCardinality() ? storageAdapter.getDimensionCardinality(columnName) : 0;

    long size = 0;
    if (analyzingSize()) {
        // Scan the segment's full time range and sum the estimated binary length
        // of every non-empty dimension value.
        final long start = storageAdapter.getMinTime().getMillis();
        final long end = storageAdapter.getMaxTime().getMillis();
        final Sequence<Cursor> cursors = storageAdapter.makeCursors(null, new Interval(start, end), VirtualColumns.EMPTY, Granularities.ALL, false);
        size = cursors.accumulate(0L, new Accumulator<Long, Cursor>() {

            @Override
            public Long accumulate(Long runningTotal, Cursor cursor) {
                final DimensionSelector selector = cursor.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
                if (selector == null) {
                    // Column not present for this cursor; contributes nothing.
                    return runningTotal;
                }
                long total = runningTotal;
                for (; !cursor.isDone(); cursor.advance()) {
                    final IndexedInts row = selector.getRow();
                    for (int i = 0; i < row.size(); ++i) {
                        final String value = selector.lookupName(row.get(i));
                        if (value != null && !value.isEmpty()) {
                            total += StringUtils.estimatedBinaryLengthAsUTF8(value);
                        }
                    }
                }
                return total;
            }
        });
    }

    Comparable min = null;
    Comparable max = null;
    if (analyzingMinMax()) {
        min = storageAdapter.getMinValue(columnName);
        max = storageAdapter.getMaxValue(columnName);
    }
    return new ColumnAnalysis(capabilities.getType().name(), capabilities.hasMultipleValues(), size, cardinality, min, max, null);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) DimensionSelector(io.druid.segment.DimensionSelector) Cursor(io.druid.segment.Cursor) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) IndexedInts(io.druid.segment.data.IndexedInts) ColumnAnalysis(io.druid.query.metadata.metadata.ColumnAnalysis) Interval(org.joda.time.Interval)

Example 9 with Accumulator

use of io.druid.java.util.common.guava.Accumulator in project druid by druid-io.

From the class SpecificSegmentQueryRunner, the method run:

// Runs the query restricted to one specific segment, renaming the current thread with a
// descriptive label for the duration of processing, and converting SegmentMissingException
// into a missing-segments entry in the response context instead of a query failure.
@Override
public Sequence<T> run(final Query<T> input, final Map<String, Object> responseContext) {
    // Narrow the query to the segment spec this runner is responsible for.
    final Query<T> query = input.withQuerySegmentSpec(specificSpec);
    final Thread currThread = Thread.currentThread();
    final String currThreadName = currThread.getName();
    final String newName = String.format("%s_%s_%s", query.getType(), query.getDataSource(), query.getIntervals());
    // Construct the base sequence while the thread carries the descriptive name.
    final Sequence<T> baseSequence = doNamed(currThread, currThreadName, newName, new Supplier<Sequence<T>>() {

        @Override
        public Sequence<T> get() {
            return base.run(query, responseContext);
        }
    });
    // Wrap the base sequence so a missing segment is reported rather than thrown, at
    // every point where consumption could raise it: accumulate, toYielder, and each
    // subsequent Yielder.next call.
    Sequence<T> segmentMissingCatchingSequence = new Sequence<T>() {

        @Override
        public <OutType> OutType accumulate(final OutType initValue, final Accumulator<OutType, T> accumulator) {
            try {
                return baseSequence.accumulate(initValue, accumulator);
            } catch (SegmentMissingException e) {
                // Record the missing segment and return the initial value, since the
                // failure aborted accumulation.
                appendMissingSegment(responseContext);
                return initValue;
            }
        }

        @Override
        public <OutType> Yielder<OutType> toYielder(final OutType initValue, final YieldingAccumulator<OutType, T> accumulator) {
            try {
                return makeYielder(baseSequence.toYielder(initValue, accumulator));
            } catch (SegmentMissingException e) {
                appendMissingSegment(responseContext);
                // A done-yielder terminates iteration cleanly for the caller.
                return Yielders.done(initValue, null);
            }
        }

        // Wraps a yielder so SegmentMissingException thrown by next() is likewise
        // converted into a missing-segment report plus clean termination.
        private <OutType> Yielder<OutType> makeYielder(final Yielder<OutType> yielder) {
            return new Yielder<OutType>() {

                @Override
                public OutType get() {
                    return yielder.get();
                }

                @Override
                public Yielder<OutType> next(final OutType initValue) {
                    try {
                        return yielder.next(initValue);
                    } catch (SegmentMissingException e) {
                        appendMissingSegment(responseContext);
                        return Yielders.done(initValue, null);
                    }
                }

                @Override
                public boolean isDone() {
                    return yielder.isDone();
                }

                @Override
                public void close() throws IOException {
                    yielder.close();
                }
            };
        }
    };
    // Ensure every stage of sequence processing (not just construction) runs under the
    // renamed thread; doNamed presumably restores the original name afterwards.
    return Sequences.wrap(segmentMissingCatchingSequence, new SequenceWrapper() {

        @Override
        public <RetType> RetType wrap(Supplier<RetType> sequenceProcessing) {
            return doNamed(currThread, currThreadName, newName, sequenceProcessing);
        }
    });
}
Also used : YieldingAccumulator(io.druid.java.util.common.guava.YieldingAccumulator) Accumulator(io.druid.java.util.common.guava.Accumulator) SequenceWrapper(io.druid.java.util.common.guava.SequenceWrapper) Yielder(io.druid.java.util.common.guava.Yielder) SegmentMissingException(io.druid.segment.SegmentMissingException) Sequence(io.druid.java.util.common.guava.Sequence) YieldingAccumulator(io.druid.java.util.common.guava.YieldingAccumulator)

Aggregations

Accumulator (io.druid.java.util.common.guava.Accumulator)9 Pair (io.druid.java.util.common.Pair)5 Row (io.druid.data.input.Row)4 ISE (io.druid.java.util.common.ISE)4 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)4 GroupByQueryConfig (io.druid.query.groupby.GroupByQueryConfig)4 List (java.util.List)4 GroupByQuery (io.druid.query.groupby.GroupByQuery)3 Function (com.google.common.base.Function)2 ImmutableList (com.google.common.collect.ImmutableList)2 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)2 MapBasedRow (io.druid.data.input.MapBasedRow)2 BaseSequence (io.druid.java.util.common.guava.BaseSequence)2 Sequence (io.druid.java.util.common.guava.Sequence)2 Yielder (io.druid.java.util.common.guava.Yielder)2 YieldingAccumulator (io.druid.java.util.common.guava.YieldingAccumulator)2 QueryRunner (io.druid.query.QueryRunner)2 RowBasedColumnSelectorFactory (io.druid.query.groupby.RowBasedColumnSelectorFactory)2 RowBasedKey (io.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey)2 SegmentMissingException (io.druid.segment.SegmentMissingException)2