Example 56 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class MoveTimeFiltersToIntervals, the method extractConvertibleTimeBounds:

/**
   * Extract bound filters on __time that can be converted to query-level "intervals".
   *
   * @return pair of new dimFilter + RangeSet of __time that should be ANDed together. Either can be null but not both.
   */
private static Pair<DimFilter, RangeSet<Long>> extractConvertibleTimeBounds(final DimFilter filter) {
    if (filter instanceof AndDimFilter) {
        final List<DimFilter> children = ((AndDimFilter) filter).getFields();
        final List<DimFilter> newChildren = Lists.newArrayList();
        final List<RangeSet<Long>> rangeSets = Lists.newArrayList();
        for (DimFilter child : children) {
            final Pair<DimFilter, RangeSet<Long>> pair = extractConvertibleTimeBounds(child);
            if (pair.lhs != null) {
                newChildren.add(pair.lhs);
            }
            if (pair.rhs != null) {
                rangeSets.add(pair.rhs);
            }
        }
        final DimFilter newFilter;
        if (newChildren.isEmpty()) {
            newFilter = null;
        } else if (newChildren.size() == 1) {
            newFilter = newChildren.get(0);
        } else {
            newFilter = new AndDimFilter(newChildren);
        }
        return Pair.of(newFilter, rangeSets.isEmpty() ? null : RangeSets.intersectRangeSets(rangeSets));
    } else if (filter instanceof OrDimFilter) {
        final List<DimFilter> children = ((OrDimFilter) filter).getFields();
        final List<RangeSet<Long>> rangeSets = Lists.newArrayList();
        // Track whether every child converted fully to intervals (lhs == null)
        // and whether every child produced an interval at all (rhs != null).
        boolean allCompletelyConverted = true;
        boolean allHadIntervals = true;
        for (DimFilter child : children) {
            final Pair<DimFilter, RangeSet<Long>> pair = extractConvertibleTimeBounds(child);
            if (pair.lhs != null) {
                allCompletelyConverted = false;
            }
            if (pair.rhs != null) {
                rangeSets.add(pair.rhs);
            } else {
                allHadIntervals = false;
            }
        }
        if (allCompletelyConverted) {
            // Every disjunct became a time range, so the OR is exactly the union of the ranges.
            return Pair.of(null, RangeSets.unionRangeSets(rangeSets));
        } else {
            // Keep the original filter. The union of child ranges may still be ANDed in as a
            // safe over-approximation, but only if every disjunct contributed a range.
            return Pair.of(filter, allHadIntervals ? RangeSets.unionRangeSets(rangeSets) : null);
        }
    } else if (filter instanceof NotDimFilter) {
        final DimFilter child = ((NotDimFilter) filter).getField();
        final Pair<DimFilter, RangeSet<Long>> pair = extractConvertibleTimeBounds(child);
        if (pair.rhs != null && pair.lhs == null) {
            // The child converted entirely to ranges, so NOT is simply the complement.
            return Pair.of(null, pair.rhs.complement());
        } else {
            return Pair.of(filter, null);
        }
    } else if (filter instanceof BoundDimFilter) {
        final BoundDimFilter bound = (BoundDimFilter) filter;
        if (BoundRefKey.from(bound).equals(TIME_BOUND_REF_KEY)) {
            return Pair.of(null, RangeSets.of(toLongRange(Bounds.toRange(bound))));
        } else {
            return Pair.of(filter, null);
        }
    } else {
        return Pair.of(filter, null);
    }
}
Also used : NotDimFilter(io.druid.query.filter.NotDimFilter) BoundDimFilter(io.druid.query.filter.BoundDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) DimFilter(io.druid.query.filter.DimFilter) RangeSet(com.google.common.collect.RangeSet) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Pair(io.druid.java.util.common.Pair)
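
The three recursive branches above reduce to plain Guava RangeSet algebra over __time millis: AND intersects child ranges, OR unions them, NOT complements them. A minimal, self-contained sketch of that algebra (plain Guava, no Druid types; the literal millisecond bounds are invented for illustration):

import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.TreeRangeSet;

public class TimeBoundsSketch {
    public static void main(String[] args) {
        // __time >= 100 AND __time < 200, as a range set of millis.
        RangeSet<Long> a = TreeRangeSet.create();
        a.add(Range.closedOpen(100L, 200L));

        // __time >= 150 AND __time < 300.
        RangeSet<Long> b = TreeRangeSet.create();
        b.add(Range.closedOpen(150L, 300L));

        // AND of two bound filters -> intersection (remove everything b does not cover).
        RangeSet<Long> and = TreeRangeSet.create(a);
        and.removeAll(b.complement());
        System.out.println(and); // expect [150..200)

        // OR -> union, as in the OrDimFilter branch.
        RangeSet<Long> or = TreeRangeSet.create(a);
        or.addAll(b);
        System.out.println(or); // expect [100..300)

        // NOT -> complement, as in the NotDimFilter branch.
        System.out.println(a.complement()); // expect everything below 100 or at/above 200
    }
}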

Example 57 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class CombiningSequenceTest, the method testCombining:

private void testCombining(List<Pair<Integer, Integer>> pairs, List<Pair<Integer, Integer>> expected, int limit) throws Exception {
    // Test that closing works too
    final CountDownLatch closed = new CountDownLatch(1);
    final Closeable closeable = new Closeable() {

        @Override
        public void close() throws IOException {
            closed.countDown();
        }
    };
    Sequence<Pair<Integer, Integer>> seq = Sequences.limit(
        CombiningSequence.create(
            Sequences.withBaggage(Sequences.simple(pairs), closeable),
            Ordering.natural().onResultOf(Pair.<Integer, Integer>lhsFn()),
            new BinaryFn<Pair<Integer, Integer>, Pair<Integer, Integer>, Pair<Integer, Integer>>() {

                @Override
                public Pair<Integer, Integer> apply(Pair<Integer, Integer> lhs, Pair<Integer, Integer> rhs) {
                    if (lhs == null) {
                        return rhs;
                    }
                    if (rhs == null) {
                        return lhs;
                    }
                    return Pair.of(lhs.lhs, lhs.rhs + rhs.rhs);
                }
            }),
        limit);
    List<Pair<Integer, Integer>> merged = Sequences.toList(seq, Lists.<Pair<Integer, Integer>>newArrayList());
    Assert.assertEquals(expected, merged);
    Yielder<Pair<Integer, Integer>> yielder = seq.toYielder(null, new YieldingAccumulator<Pair<Integer, Integer>, Pair<Integer, Integer>>() {

        int count = 0;

        @Override
        public Pair<Integer, Integer> accumulate(Pair<Integer, Integer> lhs, Pair<Integer, Integer> rhs) {
            count++;
            if (count % yieldEvery == 0) {
                yield();
            }
            return rhs;
        }
    });
    Iterator<Pair<Integer, Integer>> expectedVals = Iterators.filter(expected.iterator(), new Predicate<Pair<Integer, Integer>>() {

        int count = 0;

        @Override
        public boolean apply(@Nullable Pair<Integer, Integer> input) {
            count++;
            return count % yieldEvery == 0;
        }
    });
    if (expectedVals.hasNext()) {
        while (!yielder.isDone()) {
            final Pair<Integer, Integer> expectedVal = expectedVals.next();
            final Pair<Integer, Integer> actual = yielder.get();
            Assert.assertEquals(expectedVal, actual);
            yielder = yielder.next(actual);
        }
    }
    Assert.assertTrue(yielder.isDone());
    Assert.assertFalse(expectedVals.hasNext());
    yielder.close();
    Assert.assertTrue("resource closed", closed.await(10000, TimeUnit.MILLISECONDS));
}
Also used : Closeable(java.io.Closeable) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryFn(io.druid.java.util.common.guava.nary.BinaryFn) Pair(io.druid.java.util.common.Pair)
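
Stripped of the Sequence machinery, the combining step under test is a simple fold: walk the key-sorted stream and merge adjacent entries with equal lhs through the binary function (here, summing rhs). A minimal sketch using JDK Map.Entry in place of Druid's Pair (class and method names are made up for illustration):

import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;

public class CombineSketch {

    // Merge adjacent entries with equal keys by summing values; assumes the
    // input is already sorted by key, as CombiningSequence requires.
    static List<Entry<Integer, Integer>> combine(List<Entry<Integer, Integer>> sorted) {
        List<Entry<Integer, Integer>> out = new ArrayList<>();
        for (Entry<Integer, Integer> e : sorted) {
            int last = out.size() - 1;
            if (last >= 0 && out.get(last).getKey().equals(e.getKey())) {
                out.set(last, new SimpleEntry<>(e.getKey(), out.get(last).getValue() + e.getValue()));
            } else {
                out.add(new SimpleEntry<>(e.getKey(), e.getValue()));
            }
        }
        return out;
    }

    public static void main(String[] args) {
        List<Entry<Integer, Integer>> in = List.of(
            new SimpleEntry<>(1, 1), new SimpleEntry<>(1, 2), new SimpleEntry<>(2, 5));
        System.out.println(combine(in)); // expect [1=3, 2=5]
    }
}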

Example 58 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class GroupByQueryHelper, the method createBySegmentAccumulatorPair:

public static <T> Pair<Queue, Accumulator<Queue, T>> createBySegmentAccumulatorPair() {
    // In parallel query runner multiple threads add to this queue concurrently
    Queue init = new ConcurrentLinkedQueue<>();
    Accumulator<Queue, T> accumulator = new Accumulator<Queue, T>() {

        @Override
        public Queue accumulate(Queue accumulated, T in) {
            if (in == null) {
                throw new ISE("Cannot have null result");
            }
            accumulated.offer(in);
            return accumulated;
        }
    };
    return new Pair<>(init, accumulator);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) ISE(io.druid.java.util.common.ISE) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Queue(java.util.Queue) Pair(io.druid.java.util.common.Pair)
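
The point of the ConcurrentLinkedQueue here is that the parallel query runner's worker threads can offer results without any external locking. A minimal sketch of the same pattern, with a JDK BiFunction standing in for Druid's Accumulator and IllegalStateException for ISE (the thread and element counts are arbitrary):

import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;

public class BySegmentAccumulatorSketch {
    public static void main(String[] args) throws InterruptedException {
        // Stand-in for Accumulator<Queue, T>: (accumulated, in) -> accumulated.
        BiFunction<Queue<Integer>, Integer, Queue<Integer>> accumulator = (acc, in) -> {
            if (in == null) {
                throw new IllegalStateException("Cannot have null result");
            }
            acc.offer(in);
            return acc;
        };

        Queue<Integer> queue = new ConcurrentLinkedQueue<>();
        ExecutorService pool = Executors.newFixedThreadPool(4);
        for (int i = 0; i < 100; i++) {
            final int v = i;
            // Safe from many threads: ConcurrentLinkedQueue is lock-free.
            pool.submit(() -> accumulator.apply(queue, v));
        }
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.SECONDS);
        System.out.println(queue.size()); // expect 100
    }
}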

Example 59 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class GroupByMergingQueryRunnerV2, the method run:

@Override
public Sequence<Row> run(final Query queryParam, final Map responseContext) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    // CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
    // (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
    // merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
    // will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
    // I'm not sure of a better way to do this without tweaking how realtime servers do queries.
    final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
    final GroupByQuery queryForRunners = query.withOverriddenContext(ImmutableMap.<String, Object>of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true));
    if (BaseQuery.getContextBySegment(query, false) || forceChainedExecution) {
        return new ChainedExecutionQueryRunner(exec, queryWatcher, queryables).run(query, responseContext);
    }
    final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
    final AggregatorFactory[] combiningAggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
    for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
        combiningAggregatorFactories[i] = query.getAggregatorSpecs().get(i).getCombiningFactory();
    }
    final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
    final int priority = BaseQuery.getContextPriority(query, 0);
    // Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
    // query processing together.
    final Number queryTimeout = query.getContextValue(QueryContextKeys.TIMEOUT, null);
    final long timeoutAt = queryTimeout == null ? JodaUtils.MAX_INSTANT : System.currentTimeMillis() + queryTimeout.longValue();
    return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {

        @Override
        public CloseableGrouperIterator<RowBasedKey, Row> make() {
            final List<ReferenceCountingResourceHolder> resources = Lists.newArrayList();
            try {
                final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
                final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder = ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
                resources.add(temporaryStorageHolder);
                final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder;
                try {
                    // This will potentially block if there are no merge buffers left in the pool.
                    final long timeout = timeoutAt - System.currentTimeMillis();
                    if (timeout <= 0 || (mergeBufferHolder = mergeBufferPool.take(timeout)) == null) {
                        throw new TimeoutException();
                    }
                    resources.add(mergeBufferHolder);
                } catch (Exception e) {
                    throw new QueryInterruptedException(e);
                }
                Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair =
                    RowBasedGrouperHelper.createGrouperAccumulatorPair(
                        query, false, null, config, Suppliers.ofInstance(mergeBufferHolder.get()),
                        concurrencyHint, temporaryStorage, spillMapper, combiningAggregatorFactories);
                final Grouper<RowBasedKey> grouper = pair.lhs;
                final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
                grouper.init();
                final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder = ReferenceCountingResourceHolder.fromCloseable(grouper);
                resources.add(grouperHolder);
                ListenableFuture<List<Boolean>> futures = Futures.allAsList(Lists.newArrayList(Iterables.transform(queryables, new Function<QueryRunner<Row>, ListenableFuture<Boolean>>() {

                    @Override
                    public ListenableFuture<Boolean> apply(final QueryRunner<Row> input) {
                        if (input == null) {
                            throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                        }
                        ListenableFuture<Boolean> future = exec.submit(new AbstractPrioritizedCallable<Boolean>(priority) {

                            @Override
                            public Boolean call() throws Exception {
                                try (Releaser bufferReleaser = mergeBufferHolder.increment();
                                    Releaser grouperReleaser = grouperHolder.increment()) {
                                    final Object retVal = input.run(queryForRunners, responseContext).accumulate(grouper, accumulator);
                                    // Return true if OK, false if resources were exhausted.
                                    return retVal == grouper;
                                } catch (QueryInterruptedException e) {
                                    throw e;
                                } catch (Exception e) {
                                    log.error(e, "Exception with one of the sequences!");
                                    throw Throwables.propagate(e);
                                }
                            }
                        });
                        if (isSingleThreaded) {
                            waitForFutureCompletion(query, Futures.allAsList(ImmutableList.of(future)), timeoutAt - System.currentTimeMillis());
                        }
                        return future;
                    }
                })));
                if (!isSingleThreaded) {
                    waitForFutureCompletion(query, futures, timeoutAt - System.currentTimeMillis());
                }
                return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {

                    @Override
                    public void close() throws IOException {
                        for (Closeable closeable : Lists.reverse(resources)) {
                            CloseQuietly.close(closeable);
                        }
                    }
                });
            } catch (Throwable e) {
                // Exception caught while setting up the iterator; release resources.
                for (Closeable closeable : Lists.reverse(resources)) {
                    CloseQuietly.close(closeable);
                }
                throw e;
            }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
            iterFromMake.close();
        }
    });
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) Closeable(java.io.Closeable) ChainedExecutionQueryRunner(io.druid.query.ChainedExecutionQueryRunner) Function(com.google.common.base.Function) GroupByQuery(io.druid.query.groupby.GroupByQuery) Releaser(io.druid.collections.Releaser) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ISE(io.druid.java.util.common.ISE) TimeoutException(java.util.concurrent.TimeoutException) QueryInterruptedException(io.druid.query.QueryInterruptedException) Pair(io.druid.java.util.common.Pair) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) RowBasedKey(io.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) BaseSequence(io.druid.java.util.common.guava.BaseSequence) CancellationException(java.util.concurrent.CancellationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) QueryRunner(io.druid.query.QueryRunner) ReferenceCountingResourceHolder(io.druid.collections.ReferenceCountingResourceHolder) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Row(io.druid.data.input.Row) File(java.io.File)
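
One reusable idea in run() is the absolute deadline: timeoutAt is computed once, and every subsequent blocking step (taking a merge buffer, waiting on the per-runner futures) is charged against whatever budget remains. A minimal sketch of that pattern, with a BlockingQueue standing in for the merge buffer pool (names and the 500 ms budget are invented; the empty queue deliberately forces the timeout path):

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class DeadlineSketch {
    public static void main(String[] args) throws Exception {
        // Compute one absolute deadline up front, as run() does with timeoutAt.
        long queryTimeoutMillis = 500;
        long timeoutAt = System.currentTimeMillis() + queryTimeoutMillis;

        // Empty pool: the poll below will exhaust the budget and return null.
        BlockingQueue<String> mergeBuffers = new ArrayBlockingQueue<>(1);

        long remaining = timeoutAt - System.currentTimeMillis();
        String buffer = remaining <= 0 ? null : mergeBuffers.poll(remaining, TimeUnit.MILLISECONDS);
        if (buffer == null) {
            throw new TimeoutException("gave up waiting for a merge buffer");
        }
        // A later blocking step would recompute: timeoutAt - System.currentTimeMillis().
    }
}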

Example 60 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class RowBasedGrouperHelper, the method createGrouperAccumulatorPair:

/**
   * If isInputRaw is true, transformations such as timestamp truncation and extraction functions have not
   * been applied to the input rows yet, for example, in a nested query, if an extraction function is being
   * applied in the outer query to a field of the inner query. This method must apply those transformations.
   */
public static Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> createGrouperAccumulatorPair(
        final GroupByQuery query,
        final boolean isInputRaw,
        final Map<String, ValueType> rawInputRowSignature,
        final GroupByQueryConfig config,
        final Supplier<ByteBuffer> bufferSupplier,
        final int concurrencyHint,
        final LimitedTemporaryStorage temporaryStorage,
        final ObjectMapper spillMapper,
        final AggregatorFactory[] aggregatorFactories) {
    // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
    Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");
    final List<ValueType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null;
    final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(includeTimestamp, query.getContextSortByDimsFirst(), query.getDimensions().size(), querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint), valueTypes);
    final ThreadLocal<Row> columnSelectorRow = new ThreadLocal<>();
    final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(RowBasedColumnSelectorFactory.create(columnSelectorRow, rawInputRowSignature));
    final Grouper<RowBasedKey> grouper;
    if (concurrencyHint == -1) {
        grouper = new SpillingGrouper<>(bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories, querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, true);
    } else {
        grouper = new ConcurrentGrouper<>(bufferSupplier, keySerdeFactory, columnSelectorFactory, aggregatorFactories, querySpecificConfig.getBufferGrouperMaxSize(), querySpecificConfig.getBufferGrouperMaxLoadFactor(), querySpecificConfig.getBufferGrouperInitialBuckets(), temporaryStorage, spillMapper, concurrencyHint);
    }
    final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
    final ValueExtractFunction valueExtractFn = makeValueExtractFunction(query, isInputRaw, includeTimestamp, columnSelectorFactory, rawInputRowSignature, valueTypes);
    final Accumulator<Grouper<RowBasedKey>, Row> accumulator = new Accumulator<Grouper<RowBasedKey>, Row>() {

        @Override
        public Grouper<RowBasedKey> accumulate(final Grouper<RowBasedKey> theGrouper, final Row row) {
            BaseQuery.checkInterrupted();
            if (theGrouper == null) {
                // Pass-through null returns without doing more work.
                return null;
            }
            if (!theGrouper.isInitialized()) {
                theGrouper.init();
            }
            columnSelectorRow.set(row);
            final Comparable[] key = new Comparable[keySize];
            valueExtractFn.apply(row, key);
            final boolean didAggregate = theGrouper.aggregate(new RowBasedKey(key));
            if (!didAggregate) {
                // null return means grouping resources were exhausted.
                return null;
            }
            columnSelectorRow.set(null);
            return theGrouper;
        }
    };
    return new Pair<>(grouper, accumulator);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) RowBasedColumnSelectorFactory(io.druid.query.groupby.RowBasedColumnSelectorFactory) ColumnSelectorFactory(io.druid.segment.ColumnSelectorFactory) ValueType(io.druid.segment.column.ValueType) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) Pair(io.druid.java.util.common.Pair)
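
All five examples lean on the same small class: a pair with public final lhs and rhs fields, a constructor, and an of(...) factory, which is why callers write pair.lhs and pair.rhs directly instead of going through getters. A minimal sketch of that shape (the real io.druid.java.util.common.Pair also carries equals/hashCode and helpers such as the lhsFn() used in Example 57; this outline only covers what the snippets above assume):

public class Pair<T1, T2> {
    public final T1 lhs;
    public final T2 rhs;

    public Pair(T1 lhs, T2 rhs) {
        this.lhs = lhs;
        this.rhs = rhs;
    }

    public static <T1, T2> Pair<T1, T2> of(T1 lhs, T2 rhs) {
        return new Pair<>(lhs, rhs);
    }

    @Override
    public String toString() {
        return "Pair{lhs=" + lhs + ", rhs=" + rhs + '}';
    }
}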

Aggregations (classes most often used together with Pair, with usage counts):

Pair (io.druid.java.util.common.Pair): 62
Test (org.junit.Test): 26
Interval (org.joda.time.Interval): 15
DataSegment (io.druid.timeline.DataSegment): 11
Map (java.util.Map): 11
ByteBuffer (java.nio.ByteBuffer): 10
HashMap (java.util.HashMap): 9
SerializablePair (io.druid.collections.SerializablePair): 8
SegmentDescriptor (io.druid.query.SegmentDescriptor): 8
List (java.util.List): 8
ImmutableMap (com.google.common.collect.ImmutableMap): 7
Executor (java.util.concurrent.Executor): 7
DateTime (org.joda.time.DateTime): 7
Function (com.google.common.base.Function): 6
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 6
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 6
TaskStatus (io.druid.indexing.common.TaskStatus): 6
ISE (io.druid.java.util.common.ISE): 6
Access (io.druid.server.security.Access): 6
Action (io.druid.server.security.Action): 6