
Example 6 with MultipleSpecificSegmentSpec

Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in project hive by apache.

In class DruidQueryBasedInputFormat, method distributeScanQuery.

/*
 * New method that distributes the Scan query by creating splits containing
 * information about different Druid nodes that have the data for the given
 * query.
 */
private static HiveDruidSplit[] distributeScanQuery(String address, ScanQuery query, Path dummyPath) throws IOException {
    // If it has a limit, we use it and we do not distribute the query
    final boolean isFetch = query.getScanRowsLimit() < Long.MAX_VALUE;
    if (isFetch) {
        return new HiveDruidSplit[] { new HiveDruidSplit(
            DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query),
            dummyPath, new String[] { address }) };
    }
    final List<LocatedSegmentDescriptor> segmentDescriptors = fetchLocatedSegmentDescriptors(address, query);
    // Create one input split for each segment
    final int numSplits = segmentDescriptors.size();
    final HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
    for (int i = 0; i < numSplits; i++) {
        final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
        final String[] hosts = new String[locatedSD.getLocations().size() + 1];
        for (int j = 0; j < locatedSD.getLocations().size(); j++) {
            hosts[j] = locatedSD.getLocations().get(j).getHost();
        }
        // Default to broker if all other hosts fail.
        hosts[locatedSD.getLocations().size()] = address;
        // Create partial Scan query for this segment
        final SegmentDescriptor newSD = new SegmentDescriptor(
            locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
        final Query partialQuery = query.withQuerySegmentSpec(
            new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
        splits[i] = new HiveDruidSplit(
            DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath, hosts);
    }
    return splits;
}
Also used: MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) LocatedSegmentDescriptor(org.apache.druid.query.LocatedSegmentDescriptor) BaseQuery(org.apache.druid.query.BaseQuery) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor)
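The essential move above is rewriting the broker-level query into per-segment partial queries. A minimal sketch of that rewrite in isolation, assuming a query built elsewhere; the interval, version, and partition values are hypothetical placeholders for what the LocatedSegmentDescriptor returned by fetchLocatedSegmentDescriptors would supply:

// A hedged sketch, not the Hive implementation: pin a query to exactly one
// segment so its split can be routed to a host that holds that segment.
static Query<?> pinToSingleSegment(Query<?> query) {
    // Hypothetical values; in distributeScanQuery these come from the
    // LocatedSegmentDescriptor fetched from the broker.
    SegmentDescriptor descriptor = new SegmentDescriptor(
        Intervals.of("2020-01-01/2020-01-02"), "v1", 0);
    return query.withQuerySegmentSpec(
        new MultipleSpecificSegmentSpec(Collections.singletonList(descriptor)));
}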

Example 7 with MultipleSpecificSegmentSpec

Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in project druid by druid-io.

In class ScanBenchmark, method querySingleIncrementalIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleIncrementalIndex(Blackhole blackhole, IncrementalIndexState state) {
    QueryRunner<ScanResultValue> runner = QueryBenchmarkUtil.makeQueryRunner(
        factory,
        SegmentId.dummy("incIndex"),
        new IncrementalIndexSegment(state.incIndex, SegmentId.dummy("incIndex")));
    Query effectiveQuery = query
        .withDataSource(new TableDataSource("incIndex"))
        .withQuerySegmentSpec(new MultipleSpecificSegmentSpec(
            ImmutableList.of(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0))))
        .withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
    List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
    blackhole.consume(results);
}
Also used: MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) TableDataSource(org.apache.druid.query.TableDataSource) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
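The base field query is constructed in the benchmark's setup code, which is not shown here. A hedged sketch of a minimal equivalent using Druids' scan-query builder; the data source name and the single ETERNITY interval mirror the code above, everything else is an assumption:

// A minimal sketch, not the benchmark's actual setup code: build a base
// ScanQuery over one catch-all interval, ready to be narrowed per segment.
private static ScanQuery makeBaseQuery() {
    return Druids.newScanQueryBuilder()
        .dataSource("incIndex")
        .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.ETERNITY)))
        .build();
}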

Example 8 with MultipleSpecificSegmentSpec

Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in project druid by druid-io.

In class ScanBenchmark, method querySingleQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
    final QueryRunner<Result<ScanResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
        factory,
        SegmentId.dummy("qIndex"),
        new QueryableIndexSegment(state.qIndexes.get(0), SegmentId.dummy("qIndex")));
    Query effectiveQuery = query
        .withDataSource(new TableDataSource("qIndex"))
        .withQuerySegmentSpec(new MultipleSpecificSegmentSpec(
            ImmutableList.of(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0))))
        .withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
    List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
    blackhole.consume(results);
}
Also used: QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) TableDataSource(org.apache.druid.query.TableDataSource) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) Result(org.apache.druid.query.Result) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 9 with MultipleSpecificSegmentSpec

Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in project druid by druid-io.

In class ScanBenchmark, method queryMultiQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
    List<SegmentDescriptor> segmentDescriptors = new ArrayList<>();
    List<QueryRunner<Row>> runners = new ArrayList<>();
    QueryToolChest toolChest = factory.getToolchest();
    for (int i = 0; i < state.numSegments; i++) {
        String segmentName = "qIndex";
        final QueryRunner<Result<ScanResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
            factory,
            SegmentId.dummy(segmentName),
            new QueryableIndexSegment(state.qIndexes.get(i), SegmentId.dummy(segmentName, i)));
        segmentDescriptors.add(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", i));
        runners.add(toolChest.preMergeQueryDecoration(runner));
    }
    QueryRunner theRunner = toolChest.postMergeQueryDecoration(
        new FinalizeResultsQueryRunner<>(
            toolChest.mergeResults(factory.mergeRunners(state.executorService, runners)),
            toolChest));
    Query effectiveQuery = query
        .withDataSource(new TableDataSource("qIndex"))
        .withQuerySegmentSpec(new MultipleSpecificSegmentSpec(segmentDescriptors))
        .withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
    Sequence<Result<ScanResultValue>> queryResult = theRunner.run(QueryPlus.wrap(effectiveQuery), ResponseContext.createEmpty());
    List<Result<ScanResultValue>> results = queryResult.toList();
    blackhole.consume(results);
}
Also used: QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) ArrayList(java.util.ArrayList) ScanQueryQueryToolChest(org.apache.druid.query.scan.ScanQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) QueryRunner(org.apache.druid.query.QueryRunner) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) Result(org.apache.druid.query.Result) TableDataSource(org.apache.druid.query.TableDataSource) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
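Unlike the single-index benchmarks, the spec here carries one descriptor per segment, so a single query fans out across all of them. A hedged sketch of the same construction written as a stream; the numSegments parameter is a stand-in for state.numSegments in the benchmark:

// A compact sketch, equivalent to the descriptor-building loop above:
// one SegmentDescriptor per partition, all bundled into a single spec.
static MultipleSpecificSegmentSpec specForPartitions(int numSegments) {
    return new MultipleSpecificSegmentSpec(
        IntStream.range(0, numSegments)
            .mapToObj(i -> new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", i))
            .collect(Collectors.toList()));
}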

Example 10 with MultipleSpecificSegmentSpec

Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in project druid by druid-io.

In class ScanQueryResultOrderingTest, method assertResultsEquals.

private void assertResultsEquals(final ScanQuery query, final List<Integer> expectedResults) {
    final List<List<Pair<SegmentId, QueryRunner<ScanResultValue>>>> serverRunners = new ArrayList<>();
    for (int i = 0; i <= segmentToServerMap.stream().max(Comparator.naturalOrder()).orElse(0); i++) {
        serverRunners.add(new ArrayList<>());
    }
    for (int segmentNumber = 0; segmentNumber < segmentToServerMap.size(); segmentNumber++) {
        final SegmentId segmentId = SEGMENTS.get(segmentNumber).getId();
        final int serverNumber = segmentToServerMap.get(segmentNumber);
        serverRunners.get(serverNumber).add(Pair.of(segmentId, segmentRunners.get(segmentNumber)));
    }
    // Simulates what the Historical servers would do.
    final List<QueryRunner<ScanResultValue>> mergedServerRunners = serverRunners
        .stream()
        .filter(runners -> !runners.isEmpty())
        .map(runners -> queryRunnerFactory.getToolchest().mergeResults(
            new QueryRunner<ScanResultValue>() {
                @Override
                public Sequence<ScanResultValue> run(
                    final QueryPlus<ScanResultValue> queryPlus,
                    final ResponseContext responseContext
                ) {
                    return queryRunnerFactory
                        .mergeRunners(
                            Execs.directExecutor(),
                            runners.stream().map(p -> p.rhs).collect(Collectors.toList()))
                        .run(
                            queryPlus.withQuery(
                                queryPlus.getQuery().withQuerySegmentSpec(
                                    new MultipleSpecificSegmentSpec(
                                        runners.stream()
                                            .map(p -> p.lhs.toDescriptor())
                                            .collect(Collectors.toList())))),
                            responseContext);
                }
            }))
        .collect(Collectors.toList());
    // Simulates what the Broker would do.
    final QueryRunner<ScanResultValue> brokerRunner = queryRunnerFactory.getToolchest().mergeResults(
        (queryPlus, responseContext) -> {
            final List<Sequence<ScanResultValue>> sequences = mergedServerRunners
                .stream()
                .map(runner -> runner.run(queryPlus.withoutThreadUnsafeState()))
                .collect(Collectors.toList());
            return new MergeSequence<>(queryPlus.getQuery().getResultOrdering(), Sequences.simple(sequences));
        });
    // Finally: run the query.
    final List<Integer> results = runQuery(
        (ScanQuery) Druids.ScanQueryBuilder.copy(query)
            .limit(limit)
            .batchSize(batchSize)
            .build()
            .withOverriddenContext(
                ImmutableMap.of(ScanQueryConfig.CTX_KEY_MAX_ROWS_QUEUED_FOR_ORDERING, maxRowsQueuedForOrdering)),
        brokerRunner);
    Assert.assertEquals(expectedResults.stream().limit(limit == 0 ? Long.MAX_VALUE : limit).collect(Collectors.toList()), results);
}
Also used: IntStream(java.util.stream.IntStream) QueryPlus(org.apache.druid.query.QueryPlus) Intervals(org.apache.druid.java.util.common.Intervals) RowBasedSegment(org.apache.druid.segment.RowBasedSegment) RunWith(org.junit.runner.RunWith) TreeSet(java.util.TreeSet) Pair(org.apache.druid.java.util.common.Pair) ArrayList(java.util.ArrayList) MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) Druids(org.apache.druid.query.Druids) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) QueryRunner(org.apache.druid.query.QueryRunner) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequences(org.apache.druid.java.util.common.guava.Sequences) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) ImmutableSortedSet(com.google.common.collect.ImmutableSortedSet) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Execs(org.apache.druid.java.util.common.concurrent.Execs) ImmutableMap(com.google.common.collect.ImmutableMap) MergeSequence(org.apache.druid.java.util.common.guava.MergeSequence) ResponseContext(org.apache.druid.query.context.ResponseContext) DateTime(org.joda.time.DateTime) Set(java.util.Set) Test(org.junit.Test) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) List(java.util.List) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnType(org.apache.druid.segment.column.ColumnType) DefaultGenericQueryMetricsFactory(org.apache.druid.query.DefaultGenericQueryMetricsFactory) SegmentId(org.apache.druid.timeline.SegmentId) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections)
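Inside the per-server runner above, each SegmentId is turned into a SegmentDescriptor via toDescriptor() before being bundled into the spec. A minimal sketch of that conversion; the helper name and the dummy id are hypothetical:

// A hedged sketch: SegmentId.toDescriptor() yields the interval, version,
// and partition number that MultipleSpecificSegmentSpec carries per segment.
static MultipleSpecificSegmentSpec specFor(SegmentId segmentId) {
    return new MultipleSpecificSegmentSpec(ImmutableList.of(segmentId.toDescriptor()));
}

// Usage with a placeholder id:
// MultipleSpecificSegmentSpec spec = specFor(SegmentId.dummy("example"));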

Aggregations

MultipleSpecificSegmentSpec (org.apache.druid.query.spec.MultipleSpecificSegmentSpec): 13
SegmentDescriptor (org.apache.druid.query.SegmentDescriptor): 9
TableDataSource (org.apache.druid.query.TableDataSource): 6
Test (org.junit.Test): 6
ArrayList (java.util.ArrayList): 5
Query (org.apache.druid.query.Query): 5
QueryRunner (org.apache.druid.query.QueryRunner): 4
Result (org.apache.druid.query.Result): 4
ScanQuery (org.apache.druid.query.scan.ScanQuery): 4
SegmentId (org.apache.druid.timeline.SegmentId): 4
Druids (org.apache.druid.query.Druids): 3
FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner): 3
QueryPlus (org.apache.druid.query.QueryPlus): 3
ResponseContext (org.apache.druid.query.context.ResponseContext): 3
ScanResultValue (org.apache.druid.query.scan.ScanResultValue): 3
ImmutableList (com.google.common.collect.ImmutableList): 2
List (java.util.List): 2
QueryableDruidServer (org.apache.druid.client.selector.QueryableDruidServer): 2
ServerSelector (org.apache.druid.client.selector.ServerSelector): 2
ISE (org.apache.druid.java.util.common.ISE): 2