Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in the Apache Hive project.
From the class DruidQueryBasedInputFormat, method distributeScanQuery.
/**
 * Distributes a Scan query by creating one input split per Druid segment,
 * each annotated with the hosts that serve that segment so the split can be
 * scheduled close to the data.
 *
 * @param address Druid broker address; used to fetch segment metadata and as
 *                the fallback host appended to every split
 * @param query the Scan query to distribute
 * @param dummyPath placeholder path required by the split constructor
 * @return a single broker-targeted split when the query carries a row limit
 *         (fetch case), otherwise one split per located segment
 * @throws IOException if the query cannot be serialized to JSON or the
 *         segment descriptors cannot be fetched
 */
private static HiveDruidSplit[] distributeScanQuery(String address, ScanQuery query, Path dummyPath) throws IOException {
// If it has a limit, we use it and we do not distribute the query
final boolean isFetch = query.getScanRowsLimit() < Long.MAX_VALUE;
if (isFetch) {
return new HiveDruidSplit[] { new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath, new String[] { address }) };
}
final List<LocatedSegmentDescriptor> segmentDescriptors = fetchLocatedSegmentDescriptors(address, query);
// Create one input split for each segment
final int numSplits = segmentDescriptors.size();
final HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
for (int i = 0; i < numSplits; i++) {
final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
// Hoist the location count instead of re-reading it on every access.
final int numLocations = locatedSD.getLocations().size();
final String[] hosts = new String[numLocations + 1];
for (int j = 0; j < numLocations; j++) {
hosts[j] = locatedSD.getLocations().get(j).getHost();
}
// Default to broker if all other hosts fail.
hosts[numLocations] = address;
// Create a partial Scan query restricted to exactly this segment.
final SegmentDescriptor newSD = new SegmentDescriptor(locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
final Query partialQuery = query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
splits[i] = new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath, hosts);
}
return splits;
}
Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in the Apache Druid project (druid-io).
From the class ScanBenchmark, method querySingleIncrementalIndex.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleIncrementalIndex(Blackhole blackhole, IncrementalIndexState state) {
// Benchmark: run the scan query against one incremental-index segment.
final SegmentId segmentId = SegmentId.dummy("incIndex");
final QueryRunner<ScanResultValue> runner =
QueryBenchmarkUtil.makeQueryRunner(factory, segmentId, new IncrementalIndexSegment(state.incIndex, segmentId));
// Point the query at the single dummy segment and mark it non-outermost so
// the scan is executed as a per-segment query.
final SegmentDescriptor descriptor = new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0);
final Query effectiveQuery = query
.withDataSource(new TableDataSource("incIndex"))
.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(ImmutableList.of(descriptor)))
.withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
final List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
blackhole.consume(results);
}
Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in the Apache Druid project (druid-io).
From the class ScanBenchmark, method querySingleQueryableIndex.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
// Benchmark: run the scan query against the first queryable-index segment.
final SegmentId segmentId = SegmentId.dummy("qIndex");
final QueryRunner<Result<ScanResultValue>> runner =
QueryBenchmarkUtil.makeQueryRunner(factory, segmentId, new QueryableIndexSegment(state.qIndexes.get(0), segmentId));
// Point the query at the single dummy segment and mark it non-outermost so
// the scan is executed as a per-segment query.
final SegmentDescriptor descriptor = new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0);
final Query effectiveQuery = query
.withDataSource(new TableDataSource("qIndex"))
.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(ImmutableList.of(descriptor)))
.withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
final List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
blackhole.consume(results);
}
Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in the Apache Druid project (druid-io).
From the class ScanBenchmark, method queryMultiQueryableIndex.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
// Benchmark: scan across every segment, merged the way a data node would.
QueryToolChest toolChest = factory.getToolchest();
List<SegmentDescriptor> segmentDescriptors = new ArrayList<>();
List<QueryRunner<Row>> runners = new ArrayList<>();
// Build one pre-merge-decorated runner plus a descriptor per segment.
for (int segment = 0; segment < state.numSegments; segment++) {
final String segmentName = "qIndex";
final QueryRunner<Result<ScanResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
factory,
SegmentId.dummy(segmentName),
new QueryableIndexSegment(state.qIndexes.get(segment), SegmentId.dummy(segmentName, segment)));
segmentDescriptors.add(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", segment));
runners.add(toolChest.preMergeQueryDecoration(runner));
}
// Merge all per-segment runners, then finalize and post-merge decorate.
QueryRunner theRunner = toolChest.postMergeQueryDecoration(
new FinalizeResultsQueryRunner<>(toolChest.mergeResults(factory.mergeRunners(state.executorService, runners)), toolChest));
Query effectiveQuery = query
.withDataSource(new TableDataSource("qIndex"))
.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(segmentDescriptors))
.withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
Sequence<Result<ScanResultValue>> queryResult = theRunner.run(QueryPlus.wrap(effectiveQuery), ResponseContext.createEmpty());
List<Result<ScanResultValue>> results = queryResult.toList();
blackhole.consume(results);
}
Use of org.apache.druid.query.spec.MultipleSpecificSegmentSpec in the Apache Druid project (druid-io).
From the class ScanQueryResultOrderingTest, method assertResultsEquals.
/**
 * Runs the given Scan query through a simulated two-tier Historical + Broker
 * topology and asserts the results equal {@code expectedResults}, truncated to
 * {@code limit} when a non-zero limit is configured.
 *
 * @param query the Scan query under test
 * @param expectedResults expected row values, before any limit is applied
 */
private void assertResultsEquals(final ScanQuery query, final List<Integer> expectedResults) {
// One bucket of (segmentId, runner) pairs per simulated server, sized by the
// highest server number appearing in segmentToServerMap.
final List<List<Pair<SegmentId, QueryRunner<ScanResultValue>>>> serverRunners = new ArrayList<>();
for (int i = 0; i <= segmentToServerMap.stream().max(Comparator.naturalOrder()).orElse(0); i++) {
serverRunners.add(new ArrayList<>());
}
// Assign each segment's runner to the server that hosts it.
for (int segmentNumber = 0; segmentNumber < segmentToServerMap.size(); segmentNumber++) {
final SegmentId segmentId = SEGMENTS.get(segmentNumber).getId();
final int serverNumber = segmentToServerMap.get(segmentNumber);
serverRunners.get(serverNumber).add(Pair.of(segmentId, segmentRunners.get(segmentNumber)));
}
// Simulates what the Historical servers would do.
// Each non-empty server merges its own segment runners and rewrites the query
// segment spec to cover exactly the segments that server hosts.
final List<QueryRunner<ScanResultValue>> mergedServerRunners = serverRunners.stream().filter(runners -> !runners.isEmpty()).map(runners -> queryRunnerFactory.getToolchest().mergeResults(new QueryRunner<ScanResultValue>() {
@Override
public Sequence<ScanResultValue> run(final QueryPlus<ScanResultValue> queryPlus, final ResponseContext responseContext) {
return queryRunnerFactory.mergeRunners(Execs.directExecutor(), runners.stream().map(p -> p.rhs).collect(Collectors.toList())).run(queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(new MultipleSpecificSegmentSpec(runners.stream().map(p -> p.lhs.toDescriptor()).collect(Collectors.toList())))), responseContext);
}
})).collect(Collectors.toList());
// Simulates what the Broker would do.
// Merge-sorts the per-server result sequences using the query's result ordering.
final QueryRunner<ScanResultValue> brokerRunner = queryRunnerFactory.getToolchest().mergeResults((queryPlus, responseContext) -> {
final List<Sequence<ScanResultValue>> sequences = mergedServerRunners.stream().map(runner -> runner.run(queryPlus.withoutThreadUnsafeState())).collect(Collectors.toList());
return new MergeSequence<>(queryPlus.getQuery().getResultOrdering(), Sequences.simple(sequences));
});
// Finally: run the query.
// NOTE(review): a limit of 0 appears to mean "no limit" here (see the
// Long.MAX_VALUE fallback in the assertion below) — confirm against callers.
final List<Integer> results = runQuery((ScanQuery) Druids.ScanQueryBuilder.copy(query).limit(limit).batchSize(batchSize).build().withOverriddenContext(ImmutableMap.of(ScanQueryConfig.CTX_KEY_MAX_ROWS_QUEUED_FOR_ORDERING, maxRowsQueuedForOrdering)), brokerRunner);
Assert.assertEquals(expectedResults.stream().limit(limit == 0 ? Long.MAX_VALUE : limit).collect(Collectors.toList()), results);
}
Aggregations