Search in sources :

Example 31 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project hive by apache.

the class DruidQueryBasedInputFormat method distributeScanQuery.

/* New method that distributes the Scan query by creating splits containing
   * information about different Druid nodes that have the data for the given
   * query. */
private static HiveDruidSplit[] distributeScanQuery(String address, ScanQuery query, Path dummyPath) throws IOException {
    // If it has a limit, we use it and we do not distribute the query
    final boolean isFetch = query.getScanRowsLimit() < Long.MAX_VALUE;
    if (isFetch) {
        return new HiveDruidSplit[] { new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath, new String[] { address }) };
    }
    final List<LocatedSegmentDescriptor> segmentDescriptors = fetchLocatedSegmentDescriptors(address, query);
    // Create one input split for each segment
    final int numSplits = segmentDescriptors.size();
    final HiveDruidSplit[] splits = new HiveDruidSplit[segmentDescriptors.size()];
    for (int i = 0; i < numSplits; i++) {
        final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
        final String[] hosts = new String[locatedSD.getLocations().size() + 1];
        for (int j = 0; j < locatedSD.getLocations().size(); j++) {
            hosts[j] = locatedSD.getLocations().get(j).getHost();
        }
        // Default to broker if all other hosts fail.
        hosts[locatedSD.getLocations().size()] = address;
        // Create partial Select query
        final SegmentDescriptor newSD = new SegmentDescriptor(locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
        final Query partialQuery = query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
        splits[i] = new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath, hosts);
    }
    return splits;
}
Also used : MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) LocatedSegmentDescriptor(org.apache.druid.query.LocatedSegmentDescriptor) BaseQuery(org.apache.druid.query.BaseQuery) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) LocatedSegmentDescriptor(org.apache.druid.query.LocatedSegmentDescriptor) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor)

Example 32 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class ScanBenchmark method querySingleIncrementalIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleIncrementalIndex(Blackhole blackhole, IncrementalIndexState state) {
    QueryRunner<ScanResultValue> runner = QueryBenchmarkUtil.makeQueryRunner(factory, SegmentId.dummy("incIndex"), new IncrementalIndexSegment(state.incIndex, SegmentId.dummy("incIndex")));
    Query effectiveQuery = query.withDataSource(new TableDataSource("incIndex")).withQuerySegmentSpec(new MultipleSpecificSegmentSpec(ImmutableList.of(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0)))).withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
    List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
    blackhole.consume(results);
}
Also used : MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) TableDataSource(org.apache.druid.query.TableDataSource) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 33 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class ScanBenchmark method querySingleQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
    final QueryRunner<Result<ScanResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(factory, SegmentId.dummy("qIndex"), new QueryableIndexSegment(state.qIndexes.get(0), SegmentId.dummy("qIndex")));
    Query effectiveQuery = query.withDataSource(new TableDataSource("qIndex")).withQuerySegmentSpec(new MultipleSpecificSegmentSpec(ImmutableList.of(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0)))).withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
    List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
    blackhole.consume(results);
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) TableDataSource(org.apache.druid.query.TableDataSource) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) Result(org.apache.druid.query.Result) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 34 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class ScanBenchmark method queryMultiQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
    List<SegmentDescriptor> segmentDescriptors = new ArrayList<>();
    List<QueryRunner<Row>> runners = new ArrayList<>();
    QueryToolChest toolChest = factory.getToolchest();
    for (int i = 0; i < state.numSegments; i++) {
        String segmentName = "qIndex";
        final QueryRunner<Result<ScanResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(factory, SegmentId.dummy(segmentName), new QueryableIndexSegment(state.qIndexes.get(i), SegmentId.dummy(segmentName, i)));
        segmentDescriptors.add(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", i));
        runners.add(toolChest.preMergeQueryDecoration(runner));
    }
    QueryRunner theRunner = toolChest.postMergeQueryDecoration(new FinalizeResultsQueryRunner<>(toolChest.mergeResults(factory.mergeRunners(state.executorService, runners)), toolChest));
    Query effectiveQuery = query.withDataSource(new TableDataSource("qIndex")).withQuerySegmentSpec(new MultipleSpecificSegmentSpec(segmentDescriptors)).withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
    Sequence<Result<ScanResultValue>> queryResult = theRunner.run(QueryPlus.wrap(effectiveQuery), ResponseContext.createEmpty());
    List<Result<ScanResultValue>> results = queryResult.toList();
    blackhole.consume(results);
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) ArrayList(java.util.ArrayList) ScanQueryQueryToolChest(org.apache.druid.query.scan.ScanQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) QueryRunner(org.apache.druid.query.QueryRunner) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) Result(org.apache.druid.query.Result) TableDataSource(org.apache.druid.query.TableDataSource) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 35 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class TimeCompareBenchmark method setup.

@Setup
public void setup() throws IOException {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    executorService = Execs.multiThreaded(numSegments, "TopNThreadPool");
    setupQueries();
    String schemaName = "basic";
    schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
    segmentIntervals = new Interval[numSegments];
    long startMillis = schemaInfo.getDataInterval().getStartMillis();
    long endMillis = schemaInfo.getDataInterval().getEndMillis();
    long partialIntervalMillis = (endMillis - startMillis) / numSegments;
    for (int i = 0; i < numSegments; i++) {
        long partialEndMillis = startMillis + partialIntervalMillis;
        segmentIntervals[i] = Intervals.utc(startMillis, partialEndMillis);
        log.info("Segment [%d] with interval [%s]", i, segmentIntervals[i]);
        startMillis = partialEndMillis;
    }
    incIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
        log.info("Generating rows for segment " + i);
        DataGenerator gen = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + i, segmentIntervals[i], rowsPerSegment);
        IncrementalIndex incIndex = makeIncIndex();
        for (int j = 0; j < rowsPerSegment; j++) {
            InputRow row = gen.nextRow();
            if (j % 10000 == 0) {
                log.info(j + " rows generated.");
            }
            incIndex.add(row);
        }
        incIndexes.add(incIndex);
    }
    tmpDir = FileUtils.createTempDir();
    log.info("Using temp dir: " + tmpDir.getAbsolutePath());
    qIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
        File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec(), null);
        QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
        qIndexes.add(qIndex);
    }
    List<QueryRunner<Result<TopNResultValue>>> singleSegmentRunners = new ArrayList<>();
    QueryToolChest toolChest = topNFactory.getToolchest();
    for (int i = 0; i < numSegments; i++) {
        SegmentId segmentId = SegmentId.dummy("qIndex " + i);
        QueryRunner<Result<TopNResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(topNFactory, segmentId, new QueryableIndexSegment(qIndexes.get(i), segmentId));
        singleSegmentRunners.add(new PerSegmentOptimizingQueryRunner<>(toolChest.preMergeQueryDecoration(runner), new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))));
    }
    topNRunner = toolChest.postMergeQueryDecoration(new FinalizeResultsQueryRunner<>(toolChest.mergeResults(topNFactory.mergeRunners(executorService, singleSegmentRunners)), toolChest));
    List<QueryRunner<Result<TimeseriesResultValue>>> singleSegmentRunnersT = new ArrayList<>();
    QueryToolChest toolChestT = timeseriesFactory.getToolchest();
    for (int i = 0; i < numSegments; i++) {
        SegmentId segmentId = SegmentId.dummy("qIndex " + i);
        QueryRunner<Result<TimeseriesResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(timeseriesFactory, segmentId, new QueryableIndexSegment(qIndexes.get(i), segmentId));
        singleSegmentRunnersT.add(new PerSegmentOptimizingQueryRunner<>(toolChestT.preMergeQueryDecoration(runner), new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))));
    }
    timeseriesRunner = toolChestT.postMergeQueryDecoration(new FinalizeResultsQueryRunner<>(toolChestT.mergeResults(timeseriesFactory.mergeRunners(executorService, singleSegmentRunnersT)), toolChestT));
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) IndexSpec(org.apache.druid.segment.IndexSpec) ArrayList(java.util.ArrayList) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) TopNQueryQueryToolChest(org.apache.druid.query.topn.TopNQueryQueryToolChest) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) Result(org.apache.druid.query.Result) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TopNResultValue(org.apache.druid.query.topn.TopNResultValue) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) SegmentId(org.apache.druid.timeline.SegmentId) QueryRunner(org.apache.druid.query.QueryRunner) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) PerSegmentOptimizingQueryRunner(org.apache.druid.query.PerSegmentOptimizingQueryRunner) PerSegmentQueryOptimizationContext(org.apache.druid.query.PerSegmentQueryOptimizationContext) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) QueryableIndex(org.apache.druid.segment.QueryableIndex) DataGenerator(org.apache.druid.segment.generator.DataGenerator) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File) Setup(org.openjdk.jmh.annotations.Setup)

Aggregations

SegmentDescriptor (org.apache.druid.query.SegmentDescriptor)71 Test (org.junit.Test)47 Interval (org.joda.time.Interval)26 TaskStatus (org.apache.druid.indexer.TaskStatus)21 DataSegment (org.apache.druid.timeline.DataSegment)20 Executor (java.util.concurrent.Executor)19 ArrayList (java.util.ArrayList)17 Result (org.apache.druid.query.Result)16 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)16 List (java.util.List)15 Query (org.apache.druid.query.Query)14 QueryRunner (org.apache.druid.query.QueryRunner)14 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)13 Map (java.util.Map)13 TimeseriesQuery (org.apache.druid.query.timeseries.TimeseriesQuery)13 ImmutableMap (com.google.common.collect.ImmutableMap)12 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)11 QueryPlus (org.apache.druid.query.QueryPlus)11 ResponseContext (org.apache.druid.query.context.ResponseContext)11 ImmutableList (com.google.common.collect.ImmutableList)10