
Example 6 with MultipleSpecificSegmentSpec

Use of io.druid.query.spec.MultipleSpecificSegmentSpec in project hive by apache.

From the class DruidQueryBasedInputFormat, the method distributeScanQuery:

/**
 * Distributes the Scan query by creating splits containing information about
 * the different Druid nodes that have the data for the given query.
 */
private static HiveDruidSplit[] distributeScanQuery(
        Configuration conf, String address, ScanQuery query, Path dummyPath) throws IOException {
    // If it has a limit (a "fetch" query), we run it as-is against a single
    // node and do not distribute the query
    final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
    if (isFetch) {
        return new HiveDruidSplit[] {
            new HiveDruidSplit(
                DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query),
                dummyPath,
                new String[] { address })
        };
    }
    final List<LocatedSegmentDescriptor> segmentDescriptors = fetchLocatedSegmentDescriptors(address, query);
    // Create one input split for each segment
    final int numSplits = segmentDescriptors.size();
    final HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
    for (int i = 0; i < numSplits; i++) {
        final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
        final String[] hosts = new String[locatedSD.getLocations().size()];
        for (int j = 0; j < locatedSD.getLocations().size(); j++) {
            hosts[j] = locatedSD.getLocations().get(j).getHost();
        }
        // Create the partial Scan query covering only this segment
        final SegmentDescriptor newSD = new SegmentDescriptor(
                locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
        final Query partialQuery = query.withQuerySegmentSpec(
                new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
        splits[i] = new HiveDruidSplit(
                DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath, hosts);
    }
    return splits;
}
Also used : MultipleSpecificSegmentSpec(io.druid.query.spec.MultipleSpecificSegmentSpec) LocatedSegmentDescriptor(io.druid.query.LocatedSegmentDescriptor) BaseQuery(io.druid.query.BaseQuery) SelectQuery(io.druid.query.select.SelectQuery) Query(io.druid.query.Query) ScanQuery(io.druid.query.scan.ScanQuery) SegmentDescriptor(io.druid.query.SegmentDescriptor)
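
On the reader side, each split carries the serialized partial query as JSON, so it can be turned back into a typed Druid Query before execution. Below is a minimal sketch of that round trip; the SplitQueryReader class and readQuery helper are illustrative assumptions, not part of the Hive source, and druidMapper stands in for the same Druid-aware Jackson ObjectMapper (DruidStorageHandlerUtils.JSON_MAPPER) that serialized the query.

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.query.Query;

public final class SplitQueryReader {

    /**
     * Illustrative helper: recover the partial Scan query that
     * distributeScanQuery stored in a HiveDruidSplit. Query carries Jackson
     * type information, so the concrete subtype is restored automatically.
     */
    public static Query<?> readQuery(ObjectMapper druidMapper, String splitJson) throws java.io.IOException {
        return druidMapper.readValue(splitJson, Query.class);
    }
}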

Example 7 with MultipleSpecificSegmentSpec

Use of io.druid.query.spec.MultipleSpecificSegmentSpec in project druid by druid-io.

From the class CachingClusteredClientTest, the test method testSingleDimensionPruning:

@Test
public void testSingleDimensionPruning() throws Exception {
    DimFilter filter = Druids.newAndDimFilterBuilder()
        .fields(Arrays.asList(
            Druids.newOrDimFilterBuilder()
                .fields(Arrays.asList(
                    new SelectorDimFilter("dim1", "a", null),
                    new BoundDimFilter("dim1", "from", "to", false, false, false, null,
                        StringComparators.LEXICOGRAPHIC)))
                .build(),
            Druids.newAndDimFilterBuilder()
                .fields(Arrays.asList(
                    new InDimFilter("dim2", Arrays.asList("a", "c", "e", "g"), null),
                    new BoundDimFilter("dim2", "aaa", "hi", false, false, false, null,
                        StringComparators.LEXICOGRAPHIC),
                    new BoundDimFilter("dim2", "e", "zzz", true, true, false, null,
                        StringComparators.LEXICOGRAPHIC)))
                .build()))
        .build();
    final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder()
        .dataSource(DATA_SOURCE)
        .filters(filter)
        .granularity(GRANULARITY)
        .intervals(SEG_SPEC)
        .context(CONTEXT)
        .intervals("2011-01-05/2011-01-10")
        .aggregators(RENAMED_AGGS)
        .postAggregators(RENAMED_POST_AGGS);
    TimeseriesQuery query = builder.build();
    Map<String, List> context = new HashMap<>();
    final Interval interval1 = new Interval("2011-01-06/2011-01-07");
    final Interval interval2 = new Interval("2011-01-07/2011-01-08");
    final Interval interval3 = new Interval("2011-01-08/2011-01-09");
    QueryRunner runner = new FinalizeResultsQueryRunner(client, new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()));
    final DruidServer lastServer = servers[random.nextInt(servers.length)];
    ServerSelector selector1 = makeMockSingleDimensionSelector(lastServer, "dim1", null, "b", 1);
    ServerSelector selector2 = makeMockSingleDimensionSelector(lastServer, "dim1", "e", "f", 2);
    ServerSelector selector3 = makeMockSingleDimensionSelector(lastServer, "dim1", "hi", "zzz", 3);
    ServerSelector selector4 = makeMockSingleDimensionSelector(lastServer, "dim2", "a", "e", 4);
    ServerSelector selector5 = makeMockSingleDimensionSelector(lastServer, "dim2", null, null, 5);
    ServerSelector selector6 = makeMockSingleDimensionSelector(lastServer, "other", "b", null, 6);
    timeline.add(interval1, "v", new StringPartitionChunk<>(null, "a", 1, selector1));
    timeline.add(interval1, "v", new StringPartitionChunk<>("a", "b", 2, selector2));
    timeline.add(interval1, "v", new StringPartitionChunk<>("b", null, 3, selector3));
    timeline.add(interval2, "v", new StringPartitionChunk<>(null, "d", 4, selector4));
    timeline.add(interval2, "v", new StringPartitionChunk<>("d", null, 5, selector5));
    timeline.add(interval3, "v", new StringPartitionChunk<>(null, null, 6, selector6));
    final Capture<TimeseriesQuery> capture = Capture.newInstance();
    final Capture<Map<String, List>> contextCap = Capture.newInstance();
    QueryRunner mockRunner = EasyMock.createNiceMock(QueryRunner.class);
    EasyMock.expect(mockRunner.run(EasyMock.capture(capture), EasyMock.capture(contextCap))).andReturn(Sequences.empty()).anyTimes();
    EasyMock.expect(serverView.getQueryRunner(lastServer)).andReturn(mockRunner).anyTimes();
    EasyMock.replay(serverView);
    EasyMock.replay(mockRunner);
    List<SegmentDescriptor> descriptors = new ArrayList<>();
    descriptors.add(new SegmentDescriptor(interval1, "v", 1));
    descriptors.add(new SegmentDescriptor(interval1, "v", 3));
    descriptors.add(new SegmentDescriptor(interval2, "v", 5));
    descriptors.add(new SegmentDescriptor(interval3, "v", 6));
    MultipleSpecificSegmentSpec expected = new MultipleSpecificSegmentSpec(descriptors);
    Sequences.toList(runner.run(query, context), Lists.newArrayList());
    Assert.assertEquals(expected, capture.getValue().getQuerySegmentSpec());
}
Also used : MultipleSpecificSegmentSpec(io.druid.query.spec.MultipleSpecificSegmentSpec) BoundDimFilter(io.druid.query.filter.BoundDimFilter) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TimeseriesQueryQueryToolChest(io.druid.query.timeseries.TimeseriesQueryQueryToolChest) ServerSelector(io.druid.client.selector.ServerSelector) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) SegmentDescriptor(io.druid.query.SegmentDescriptor) Druids(io.druid.query.Druids) InDimFilter(io.druid.query.filter.InDimFilter) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) QueryableDruidServer(io.druid.client.selector.QueryableDruidServer) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) QueryRunner(io.druid.query.QueryRunner) DimFilter(io.druid.query.filter.DimFilter) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) Interval(org.joda.time.Interval) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)
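
The expected descriptors fall out of a simple range-overlap test: each partition covers a [start, end) slice of the partition dimension (null meaning unbounded), and a partition can be pruned when that slice cannot intersect the range the filter allows. The sketch below is a simplified illustration of that check, not CachingClusteredClient's actual implementation:

// Simplified illustration of single-dimension pruning. A partition covers
// [chunkStart, chunkEnd) on the partition dimension, null meaning unbounded;
// the filter constrains the same dimension to [filterLow, filterHigh].
static boolean partitionMayMatch(String chunkStart, String chunkEnd, String filterLow, String filterHigh) {
    // Partition ends at or before the filter range begins: no value can match.
    if (chunkEnd != null && filterLow != null && chunkEnd.compareTo(filterLow) <= 0) {
        return false;
    }
    // Partition starts after the filter range ends: no value can match.
    if (chunkStart != null && filterHigh != null && chunkStart.compareTo(filterHigh) > 0) {
        return false;
    }
    // The ranges may overlap, so the partition must be queried.
    return true;
}

Under this check, the dim1 partition ["e", "f") is pruned against the filter range ["from", "to"] because "f" sorts before "from" lexicographically, which is why partition 2 is absent from the expected descriptors, while the unbounded dim2 partition 5 can never be pruned.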

Example 8 with MultipleSpecificSegmentSpec

Use of io.druid.query.spec.MultipleSpecificSegmentSpec in project druid by druid-io.

From the class RealtimeManagerTest, the test method testQueryWithMultipleSegmentSpec:

@Test(timeout = 10_000L)
public void testQueryWithMultipleSegmentSpec() throws IOException, InterruptedException {
    List<Row> expectedResults_both_partitions = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "business", "rows", 2L, "idx", 260L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "health", "rows", 2L, "idx", 236L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "mezzanine", "rows", 4L, "idx", 4556L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "news", "rows", 2L, "idx", 284L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "technology", "rows", 2L, "idx", 202L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-27", "alias", "automotive", "rows", 2L, "idx", 288L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-27", "alias", "entertainment", "rows", 2L, "idx", 326L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "automotive", "rows", 2L, "idx", 312L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "business", "rows", 2L, "idx", 248L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "entertainment", "rows", 2L, "idx", 326L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "health", "rows", 2L, "idx", 262L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "mezzanine", "rows", 6L, "idx", 5126L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "news", "rows", 2L, "idx", 254L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "premium", "rows", 6L, "idx", 5276L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "technology", "rows", 2L, "idx", 206L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "travel", "rows", 2L, "idx", 260L));
    List<Row> expectedResults_single_partition_26_28 = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "business", "rows", 1L, "idx", 130L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "health", "rows", 1L, "idx", 118L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "mezzanine", "rows", 2L, "idx", 2278L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "news", "rows", 1L, "idx", 142L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "technology", "rows", 1L, "idx", 101L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-27", "alias", "automotive", "rows", 1L, "idx", 144L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-27", "alias", "entertainment", "rows", 1L, "idx", 163L));
    List<Row> expectedResults_single_partition_28_29 = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "automotive", "rows", 1L, "idx", 156L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "business", "rows", 1L, "idx", 124L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "entertainment", "rows", 1L, "idx", 163L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "health", "rows", 1L, "idx", 131L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "mezzanine", "rows", 3L, "idx", 2563L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "news", "rows", 1L, "idx", 127L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "premium", "rows", 3L, "idx", 2638L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "technology", "rows", 1L, "idx", 103L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "travel", "rows", 1L, "idx", 130L));
    chiefStartedLatch.await();
    final Interval interval_26_28 = new Interval("2011-03-26T00:00:00.000Z/2011-03-28T00:00:00.000Z");
    final Interval interval_28_29 = new Interval("2011-03-28T00:00:00.000Z/2011-03-29T00:00:00.000Z");
    final SegmentDescriptor descriptor_26_28_0 = new SegmentDescriptor(interval_26_28, "ver0", 0);
    final SegmentDescriptor descriptor_28_29_0 = new SegmentDescriptor(interval_28_29, "ver1", 0);
    final SegmentDescriptor descriptor_26_28_1 = new SegmentDescriptor(interval_26_28, "ver0", 1);
    final SegmentDescriptor descriptor_28_29_1 = new SegmentDescriptor(interval_28_29, "ver1", 1);
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setQuerySegmentSpec(new MultipleSpecificSegmentSpec(ImmutableList.<SegmentDescriptor>of(
            descriptor_26_28_0, descriptor_28_29_0, descriptor_26_28_1, descriptor_28_29_1)))
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
        .setAggregatorSpecs(Arrays.asList(
            QueryRunnerTestHelper.rowsCount,
            new LongSumAggregatorFactory("idx", "index")))
        .setGranularity(QueryRunnerTestHelper.dayGran)
        .build();
    final Map<Interval, QueryRunner> runnerMap = ImmutableMap.<Interval, QueryRunner>of(
        interval_26_28,
        QueryRunnerTestHelper.makeQueryRunner(factory, "druid.sample.numeric.tsv.top", null),
        interval_28_29,
        QueryRunnerTestHelper.makeQueryRunner(factory, "druid.sample.numeric.tsv.bottom", null));
    plumber.setRunners(runnerMap);
    plumber2.setRunners(runnerMap);
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, query.getQuerySegmentSpec().lookup(query, realtimeManager3), query);
    TestHelper.assertExpectedObjects(expectedResults_both_partitions, results, "");
    results = GroupByQueryRunnerTestHelper.runQuery(factory, realtimeManager3.getQueryRunnerForSegments(query, ImmutableList.<SegmentDescriptor>of(descriptor_26_28_0)), query);
    TestHelper.assertExpectedObjects(expectedResults_single_partition_26_28, results, "");
    results = GroupByQueryRunnerTestHelper.runQuery(factory, realtimeManager3.getQueryRunnerForSegments(query, ImmutableList.<SegmentDescriptor>of(descriptor_28_29_0)), query);
    TestHelper.assertExpectedObjects(expectedResults_single_partition_28_29, results, "");
    results = GroupByQueryRunnerTestHelper.runQuery(factory, realtimeManager3.getQueryRunnerForSegments(query, ImmutableList.<SegmentDescriptor>of(descriptor_26_28_1)), query);
    TestHelper.assertExpectedObjects(expectedResults_single_partition_26_28, results, "");
    results = GroupByQueryRunnerTestHelper.runQuery(factory, realtimeManager3.getQueryRunnerForSegments(query, ImmutableList.<SegmentDescriptor>of(descriptor_28_29_1)), query);
    TestHelper.assertExpectedObjects(expectedResults_single_partition_28_29, results, "");
}
Also used : MultipleSpecificSegmentSpec(io.druid.query.spec.MultipleSpecificSegmentSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) SegmentDescriptor(io.druid.query.SegmentDescriptor) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) InputRow(io.druid.data.input.InputRow) Row(io.druid.data.input.Row) SpecificSegmentQueryRunner(io.druid.query.spec.SpecificSegmentQueryRunner) QueryRunner(io.druid.query.QueryRunner) Interval(org.joda.time.Interval) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest) Test(org.junit.Test)
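
Both routing paths exercised above converge on the same mechanism: QuerySegmentSpec.lookup delegates to the walker's getQueryRunnerForSegments, so the returned runner covers exactly the listed partitions. A condensed usage sketch, reusing the test's names (walker is a placeholder for any QuerySegmentWalker, such as realtimeManager3 above):

SegmentDescriptor first = new SegmentDescriptor(interval_26_28, "ver0", 0);
SegmentDescriptor second = new SegmentDescriptor(interval_26_28, "ver0", 1);
MultipleSpecificSegmentSpec spec = new MultipleSpecificSegmentSpec(ImmutableList.<SegmentDescriptor>of(first, second));
// lookup hands the descriptors to the walker, which returns a runner scoped
// to exactly these two partitions of the 03-26/03-28 interval.
QueryRunner<Row> runner = spec.lookup(query, walker);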

Aggregations

MultipleSpecificSegmentSpec (io.druid.query.spec.MultipleSpecificSegmentSpec)8 SegmentDescriptor (io.druid.query.SegmentDescriptor)7 Interval (org.joda.time.Interval)4 QueryRunner (io.druid.query.QueryRunner)3 Result (io.druid.query.Result)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 QueryableDruidServer (io.druid.client.selector.QueryableDruidServer)2 ServerSelector (io.druid.client.selector.ServerSelector)2 MergeSequence (io.druid.java.util.common.guava.MergeSequence)2 Sequence (io.druid.java.util.common.guava.Sequence)2 BaseQuery (io.druid.query.BaseQuery)2 BySegmentResultValueClass (io.druid.query.BySegmentResultValueClass)2 LocatedSegmentDescriptor (io.druid.query.LocatedSegmentDescriptor)2 Query (io.druid.query.Query)2 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)2 GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest)2 SelectQuery (io.druid.query.select.SelectQuery)2 TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery)2 ArrayList (java.util.ArrayList)2 Test (org.junit.Test)2