Use of io.druid.query.spec.MultipleSpecificSegmentSpec in project hive by apache.
The class DruidQueryBasedInputFormat, method distributeScanQuery.
/* New method that distributes the Scan query by creating splits containing
 * information about the different Druid nodes that hold the data for the given
 * query. */
private static HiveDruidSplit[] distributeScanQuery(Configuration conf, String address,
    ScanQuery query, Path dummyPath) throws IOException {
  // If the query is a fetch (i.e. it has a limit), we do not distribute it
  final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
  if (isFetch) {
    return new HiveDruidSplit[] {
        new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query),
            dummyPath, new String[] { address }) };
  }
  final List<LocatedSegmentDescriptor> segmentDescriptors =
      fetchLocatedSegmentDescriptors(address, query);
  // Create one input split for each segment
  final int numSplits = segmentDescriptors.size();
  final HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
  for (int i = 0; i < numSplits; i++) {
    final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
    // Collect the hosts that serve this segment so the split can be scheduled locally
    final String[] hosts = new String[locatedSD.getLocations().size()];
    for (int j = 0; j < locatedSD.getLocations().size(); j++) {
      hosts[j] = locatedSD.getLocations().get(j).getHost();
    }
    // Create a partial Scan query restricted to this single segment
    final SegmentDescriptor newSD = new SegmentDescriptor(
        locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
    final Query partialQuery =
        query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
    splits[i] = new HiveDruidSplit(
        DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath, hosts);
  }
  return splits;
}
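The per-split trick above is wrapping one SegmentDescriptor per located segment in its own MultipleSpecificSegmentSpec, so every Hive split serializes a query pinned to a single Druid segment. A minimal standalone sketch of that construction, assuming the io.druid query classes and Joda-Time are on the classpath (the interval, version, and partition number are made-up values for illustration):

import java.util.Collections;
import java.util.List;

import org.joda.time.Interval;

import io.druid.query.SegmentDescriptor;
import io.druid.query.spec.MultipleSpecificSegmentSpec;

public class SegmentSpecSketch {
  public static void main(String[] args) {
    // Hypothetical segment coordinates: interval, segment version, partition number
    SegmentDescriptor descriptor =
        new SegmentDescriptor(new Interval("2011-01-12/2011-01-13"), "v1", 0);
    // A spec that pins the query to exactly this one segment
    MultipleSpecificSegmentSpec spec =
        new MultipleSpecificSegmentSpec(Collections.singletonList(descriptor));
    // The spec reports the intervals covered by its descriptors
    List<Interval> intervals = spec.getIntervals();
    System.out.println(intervals); // e.g. [2011-01-12T00:00:00.000Z/2011-01-13T00:00:00.000Z]
  }
}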
Use of io.druid.query.spec.MultipleSpecificSegmentSpec in project druid by druid-io.
The class CachingClusteredClientTest, method testSingleDimensionPruning.
@Test
public void testSingleDimensionPruning() throws Exception {
DimFilter filter = Druids.newAndDimFilterBuilder()
    .fields(Arrays.asList(
        Druids.newOrDimFilterBuilder()
            .fields(Arrays.asList(
                new SelectorDimFilter("dim1", "a", null),
                new BoundDimFilter("dim1", "from", "to", false, false, false, null,
                    StringComparators.LEXICOGRAPHIC)))
            .build(),
        Druids.newAndDimFilterBuilder()
            .fields(Arrays.asList(
                new InDimFilter("dim2", Arrays.asList("a", "c", "e", "g"), null),
                new BoundDimFilter("dim2", "aaa", "hi", false, false, false, null,
                    StringComparators.LEXICOGRAPHIC),
                new BoundDimFilter("dim2", "e", "zzz", true, true, false, null,
                    StringComparators.LEXICOGRAPHIC)))
            .build()))
    .build();
final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder()
    .dataSource(DATA_SOURCE)
    .filters(filter)
    .granularity(GRANULARITY)
    .intervals(SEG_SPEC)
    .context(CONTEXT)
    .intervals("2011-01-05/2011-01-10")
    .aggregators(RENAMED_AGGS)
    .postAggregators(RENAMED_POST_AGGS);
TimeseriesQuery query = builder.build();
Map<String, List> context = new HashMap<>();
final Interval interval1 = new Interval("2011-01-06/2011-01-07");
final Interval interval2 = new Interval("2011-01-07/2011-01-08");
final Interval interval3 = new Interval("2011-01-08/2011-01-09");
QueryRunner runner = new FinalizeResultsQueryRunner(client, new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()));
final DruidServer lastServer = servers[random.nextInt(servers.length)];
ServerSelector selector1 = makeMockSingleDimensionSelector(lastServer, "dim1", null, "b", 1);
ServerSelector selector2 = makeMockSingleDimensionSelector(lastServer, "dim1", "e", "f", 2);
ServerSelector selector3 = makeMockSingleDimensionSelector(lastServer, "dim1", "hi", "zzz", 3);
ServerSelector selector4 = makeMockSingleDimensionSelector(lastServer, "dim2", "a", "e", 4);
ServerSelector selector5 = makeMockSingleDimensionSelector(lastServer, "dim2", null, null, 5);
ServerSelector selector6 = makeMockSingleDimensionSelector(lastServer, "other", "b", null, 6);
timeline.add(interval1, "v", new StringPartitionChunk<>(null, "a", 1, selector1));
timeline.add(interval1, "v", new StringPartitionChunk<>("a", "b", 2, selector2));
timeline.add(interval1, "v", new StringPartitionChunk<>("b", null, 3, selector3));
timeline.add(interval2, "v", new StringPartitionChunk<>(null, "d", 4, selector4));
timeline.add(interval2, "v", new StringPartitionChunk<>("d", null, 5, selector5));
timeline.add(interval3, "v", new StringPartitionChunk<>(null, null, 6, selector6));
final Capture<TimeseriesQuery> capture = Capture.newInstance();
final Capture<Map<String, List>> contextCap = Capture.newInstance();
QueryRunner mockRunner = EasyMock.createNiceMock(QueryRunner.class);
EasyMock.expect(mockRunner.run(EasyMock.capture(capture), EasyMock.capture(contextCap))).andReturn(Sequences.empty()).anyTimes();
EasyMock.expect(serverView.getQueryRunner(lastServer)).andReturn(mockRunner).anyTimes();
EasyMock.replay(serverView);
EasyMock.replay(mockRunner);
List<SegmentDescriptor> descriptors = new ArrayList<>();
descriptors.add(new SegmentDescriptor(interval1, "v", 1));
descriptors.add(new SegmentDescriptor(interval1, "v", 3));
descriptors.add(new SegmentDescriptor(interval2, "v", 5));
descriptors.add(new SegmentDescriptor(interval3, "v", 6));
MultipleSpecificSegmentSpec expected = new MultipleSpecificSegmentSpec(descriptors);
Sequences.toList(runner.run(query, context), Lists.newArrayList());
Assert.assertEquals(expected, capture.getValue().getQuerySegmentSpec());
}
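The expected descriptor list omits partitions 2 and 4: partition 2 holds dim1 values in [e, f], which neither dim1 = "a" nor the bound [from, to] can reach (lexicographically "from" > "f"), and partition 4 holds dim2 values in [a, e], while the conjunction of the dim2 filters only admits "g". A toy sketch of the kind of range-overlap check such pruning relies on (the mayOverlap helper is hypothetical, not the CachingClusteredClient implementation):

public class PruningSketch {
  // True if [chunkStart, chunkEnd] and [filterMin, filterMax] can share a value;
  // null bounds mean "unbounded", matching the StringPartitionChunk convention
  static boolean mayOverlap(String chunkStart, String chunkEnd,
                            String filterMin, String filterMax) {
    boolean belowUpper = chunkEnd == null || filterMin == null
        || filterMin.compareTo(chunkEnd) <= 0;
    boolean aboveLower = chunkStart == null || filterMax == null
        || filterMax.compareTo(chunkStart) >= 0;
    return belowUpper && aboveLower;
  }

  public static void main(String[] args) {
    // Partition 2 holds dim1 values in [e, f]; the filter admits "a" or [from, to]
    System.out.println(mayOverlap("e", "f", "a", "a"));     // false -> pruned
    System.out.println(mayOverlap("e", "f", "from", "to")); // false ("from" > "f") -> pruned
    // Partition 1 holds dim1 values up to "b"; "a" falls inside -> kept
    System.out.println(mayOverlap(null, "b", "a", "a"));    // true
  }
}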
Use of io.druid.query.spec.MultipleSpecificSegmentSpec in project druid by druid-io.
The class RealtimeManagerTest, method testQueryWithMultipleSegmentSpec.
@Test(timeout = 10_000L)
public void testQueryWithMultipleSegmentSpec() throws IOException, InterruptedException {
List<Row> expectedResults_both_partitions = Arrays.asList(
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "business", "rows", 2L, "idx", 260L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "health", "rows", 2L, "idx", 236L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "mezzanine", "rows", 4L, "idx", 4556L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "news", "rows", 2L, "idx", 284L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "technology", "rows", 2L, "idx", 202L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-27", "alias", "automotive", "rows", 2L, "idx", 288L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-27", "alias", "entertainment", "rows", 2L, "idx", 326L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "automotive", "rows", 2L, "idx", 312L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "business", "rows", 2L, "idx", 248L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "entertainment", "rows", 2L, "idx", 326L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "health", "rows", 2L, "idx", 262L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "mezzanine", "rows", 6L, "idx", 5126L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "news", "rows", 2L, "idx", 254L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "premium", "rows", 6L, "idx", 5276L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "technology", "rows", 2L, "idx", 206L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "travel", "rows", 2L, "idx", 260L));
List<Row> expectedResults_single_partition_26_28 = Arrays.asList(
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "business", "rows", 1L, "idx", 130L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "health", "rows", 1L, "idx", 118L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "mezzanine", "rows", 2L, "idx", 2278L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "news", "rows", 1L, "idx", 142L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-26", "alias", "technology", "rows", 1L, "idx", 101L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-27", "alias", "automotive", "rows", 1L, "idx", 144L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-27", "alias", "entertainment", "rows", 1L, "idx", 163L));
List<Row> expectedResults_single_partition_28_29 = Arrays.asList(
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "automotive", "rows", 1L, "idx", 156L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "business", "rows", 1L, "idx", 124L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "entertainment", "rows", 1L, "idx", 163L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "health", "rows", 1L, "idx", 131L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "mezzanine", "rows", 3L, "idx", 2563L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "news", "rows", 1L, "idx", 127L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "premium", "rows", 3L, "idx", 2638L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "technology", "rows", 1L, "idx", 103L),
    GroupByQueryRunnerTestHelper.createExpectedRow("2011-03-28", "alias", "travel", "rows", 1L, "idx", 130L));
chiefStartedLatch.await();
final Interval interval_26_28 = new Interval("2011-03-26T00:00:00.000Z/2011-03-28T00:00:00.000Z");
final Interval interval_28_29 = new Interval("2011-03-28T00:00:00.000Z/2011-03-29T00:00:00.000Z");
final SegmentDescriptor descriptor_26_28_0 = new SegmentDescriptor(interval_26_28, "ver0", 0);
final SegmentDescriptor descriptor_28_29_0 = new SegmentDescriptor(interval_28_29, "ver1", 0);
final SegmentDescriptor descriptor_26_28_1 = new SegmentDescriptor(interval_26_28, "ver0", 1);
final SegmentDescriptor descriptor_28_29_1 = new SegmentDescriptor(interval_28_29, "ver1", 1);
GroupByQuery query = GroupByQuery.builder()
    .setDataSource(QueryRunnerTestHelper.dataSource)
    .setQuerySegmentSpec(new MultipleSpecificSegmentSpec(ImmutableList.<SegmentDescriptor>of(
        descriptor_26_28_0, descriptor_28_29_0, descriptor_26_28_1, descriptor_28_29_1)))
    .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
    .setAggregatorSpecs(Arrays.asList(
        QueryRunnerTestHelper.rowsCount,
        new LongSumAggregatorFactory("idx", "index")))
    .setGranularity(QueryRunnerTestHelper.dayGran)
    .build();
final Map<Interval, QueryRunner> runnerMap = ImmutableMap.<Interval, QueryRunner>of(
    interval_26_28, QueryRunnerTestHelper.makeQueryRunner(factory, "druid.sample.numeric.tsv.top", null),
    interval_28_29, QueryRunnerTestHelper.makeQueryRunner(factory, "druid.sample.numeric.tsv.bottom", null));
plumber.setRunners(runnerMap);
plumber2.setRunners(runnerMap);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, query.getQuerySegmentSpec().lookup(query, realtimeManager3), query);
TestHelper.assertExpectedObjects(expectedResults_both_partitions, results, "");
results = GroupByQueryRunnerTestHelper.runQuery(factory, realtimeManager3.getQueryRunnerForSegments(query, ImmutableList.<SegmentDescriptor>of(descriptor_26_28_0)), query);
TestHelper.assertExpectedObjects(expectedResults_single_partition_26_28, results, "");
results = GroupByQueryRunnerTestHelper.runQuery(factory, realtimeManager3.getQueryRunnerForSegments(query, ImmutableList.<SegmentDescriptor>of(descriptor_28_29_0)), query);
TestHelper.assertExpectedObjects(expectedResults_single_partition_28_29, results, "");
results = GroupByQueryRunnerTestHelper.runQuery(factory, realtimeManager3.getQueryRunnerForSegments(query, ImmutableList.<SegmentDescriptor>of(descriptor_26_28_1)), query);
TestHelper.assertExpectedObjects(expectedResults_single_partition_26_28, results, "");
results = GroupByQueryRunnerTestHelper.runQuery(factory, realtimeManager3.getQueryRunnerForSegments(query, ImmutableList.<SegmentDescriptor>of(descriptor_28_29_1)), query);
TestHelper.assertExpectedObjects(expectedResults_single_partition_28_29, results, "");
}
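Each single-partition run above goes through getQueryRunnerForSegments directly, which is exactly where MultipleSpecificSegmentSpec.lookup lands: the spec hands its descriptors back to the QuerySegmentWalker. A minimal sketch of that delegation, assuming the io.druid query interfaces used in the test (the route helper is hypothetical):

import io.druid.query.Query;
import io.druid.query.QueryRunner;
import io.druid.query.QuerySegmentWalker;
import io.druid.query.spec.MultipleSpecificSegmentSpec;

class SpecRoutingSketch {
  // Mirrors what query.getQuerySegmentSpec().lookup(query, walker) does when
  // the spec is a MultipleSpecificSegmentSpec: delegate to the walker's
  // per-segment entry point with the spec's own descriptors.
  static <T> QueryRunner<T> route(Query<T> query, MultipleSpecificSegmentSpec spec,
      QuerySegmentWalker walker) {
    return walker.getQueryRunnerForSegments(query, spec.getDescriptors());
  }
}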