Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class GroupByQueryRunnerTest, method testMergeResultsWithLimitPushDown.
@Test
public void testMergeResultsWithLimitPushDown() {
  if (!config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) {
    return;
  }
  GroupByQuery.Builder builder = makeQueryBuilder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setInterval("2011-04-02/2011-04-04")
      .setDimensions(new DefaultDimensionSpec("quality", "alias"))
      .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
      .setLimitSpec(new DefaultLimitSpec(
          Collections.singletonList(new OrderByColumnSpec("alias", OrderByColumnSpec.Direction.DESCENDING)),
          5
      ))
      .overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, true))
      .setGranularity(Granularities.ALL);
  final GroupByQuery allGranQuery = builder.build();
  QueryRunner mergedRunner = factory.getToolchest().mergeResults(new QueryRunner<ResultRow>() {
    @Override
    public Sequence<ResultRow> run(QueryPlus<ResultRow> queryPlus, ResponseContext responseContext) {
      // simulate two daily segments
      final QueryPlus<ResultRow> queryPlus1 = queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(
          new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-02/2011-04-03")))
      ));
      final QueryPlus<ResultRow> queryPlus2 = queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(
          new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-03/2011-04-04")))
      ));
      return factory.getToolchest()
                    .mergeResults((queryPlus3, responseContext1) -> new MergeSequence<>(
                        queryPlus3.getQuery().getResultOrdering(),
                        Sequences.simple(Arrays.asList(
                            runner.run(queryPlus1, responseContext1),
                            runner.run(queryPlus2, responseContext1)
                        ))
                    ))
                    .run(queryPlus, responseContext);
    }
  });
  Map<String, Object> context = new HashMap<>();
  List<ResultRow> allGranExpectedResults = Arrays.asList(
      makeRow(allGranQuery, "2011-04-02", "alias", "travel", "rows", 2L, "idx", 243L),
      makeRow(allGranQuery, "2011-04-02", "alias", "technology", "rows", 2L, "idx", 177L),
      makeRow(allGranQuery, "2011-04-02", "alias", "premium", "rows", 6L, "idx", 4416L),
      makeRow(allGranQuery, "2011-04-02", "alias", "news", "rows", 2L, "idx", 221L),
      makeRow(allGranQuery, "2011-04-02", "alias", "mezzanine", "rows", 6L, "idx", 4420L)
  );
  TestHelper.assertExpectedObjects(allGranExpectedResults, mergedRunner.run(QueryPlus.wrap(allGranQuery)), "merged");
}
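The segment merge above hinges on MergeSequence, which lazily combines several already-ordered Sequences into one ordered Sequence using the query's result ordering. A minimal, self-contained sketch of that pattern follows; the class name MergeSequenceSketch, the integer values, and the use of Guava's Ordering.natural() in place of a query ordering are illustrative assumptions, while MergeSequence, Sequences.simple, and toList() are the same calls used in the test.

import com.google.common.collect.Ordering;
import org.apache.druid.java.util.common.guava.MergeSequence;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;

import java.util.Arrays;
import java.util.List;

public class MergeSequenceSketch {
  public static void main(String[] args) {
    // Two "segments", each already sorted in the desired order.
    Sequence<Integer> segment1 = Sequences.simple(Arrays.asList(1, 3, 5));
    Sequence<Integer> segment2 = Sequences.simple(Arrays.asList(2, 4, 6));

    // Merge them lazily while preserving the ordering across both inputs.
    Sequence<Integer> merged = new MergeSequence<>(
        Ordering.natural(),
        Sequences.simple(Arrays.asList(segment1, segment2))
    );

    List<Integer> result = merged.toList();
    System.out.println(result); // [1, 2, 3, 4, 5, 6]
  }
}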
Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class DruidSegmentReader, method intermediateRowIterator.
@Override
protected CloseableIterator<Map<String, Object>> intermediateRowIterator() throws IOException {
  final CleanableFile segmentFile = source.fetch(temporaryDirectory, null);
  final WindowedStorageAdapter storageAdapter = new WindowedStorageAdapter(
      new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile.file())),
      source.getIntervalFilter()
  );
  final Sequence<Cursor> cursors = storageAdapter.getAdapter().makeCursors(
      Filters.toFilter(dimFilter),
      storageAdapter.getInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );
  // Retain order of columns from the original segments. Useful for preserving dimension order if we're in
  // schemaless mode.
  final Set<String> columnsToRead = Sets.newLinkedHashSet(
      Iterables.filter(storageAdapter.getAdapter().getRowSignature().getColumnNames(), columnsFilter::apply)
  );
  final Sequence<Map<String, Object>> sequence = Sequences.concat(
      Sequences.map(cursors, cursor -> cursorToSequence(cursor, columnsToRead))
  );
  return makeCloseableIteratorFromSequenceAndSegmentFile(sequence, segmentFile);
}
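The central Sequence idiom here is map-then-concat: each Cursor is turned into its own Sequence of rows via cursorToSequence, and Sequences.concat flattens those per-cursor sequences into a single lazy stream. A minimal sketch of that shape, with made-up stand-ins (ConcatSketch, integer "cursors", string "rows") instead of real cursors:

import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class ConcatSketch {
  public static void main(String[] args) {
    // Stand-ins for cursors: each integer expands into its own sub-sequence of "rows".
    Sequence<Integer> cursors = Sequences.simple(Arrays.asList(1, 2, 3));

    // Map each element to a Sequence, then flatten the Sequence of Sequences.
    Sequence<String> rows = Sequences.concat(
        Sequences.map(cursors, n -> Sequences.simple(Collections.nCopies(n, "row-from-cursor-" + n)))
    );

    List<String> result = rows.toList();
    System.out.println(result);
    // [row-from-cursor-1, row-from-cursor-2, row-from-cursor-2, row-from-cursor-3, ...]
  }
}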
Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class IndexTaskTest, method testNumShardsAndPartitionDimensionsProvided.
@Test
public void testNumShardsAndPartitionDimensionsProvided() throws Exception {
  final File tmpDir = temporaryFolder.newFolder();
  final File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("2014-01-01T00:00:10Z,a,1\n");
    writer.write("2014-01-01T01:00:20Z,b,1\n");
    writer.write("2014-01-01T02:00:30Z,c,1\n");
  }
  final IndexTask indexTask = new IndexTask(
      null,
      null,
      createDefaultIngestionSpec(
          jsonMapper,
          tmpDir,
          null,
          null,
          createTuningConfigWithPartitionsSpec(new HashedPartitionsSpec(null, 2, ImmutableList.of("dim")), true),
          false,
          false
      ),
      null
  );
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(2, segments.size());
  for (DataSegment segment : segments) {
    Assert.assertEquals(DATASOURCE, segment.getDataSource());
    Assert.assertEquals(Intervals.of("2014/P1D"), segment.getInterval());
    Assert.assertEquals(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
    final HashBasedNumberedShardSpec hashBasedNumberedShardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
    Assert.assertEquals(HashPartitionFunction.MURMUR3_32_ABS, hashBasedNumberedShardSpec.getPartitionFunction());
    final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
    final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
        new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
        segment.getInterval()
    );
    final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(
        null,
        segment.getInterval(),
        VirtualColumns.EMPTY,
        Granularities.ALL,
        false,
        null
    );
    final List<Integer> hashes = cursorSequence.map(cursor -> {
      final DimensionSelector selector = cursor.getColumnSelectorFactory()
                                               .makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
      final int hash = HashPartitionFunction.MURMUR3_32_ABS.hash(
          HashBasedNumberedShardSpec.serializeGroupKey(jsonMapper, Collections.singletonList(selector.getObject())),
          hashBasedNumberedShardSpec.getNumBuckets()
      );
      cursor.advance();
      return hash;
    }).toList();
    Assert.assertTrue(hashes.stream().allMatch(h -> h.intValue() == hashes.get(0)));
  }
}
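The per-row hash check relies on two small pieces of the Sequence API: map(...) to transform each element lazily and toList() to drive the sequence and materialize the results. A minimal sketch of that combination with plain strings instead of cursors (MapToListSketch and the sample values are invented for illustration):

import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;

import java.util.Arrays;
import java.util.List;

public class MapToListSketch {
  public static void main(String[] args) {
    Sequence<String> dims = Sequences.simple(Arrays.asList("a", "bb", "ccc"));

    // map() is lazy; toList() consumes the sequence and collects every mapped value.
    List<Integer> lengths = dims.map(String::length).toList();

    System.out.println(lengths); // [1, 2, 3]
  }
}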
Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class GroupByQueryEngine, method process.
public Sequence<Row> process(final GroupByQuery query, final StorageAdapter storageAdapter) {
  if (storageAdapter == null) {
    throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
  }
  if (!query.getContextValue(GroupByQueryConfig.CTX_KEY_ENABLE_MULTI_VALUE_UNNESTING, true)) {
    throw new UOE(
        "GroupBy v1 does not support %s as false. Set %s to true or use groupBy v2",
        GroupByQueryConfig.CTX_KEY_ENABLE_MULTI_VALUE_UNNESTING,
        GroupByQueryConfig.CTX_KEY_ENABLE_MULTI_VALUE_UNNESTING
    );
  }
  final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
  if (intervals.size() != 1) {
    throw new IAE("Should only have one interval, got[%s]", intervals);
  }
  Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));
  final Sequence<Cursor> cursors = storageAdapter.makeCursors(
      filter,
      intervals.get(0),
      query.getVirtualColumns(),
      query.getGranularity(),
      false,
      null
  );
  final ResourceHolder<ByteBuffer> bufferHolder = intermediateResultsBufferPool.take();
  return Sequences.concat(
      Sequences.withBaggage(
          Sequences.map(
              cursors,
              new Function<Cursor, Sequence<Row>>() {
                @Override
                public Sequence<Row> apply(final Cursor cursor) {
                  return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, RowIterator>() {
                    @Override
                    public RowIterator make() {
                      return new RowIterator(query, cursor, bufferHolder.get(), config.get());
                    }

                    @Override
                    public void cleanup(RowIterator iterFromMake) {
                      CloseableUtils.closeAndWrapExceptions(iterFromMake);
                    }
                  });
                }
              }
          ),
          bufferHolder
      )
  );
}
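The engine wraps each Cursor in a BaseSequence so that the per-cursor RowIterator is created only when the sequence is actually consumed and is always cleaned up afterward, while Sequences.withBaggage ties release of the pooled buffer to the end of the whole result sequence. A minimal sketch of that resource-scoping pattern; BaseSequenceSketch, the string rows, and the println-based "cleanup" are placeholders for the real iterator and buffer holder:

import org.apache.druid.java.util.common.guava.BaseSequence;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;

import java.util.Arrays;
import java.util.Iterator;

public class BaseSequenceSketch {
  public static void main(String[] args) {
    Sequence<String> rows = new BaseSequence<>(
        new BaseSequence.IteratorMaker<String, Iterator<String>>() {
          @Override
          public Iterator<String> make() {
            // Created lazily, once per consumption of the sequence.
            return Arrays.asList("row1", "row2").iterator();
          }

          @Override
          public void cleanup(Iterator<String> iterFromMake) {
            // Release per-iterator resources here.
            System.out.println("iterator cleaned up");
          }
        }
    );

    // withBaggage closes the attached resource once the sequence has been fully consumed.
    Sequence<String> withCleanup = Sequences.withBaggage(rows, () -> System.out.println("baggage closed"));

    System.out.println(withCleanup.toList()); // triggers make(), then cleanup(), then the baggage close
  }
}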
Use of org.apache.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class DoubleMeanAggregationTest, method testAggretatorUsingTimeseriesQuery.
@Test
@Parameters(method = "doVectorize")
public void testAggretatorUsingTimeseriesQuery(boolean doVectorize) throws Exception {
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource("test")
      .granularity(Granularities.ALL)
      .intervals("1970/2050")
      .aggregators(
          new DoubleMeanAggregatorFactory("meanOnDouble", SimpleTestIndex.DOUBLE_COL),
          new DoubleMeanAggregatorFactory("meanOnString", SimpleTestIndex.SINGLE_VALUE_DOUBLE_AS_STRING_DIM),
          new DoubleMeanAggregatorFactory("meanOnMultiValue", SimpleTestIndex.MULTI_VALUE_DOUBLE_AS_STRING_DIM)
      )
      .context(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, doVectorize))
      .build();
  // do json serialization and deserialization of query to ensure there are no serde issues
  ObjectMapper jsonMapper = timeseriesQueryTestHelper.getObjectMapper();
  query = (TimeseriesQuery) jsonMapper.readValue(jsonMapper.writeValueAsString(query), Query.class);
  Sequence seq = timeseriesQueryTestHelper.runQueryOnSegmentsObjs(segments, query);
  TimeseriesResultValue result = ((Result<TimeseriesResultValue>) Iterables.getOnlyElement(seq.toList())).getValue();
  Assert.assertEquals(6.2d, result.getDoubleMetric("meanOnDouble").doubleValue(), 0.0001d);
  Assert.assertEquals(6.2d, result.getDoubleMetric("meanOnString").doubleValue(), 0.0001d);
  Assert.assertEquals(4.1333d, result.getDoubleMetric("meanOnMultiValue").doubleValue(), 0.0001d);
}
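The test drains the result Sequence eagerly with toList() and takes the single timeseries row. When only a fold over the results is needed, accumulate() avoids building the intermediate list. A minimal sketch of both consumption styles; ConsumeSketch and the double values are made up, and the accumulate() call is an assumption about the Sequence API rather than something used in the test above.

import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;

import java.util.Arrays;

public class ConsumeSketch {
  public static void main(String[] args) {
    Sequence<Double> metrics = Sequences.simple(Arrays.asList(1.5, 2.5, 6.0));

    // Eager materialization, as in the test above.
    System.out.println(metrics.toList()); // [1.5, 2.5, 6.0]

    // Fold without building an intermediate list.
    double sum = metrics.accumulate(0.0, (acc, value) -> acc + value);
    System.out.println(sum); // 10.0
  }
}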