Use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.
From the class TimeCompareBenchmark, method setupQueries:
private void setupQueries() {
  // queries for the basic schema
  GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
  QuerySegmentSpec intervalSpec =
      new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
  long startMillis = basicSchema.getDataInterval().getStartMillis();
  long endMillis = basicSchema.getDataInterval().getEndMillis();
  long half = (endMillis - startMillis) / 2;
  // split the data interval in half: the later half is "recent", the earlier half "previous"
  Interval recent = Intervals.utc(startMillis + half, endMillis);
  Interval previous = Intervals.utc(startMillis, startMillis + half);
  log.info("Recent interval: " + recent);
  log.info("Previous interval: " + previous);
  {
    // basic.topNTimeCompare
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new FilteredAggregatorFactory(
        // jsAgg1,
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
        new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)
    ));
    queryAggs.add(new FilteredAggregatorFactory(
        new LongSumAggregatorFactory("_cmp_sumLongSequential", "sumLongSequential"),
        new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)
    ));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("dimUniform")
        .metric("sumLongSequential")
        .intervals(intervalSpec)
        .aggregators(queryAggs)
        .threshold(threshold);
    topNQuery = queryBuilderA.build();
    topNFactory = new TopNQueryRunnerFactory(
        new StupidPool<>(
            "TopNBenchmark-compute-bufferPool",
            new OffheapBufferGenerator("compute", 250_000_000),
            0,
            Integer.MAX_VALUE
        ),
        new TopNQueryQueryToolChest(new TopNQueryConfig()),
        QueryBenchmarkUtil.NOOP_QUERYWATCHER
    );
  }
  {
    // basic.timeseriesTimeCompare
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new FilteredAggregatorFactory(
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
        new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)
    ));
    queryAggs.add(new FilteredAggregatorFactory(
        new LongSumAggregatorFactory("_cmp_sumLongSequential", "sumLongSequential"),
        new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)
    ));
    Druids.TimeseriesQueryBuilder timeseriesQueryBuilder = Druids.newTimeseriesQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .intervals(intervalSpec)
        .aggregators(queryAggs)
        .descending(false);
    timeseriesQuery = timeseriesQueryBuilder.build();
    timeseriesFactory = new TimeseriesQueryRunnerFactory(
        new TimeseriesQueryQueryToolChest(),
        new TimeseriesQueryEngine(),
        QueryBenchmarkUtil.NOOP_QUERYWATCHER
    );
  }
}
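The pattern worth isolating here is the "time compare" trick: two FilteredAggregatorFactory instances wrap the same sum, each gated by an IntervalDimFilter on the __time column, so a single scan computes both the current value and the comparison value. Below is a minimal, self-contained sketch of that pattern; the datasource, dimension, and column names are placeholders invented for illustration, not part of the benchmark.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.filter.IntervalDimFilter;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.segment.column.ColumnHolder;
import org.joda.time.Interval;

public class TimeCompareTopNSketch {
  public static TopNQuery build() {
    // Hypothetical week-over-week split.
    Interval recent = Intervals.of("2023-01-08/2023-01-15");
    Interval previous = Intervals.of("2023-01-01/2023-01-08");
    // Each aggregator only accumulates rows whose __time falls inside its
    // interval, so one pass yields both the current and the comparison sum.
    List<AggregatorFactory> aggs = Arrays.asList(
        new FilteredAggregatorFactory(
            new LongSumAggregatorFactory("sum", "metricCol"),
            new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)),
        new FilteredAggregatorFactory(
            new LongSumAggregatorFactory("_cmp_sum", "metricCol"),
            new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)));
    return new TopNQueryBuilder()
        .dataSource("someDatasource")   // placeholder name
        .granularity(Granularities.ALL)
        .dimension("someDim")           // placeholder name
        .metric("sum")                  // rank by the recent-interval sum
        .intervals("2023-01-01/2023-01-15")
        .aggregators(aggs)
        .threshold(10)
        .build();
  }
}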
Use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.
From the class CachingClusteredClientBenchmark, method topNQuery:
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void topNQuery(Blackhole blackhole) {
  query = new TopNQueryBuilder()
      .dataSource(DATA_SOURCE)
      .intervals(basicSchemaIntervalSpec)
      .dimension(new DefaultDimensionSpec("dimZipf", null))
      .aggregators(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"))
      .granularity(Granularity.fromString(queryGranularity))
      .metric("sumLongSequential")
      // we are primarily measuring 'broker' merge time, so collect a significant number of results
      .threshold(10_000)
      .context(ImmutableMap.of(
          QueryContexts.BROKER_PARALLEL_MERGE_KEY, parallelCombine,
          QueryContexts.BROKER_PARALLELISM, parallelism
      ))
      .build();
  final List<Result<TopNResultValue>> results = runQuery();
  for (Result<TopNResultValue> result : results) {
    blackhole.consume(result);
  }
}
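The context map is what this benchmark is really exercising: QueryContexts.BROKER_PARALLEL_MERGE_KEY toggles whether the broker merges per-historical result streams in parallel, and QueryContexts.BROKER_PARALLELISM caps the parallelism a single query may use. A minimal sketch of setting those knobs on a standalone query follows; the datasource and column names are invented for illustration, and the concrete values (true, 4) are arbitrary.

import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;

public class ParallelMergeTopNSketch {
  public static TopNQuery build() {
    return new TopNQueryBuilder()
        .dataSource("someDatasource")   // invented name
        .intervals("2023-01-01/2023-02-01")
        .dimension("someDim")           // invented name
        .metric("sum")
        // large threshold: most of the work becomes merging, as in the benchmark
        .threshold(10_000)
        .granularity(Granularities.ALL)
        .aggregators(new LongSumAggregatorFactory("sum", "metricCol"))
        .context(ImmutableMap.of(
            QueryContexts.BROKER_PARALLEL_MERGE_KEY, true,   // opt in to parallel merge
            QueryContexts.BROKER_PARALLELISM, 4              // cap merge threads for this query
        ))
        .build();
  }
}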
Use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.
From the class TopNTypeInterfaceBenchmark, method setupQueries:
private void setupQueries() {
  // queries for the basic schema
  Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
  {
    // basic.A
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
    queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
    queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
    queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
    // Use an IdentityExtractionFn to force usage of HeapBasedTopNAlgorithm
    TopNQueryBuilder queryBuilderString = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance()))
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    // HeapBasedTopNAlgorithm is always used for numeric columns
    TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("metLongUniform")
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("metFloatNormal")
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("string", queryBuilderString);
    basicQueries.put("long", queryBuilderLong);
    basicQueries.put("float", queryBuilderFloat);
  }
  {
    // basic.numericSort
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("dimUniform")
        .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("numericSort", queryBuilderA);
  }
  {
    // basic.alphanumericSort
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("dimUniform")
        .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("alphanumericSort", queryBuilderA);
  }
  SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
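The numericSort and alphanumericSort cases use a DimensionTopNMetricSpec, which ranks results by the dimension values themselves rather than by an aggregated metric: the first constructor argument is a "previousStop" value for pagination (null means start from the beginning) and the second selects the ordering. A minimal sketch, with invented datasource and column names:

import java.util.Collections;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.topn.DimensionTopNMetricSpec;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;

public class DimensionSortTopNSketch {
  public static TopNQuery build() {
    return new TopNQueryBuilder()
        .dataSource("someDatasource")   // invented name
        .granularity(Granularities.ALL)
        .dimension("someDim")           // invented name
        // rank by dimension values; null previousStop = start at the beginning,
        // NUMERIC sorts "10" after "9" instead of lexicographically before it
        .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
        .intervals("2023-01-01/2023-02-01")
        .aggregators(Collections.singletonList(new LongSumAggregatorFactory("sum", "metricCol")))
        .threshold(10)
        .build();
  }
}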
Use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.
From the class DatasourceOptimizerTest, method testOptimize:
@Test(timeout = 60_000L)
public void testOptimize() throws InterruptedException {
  // insert datasource metadata
  String dataSource = "derivative";
  String baseDataSource = "base";
  Set<String> dims = Sets.newHashSet("dim1", "dim2", "dim3");
  Set<String> metrics = Sets.newHashSet("cost");
  DerivativeDataSourceMetadata metadata = new DerivativeDataSourceMetadata(baseDataSource, dims, metrics);
  metadataStorageCoordinator.insertDataSourceMetadata(dataSource, metadata);
  // insert base datasource segments
  List<Boolean> baseResult = Lists.transform(
      ImmutableList.of(
          "2011-04-01/2011-04-02",
          "2011-04-02/2011-04-03",
          "2011-04-03/2011-04-04",
          "2011-04-04/2011-04-05",
          "2011-04-05/2011-04-06"
      ),
      interval -> {
        final DataSegment segment = createDataSegment(
            "base",
            interval,
            "v1",
            Lists.newArrayList("dim1", "dim2", "dim3", "dim4"),
            1024 * 1024
        );
        try {
          metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
          announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
        } catch (IOException e) {
          return false;
        }
        return true;
      }
  );
  // insert derivative segments
  List<Boolean> derivativeResult = Lists.transform(
      ImmutableList.of(
          "2011-04-01/2011-04-02",
          "2011-04-02/2011-04-03",
          "2011-04-03/2011-04-04"
      ),
      interval -> {
        final DataSegment segment = createDataSegment(
            "derivative",
            interval,
            "v1",
            Lists.newArrayList("dim1", "dim2", "dim3"),
            1024
        );
        try {
          metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
          announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
        } catch (IOException e) {
          return false;
        }
        return true;
      }
  );
  Assert.assertFalse(baseResult.contains(false));
  Assert.assertFalse(derivativeResult.contains(false));
  derivativesManager.start();
  while (DerivativeDataSourceManager.getAllDerivatives().isEmpty()) {
    TimeUnit.SECONDS.sleep(1L);
  }
  // build user query
  TopNQuery userQuery = new TopNQueryBuilder()
      .dataSource("base")
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension("dim1")
      .metric("cost")
      .threshold(4)
      .intervals("2011-04-01/2011-04-06")
      .aggregators(new LongSumAggregatorFactory("cost", "cost"))
      .build();
  // the derivative datasource only covers 04-01/04-04, so the optimizer should split
  // the query: that range goes to "derivative", the remaining 04-04/04-06 stays on "base"
  List<Query> expectedQueryAfterOptimizing = Lists.newArrayList(
      new TopNQueryBuilder()
          .dataSource("derivative")
          .granularity(QueryRunnerTestHelper.ALL_GRAN)
          .dimension("dim1")
          .metric("cost")
          .threshold(4)
          .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-01/2011-04-04"))))
          .aggregators(new LongSumAggregatorFactory("cost", "cost"))
          .build(),
      new TopNQueryBuilder()
          .dataSource("base")
          .granularity(QueryRunnerTestHelper.ALL_GRAN)
          .dimension("dim1")
          .metric("cost")
          .threshold(4)
          .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-04/2011-04-06"))))
          .aggregators(new LongSumAggregatorFactory("cost", "cost"))
          .build()
  );
  Assert.assertEquals(expectedQueryAfterOptimizing, optimizer.optimize(userQuery));
  derivativesManager.stop();
}
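One subtlety in this test: Guava's Lists.transform returns a lazy view, so the segment-announcing lambdas do not run when transform() is called; they run when the contains(false) assertions iterate the lists. A small self-contained sketch of that behavior (class and variable names invented for illustration):

import com.google.common.collect.Lists;
import java.util.List;

public class LazyTransformSketch {
  public static void main(String[] args) {
    List<String> intervals = List.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03");
    // transform() does no work yet; the lambda runs on each element access.
    List<Boolean> results = Lists.transform(intervals, interval -> {
      System.out.println("announcing " + interval);
      return true;
    });
    System.out.println("nothing announced yet; the view is lazy");
    // contains(false) iterates the view, which is what finally runs the lambda.
    System.out.println("all succeeded: " + !results.contains(false));
  }
}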
Use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.
From the class MaterializedViewQueryTest, method testQuerySerialization:
@Test
public void testQuerySerialization() throws IOException {
  TopNQuery topNQuery = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension(QueryRunnerTestHelper.MARKET_DIMENSION)
      .metric(QueryRunnerTestHelper.INDEX_METRIC)
      .threshold(4)
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .aggregators(
          Lists.newArrayList(
              Iterables.concat(
                  QueryRunnerTestHelper.COMMON_DOUBLE_AGGREGATORS,
                  Lists.newArrayList(
                      new DoubleMaxAggregatorFactory("maxIndex", "index"),
                      new DoubleMinAggregatorFactory("minIndex", "index")
                  )
              )
          )
      )
      .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT)
      .build();
  MaterializedViewQuery query = new MaterializedViewQuery(topNQuery, optimizer);
  String json = JSON_MAPPER.writeValueAsString(query);
  Query serdeQuery = JSON_MAPPER.readValue(json, Query.class);
  Assert.assertEquals(query, serdeQuery);
  Assert.assertEquals(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), query.getDataSource());
  Assert.assertEquals(QueryRunnerTestHelper.ALL_GRAN, query.getGranularity());
  Assert.assertEquals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC.getIntervals(), query.getIntervals());
}
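The round-trip works because Druid's Query interface carries Jackson polymorphic-type annotations (a "queryType" property naming the concrete class, "topN" for TopNQuery), so a suitably configured ObjectMapper can restore the original type from JSON. Below is a minimal sketch of the same round-trip for a plain TopNQuery, assuming druid-processing is on the classpath and its DefaultObjectMapper registers the serde modules the query classes rely on; the datasource and column names are invented for illustration.

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.Query;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;

public class TopNSerdeSketch {
  public static void main(String[] args) throws Exception {
    TopNQuery query = new TopNQueryBuilder()
        .dataSource("someDatasource")   // invented name
        .granularity(Granularities.ALL)
        .dimension("someDim")           // invented name
        .metric("sum")
        .threshold(4)
        .intervals("2023-01-01/2023-02-01")
        .aggregators(new LongSumAggregatorFactory("sum", "metricCol"))
        .build();
    ObjectMapper mapper = new DefaultObjectMapper();
    String json = mapper.writeValueAsString(query);
    // The "queryType" property in the JSON tells Jackson which subtype to build.
    Query<?> roundTripped = mapper.readValue(json, Query.class);
    System.out.println(query.equals(roundTripped));   // expected: true
  }
}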