Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
The class DatasourceOptimizerTest, method testOptimize:
@Test(timeout = 60_000L)
public void testOptimize() throws InterruptedException
{
  // insert datasource metadata
  String dataSource = "derivative";
  String baseDataSource = "base";
  Set<String> dims = Sets.newHashSet("dim1", "dim2", "dim3");
  Set<String> metrics = Sets.newHashSet("cost");
  DerivativeDataSourceMetadata metadata = new DerivativeDataSourceMetadata(baseDataSource, dims, metrics);
  metadataStorageCoordinator.insertDataSourceMetadata(dataSource, metadata);

  // insert base datasource segments
  List<Boolean> baseResult = Lists.transform(
      ImmutableList.of(
          "2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04",
          "2011-04-04/2011-04-05", "2011-04-05/2011-04-06"
      ),
      interval -> {
        final DataSegment segment =
            createDataSegment("base", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3", "dim4"), 1024 * 1024);
        try {
          metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
          announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
        } catch (IOException e) {
          return false;
        }
        return true;
      }
  );

  // insert derivative segments
  List<Boolean> derivativeResult = Lists.transform(
      ImmutableList.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04"),
      interval -> {
        final DataSegment segment =
            createDataSegment("derivative", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3"), 1024);
        try {
          metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
          announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
        } catch (IOException e) {
          return false;
        }
        return true;
      }
  );
  Assert.assertFalse(baseResult.contains(false));
  Assert.assertFalse(derivativeResult.contains(false));

  derivativesManager.start();
  while (DerivativeDataSourceManager.getAllDerivatives().isEmpty()) {
    TimeUnit.SECONDS.sleep(1L);
  }

  // build user query
  TopNQuery userQuery = new TopNQueryBuilder()
      .dataSource("base").granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension("dim1").metric("cost").threshold(4)
      .intervals("2011-04-01/2011-04-06")
      .aggregators(new LongSumAggregatorFactory("cost", "cost"))
      .build();

  List<Query> expectedQueryAfterOptimizing = Lists.newArrayList(
      new TopNQueryBuilder()
          .dataSource("derivative").granularity(QueryRunnerTestHelper.ALL_GRAN)
          .dimension("dim1").metric("cost").threshold(4)
          .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-01/2011-04-04"))))
          .aggregators(new LongSumAggregatorFactory("cost", "cost"))
          .build(),
      new TopNQueryBuilder()
          .dataSource("base").granularity(QueryRunnerTestHelper.ALL_GRAN)
          .dimension("dim1").metric("cost").threshold(4)
          .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-04/2011-04-06"))))
          .aggregators(new LongSumAggregatorFactory("cost", "cost"))
          .build()
  );
  Assert.assertEquals(expectedQueryAfterOptimizing, optimizer.optimize(userQuery));
  derivativesManager.stop();
}
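The assertion above captures what the materialized-view optimizer is expected to do: derivative segments exist only for 2011-04-01/2011-04-04, so that slice of the requested interval is rewritten against the "derivative" datasource while the uncovered remainder stays on "base". Below is a minimal sketch of that interval split using Joda-Time's Interval arithmetic; it is illustrative only and not Druid's actual optimizer code.

import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;

static void intervalSplitSketch()
{
  // illustrative only, not Druid's optimizer implementation
  // requested by the user query (from the test above)
  Interval requested = Intervals.of("2011-04-01/2011-04-06");
  // covered by derivative segments (from the test fixture above)
  Interval covered = Intervals.of("2011-04-01/2011-04-04");
  // portion answerable by the derivative datasource: 2011-04-01/2011-04-04
  Interval derivativePart = requested.overlap(covered);
  // remainder that must still hit the base datasource: 2011-04-04/2011-04-06
  Interval basePart = new Interval(covered.getEnd(), requested.getEnd());
}

Each part then becomes the MultipleIntervalSegmentSpec of one rewritten query, matching expectedQueryAfterOptimizing.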
Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
The class TopNBenchmark, method setupQueries:
private void setupQueries()
{
  // queries for the basic schema
  Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
  {
    // basic.A
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
    queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
    queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
    queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah").granularity(Granularities.ALL).dimension("dimSequential")
        .metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
    basicQueries.put("A", queryBuilderA);
  }
  {
    // basic.numericSort
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform")
        .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
        .intervals(intervalSpec).aggregators(queryAggs);
    basicQueries.put("numericSort", queryBuilderA);
  }
  {
    // basic.alphanumericSort
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform")
        .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
        .intervals(intervalSpec).aggregators(queryAggs);
    basicQueries.put("alphanumericSort", queryBuilderA);
  }
  SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
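Note that this benchmark stores TopNQueryBuilder instances rather than built queries, and none of them sets a threshold here. A short usage sketch, assuming the builder is completed at benchmark time (the threshold value below is illustrative, not taken from the benchmark):

// hypothetical consumption of a stored builder; 10 is an arbitrary threshold
TopNQueryBuilder builder = SCHEMA_QUERY_MAP.get("basic").get("A");
TopNQuery query = builder.threshold(10).build();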
Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
The class GroupByBenchmark, method setupQueries:
private void setupQueries()
{
  // queries for the basic schema
  Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
  {
    // basic.A
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new CountAggregatorFactory("cnt"));
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicQueries.put("A", queryA);
  }
  {
    // basic.sorted
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setLimitSpec(new DefaultLimitSpec(
            Collections.singletonList(
                new OrderByColumnSpec("sumLongSequential", OrderByColumnSpec.Direction.DESCENDING, StringComparators.NUMERIC)
            ),
            100
        ))
        .build();
    basicQueries.put("sorted", queryA);
  }
  {
    // basic.nested
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery subqueryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularities.DAY)
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource(subqueryA)
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularities.WEEK)
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicQueries.put("nested", queryA);
  }
  {
    // basic.filter
    final QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    // Use multiple aggregators to see how the number of aggregators impacts query performance
    List<AggregatorFactory> queryAggs = ImmutableList.of(
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
        new LongSumAggregatorFactory("rows", "rows"),
        new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
        new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf")
    );
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimUniform", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setDimFilter(new BoundDimFilter("dimUniform", "0", "100", true, true, null, null, null))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicQueries.put("filter", queryA);
  }
  {
    // basic.singleZipf
    final QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    // Use multiple aggregators to see how the number of aggregators impacts query performance
    List<AggregatorFactory> queryAggs = ImmutableList.of(
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
        new LongSumAggregatorFactory("rows", "rows"),
        new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
        new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf")
    );
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicQueries.put("singleZipf", queryA);
  }
  SCHEMA_QUERY_MAP.put("basic", basicQueries);

  // simple one-column schema, for testing the performance difference between querying
  // numeric values as Strings and directly as longs
  Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo simpleSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simple");
  {
    // simple.A
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(simpleSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.STRING))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    simpleQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simple", simpleQueries);

  Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo simpleLongSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleLong");
  {
    // simpleLong.A
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(simpleLongSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.LONG))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    simpleLongQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);

  Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo simpleFloatSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleFloat");
  {
    // simpleFloat.A
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(simpleFloatSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.FLOAT))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    simpleFloatQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);

  // schema with null values, for testing grouping and aggregation over columns that contain nulls
  Map<String, GroupByQuery> nullQueries = new LinkedHashMap<>();
  GeneratorSchemaInfo nullSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("nulls");
  {
    // simple-null
    QuerySegmentSpec intervalSpec =
        new MultipleIntervalSegmentSpec(Collections.singletonList(nullSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new DoubleSumAggregatorFactory("doubleSum", "doubleZipf"));
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(intervalSpec)
        .setDimensions(new DefaultDimensionSpec("stringZipf", "stringZipf", ColumnType.STRING))
        .setAggregatorSpecs(queryAggs)
        .setGranularity(Granularity.fromString(queryGranularity))
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    nullQueries.put("A", queryA);
  }
  SCHEMA_QUERY_MAP.put("nulls", nullQueries);
}
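Every block above builds its segment spec the same way, so it is worth seeing what MultipleIntervalSegmentSpec actually holds: it wraps a list of Joda intervals as a QuerySegmentSpec, and (a hedged note on Druid's implementation) overlapping or abutting intervals are condensed together. A minimal sketch:

import java.util.Arrays;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.spec.QuerySegmentSpec;

QuerySegmentSpec spec = new MultipleIntervalSegmentSpec(
    Arrays.asList(Intervals.of("2011-04-01/2011-04-03"), Intervals.of("2011-04-03/2011-04-06"))
);
// the two abutting intervals come back condensed as 2011-04-01/2011-04-06
System.out.println(spec.getIntervals());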
Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
The class ScanBenchmark, method basicD:
private static Druids.ScanQueryBuilder basicD(final GeneratorSchemaInfo basicSchema)
{
  final QuerySegmentSpec intervalSpec =
      new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
  final String dimName = "dimUniform";
  return Druids.newScanQueryBuilder()
      .filters(new BoundDimFilter(dimName, "100", "10000", true, true, true, null, null))
      .intervals(intervalSpec).dataSource("blah").order(ordering);
}
Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
The class ScanBenchmark, method basicC:
private static Druids.ScanQueryBuilder basicC(final GeneratorSchemaInfo basicSchema)
{
  final QuerySegmentSpec intervalSpec =
      new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
  final String dimName = "dimUniform";
  return Druids.newScanQueryBuilder()
      .filters(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance()))
      .intervals(intervalSpec).dataSource("blah").order(ordering);
}
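Both helpers return a Druids.ScanQueryBuilder rather than a finished query; the benchmark is expected to complete and build it at setup time. A sketch of that final step, assuming a basicSchema in scope (the limit value is illustrative, not taken from the benchmark):

// hypothetical usage; 1000 is an arbitrary row limit
ScanQuery query = basicC(basicSchema)
    .limit(1000)
    .build();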