use of org.apache.druid.segment.generator.GeneratorSchemaInfo in project druid by druid-io.
the class TopNBenchmark method setupQueries.
private void setupQueries() {
// queries for the basic schema
Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
{
// basic.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimSequential").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("A", queryBuilderA);
}
{
// basic.numericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("numericSort", queryBuilderA);
}
{
// basic.alphanumericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("alphanumericSort", queryBuilderA);
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
use of org.apache.druid.segment.generator.GeneratorSchemaInfo in project druid by druid-io.
the class GroupByBenchmark method setupQueries.
private void setupQueries() {
// queries for the basic schema
Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
{
// basic.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new CountAggregatorFactory("cnt"));
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
GroupByQuery queryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null)).setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).setContext(ImmutableMap.of("vectorize", vectorize)).build();
basicQueries.put("A", queryA);
}
{
// basic.sorted
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
GroupByQuery queryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null)).setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec("sumLongSequential", OrderByColumnSpec.Direction.DESCENDING, StringComparators.NUMERIC)), 100)).build();
basicQueries.put("sorted", queryA);
}
{
// basic.nested
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
GroupByQuery subqueryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null)).setAggregatorSpecs(queryAggs).setGranularity(Granularities.DAY).setContext(ImmutableMap.of("vectorize", vectorize)).build();
GroupByQuery queryA = GroupByQuery.builder().setDataSource(subqueryA).setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimSequential", null)).setAggregatorSpecs(queryAggs).setGranularity(Granularities.WEEK).setContext(ImmutableMap.of("vectorize", vectorize)).build();
basicQueries.put("nested", queryA);
}
{
// basic.filter
final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
// Use multiple aggregators to see how the number of aggregators impact to the query performance
List<AggregatorFactory> queryAggs = ImmutableList.of(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"), new LongSumAggregatorFactory("rows", "rows"), new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"), new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
GroupByQuery queryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimUniform", null)).setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).setDimFilter(new BoundDimFilter("dimUniform", "0", "100", true, true, null, null, null)).setContext(ImmutableMap.of("vectorize", vectorize)).build();
basicQueries.put("filter", queryA);
}
{
// basic.singleZipf
final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
// Use multiple aggregators to see how the number of aggregators impact to the query performance
List<AggregatorFactory> queryAggs = ImmutableList.of(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"), new LongSumAggregatorFactory("rows", "rows"), new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"), new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
GroupByQuery queryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimZipf", null)).setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).setContext(ImmutableMap.of("vectorize", vectorize)).build();
basicQueries.put("singleZipf", queryA);
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
// simple one column schema, for testing performance difference between querying on numeric values as Strings and
// directly as longs
Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
GeneratorSchemaInfo simpleSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simple");
{
// simple.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
GroupByQuery queryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.STRING)).setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).setContext(ImmutableMap.of("vectorize", vectorize)).build();
simpleQueries.put("A", queryA);
}
SCHEMA_QUERY_MAP.put("simple", simpleQueries);
Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
GeneratorSchemaInfo simpleLongSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleLong");
{
// simpleLong.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleLongSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
GroupByQuery queryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.LONG)).setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).setContext(ImmutableMap.of("vectorize", vectorize)).build();
simpleLongQueries.put("A", queryA);
}
SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);
Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
GeneratorSchemaInfo simpleFloatSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleFloat");
{
// simpleFloat.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleFloatSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
GroupByQuery queryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.FLOAT)).setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).setContext(ImmutableMap.of("vectorize", vectorize)).build();
simpleFloatQueries.put("A", queryA);
}
SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
// simple one column schema, for testing performance difference between querying on numeric values as Strings and
// directly as longs
Map<String, GroupByQuery> nullQueries = new LinkedHashMap<>();
GeneratorSchemaInfo nullSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("nulls");
{
// simple-null
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(nullSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new DoubleSumAggregatorFactory("doubleSum", "doubleZipf"));
GroupByQuery queryA = GroupByQuery.builder().setDataSource("blah").setQuerySegmentSpec(intervalSpec).setDimensions(new DefaultDimensionSpec("stringZipf", "stringZipf", ColumnType.STRING)).setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).setContext(ImmutableMap.of("vectorize", vectorize)).build();
nullQueries.put("A", queryA);
}
SCHEMA_QUERY_MAP.put("nulls", nullQueries);
}
use of org.apache.druid.segment.generator.GeneratorSchemaInfo in project druid by druid-io.
the class ScanBenchmark method setupQueries.
private void setupQueries() {
// queries for the basic schema
final Map<String, Druids.ScanQueryBuilder> basicQueries = new LinkedHashMap<>();
final GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
final List<String> queryTypes = ImmutableList.of("A", "B", "C", "D");
for (final String eachType : queryTypes) {
basicQueries.put(eachType, makeQuery(eachType, basicSchema));
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
use of org.apache.druid.segment.generator.GeneratorSchemaInfo in project druid by druid-io.
the class SqlExpressionBenchmark method setup.
@Setup(Level.Trial)
public void setup() {
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");
final DataSegment dataSegment = DataSegment.builder().dataSource("foo").interval(schemaInfo.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build();
final PlannerConfig plannerConfig = new PlannerConfig();
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
log.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment);
final QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment);
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer, PROCESSING_CONFIG);
final SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add(dataSegment, index);
closer.register(walker);
final DruidSchemaCatalog rootSchema = CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER);
plannerFactory = new PlannerFactory(rootSchema, CalciteTests.createMockQueryMakerFactory(walker, conglomerate), CalciteTests.createOperatorTable(), CalciteTests.createExprMacroTable(), plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER, CalciteTests.getJsonMapper(), CalciteTests.DRUID_SCHEMA_NAME);
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(plannerFactory, QUERIES.get(Integer.parseInt(query)));
} catch (Throwable ignored) {
// the show must go on
}
}
use of org.apache.druid.segment.generator.GeneratorSchemaInfo in project druid by druid-io.
the class SearchBenchmark method setupQueries.
private void setupQueries() {
// queries for the basic schema
final Map<String, SearchQueryBuilder> basicQueries = new LinkedHashMap<>();
final GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
final List<String> queryTypes = ImmutableList.of("A", "B", "C", "D");
for (final String eachType : queryTypes) {
basicQueries.put(eachType, makeQuery(eachType, basicSchema));
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
Aggregations