
Example 96 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In class GroupByQueryTest, method testSegmentLookUpForNestedQueries:

@Test
public void testSegmentLookUpForNestedQueries() {
    QuerySegmentSpec innerQuerySegmentSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(Intervals.of("2011-11-07/2011-11-08")));
    QuerySegmentSpec outerQuerySegmentSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(Intervals.of("2011-11-04/2011-11-08")));
    List<AggregatorFactory> aggs = Collections.singletonList(QueryRunnerTestHelper.ROWS_COUNT);
    final GroupByQuery innerQuery = GroupByQuery.builder()
        .setDataSource("blah").setInterval(innerQuerySegmentSpec)
        .setGranularity(Granularities.DAY).setAggregatorSpecs(aggs).build();
    final GroupByQuery query = GroupByQuery.builder()
        .setDataSource(innerQuery).setInterval(outerQuerySegmentSpec)
        .setAggregatorSpecs(aggs).setGranularity(Granularities.DAY).build();
    Assert.assertEquals(innerQuerySegmentSpec, BaseQuery.getQuerySegmentSpecForLookUp(query));
}
Also used: QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec), MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), Test (org.junit.Test)
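For contrast, a minimal sketch of the non-nested case (not part of the original test, reusing the fixtures above): with a plain table data source there is no inner query to descend into, so the lookup should fall back to the query's own segment spec.

    // Sketch: a flat (non-nested) query over the same table data source.
    final GroupByQuery flatQuery = GroupByQuery.builder()
        .setDataSource("blah")
        .setInterval(outerQuerySegmentSpec)
        .setGranularity(Granularities.DAY)
        .setAggregatorSpecs(aggs)
        .build();
    // Expected result (an assumption about getQuerySegmentSpecForLookUp's fallback behavior):
    Assert.assertEquals(outerQuerySegmentSpec, BaseQuery.getQuerySegmentSpecForLookUp(flatQuery));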

Example 97 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In class GroupByQueryRunnerTest, method testGroupByOrderLimit:

@Test
public void testGroupByOrderLimit() {
    GroupByQuery.Builder builder = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setInterval("2011-04-02/2011-04-04")
        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
        .addOrderByColumn("rows")
        .addOrderByColumn("alias", OrderByColumnSpec.Direction.DESCENDING)
        .setGranularity(new PeriodGranularity(new Period("P1M"), null, null));
    final GroupByQuery query = builder.build();
    List<ResultRow> expectedResults = Arrays.asList(
        makeRow(query, "2011-04-01", "alias", "travel", "rows", 2L, "idx", 243L),
        makeRow(query, "2011-04-01", "alias", "technology", "rows", 2L, "idx", 177L),
        makeRow(query, "2011-04-01", "alias", "news", "rows", 2L, "idx", 221L),
        makeRow(query, "2011-04-01", "alias", "health", "rows", 2L, "idx", 216L),
        makeRow(query, "2011-04-01", "alias", "entertainment", "rows", 2L, "idx", 319L),
        makeRow(query, "2011-04-01", "alias", "business", "rows", 2L, "idx", 217L),
        makeRow(query, "2011-04-01", "alias", "automotive", "rows", 2L, "idx", 269L),
        makeRow(query, "2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L),
        makeRow(query, "2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L)
    );
    QueryRunner<ResultRow> mergeRunner = factory.getToolchest().mergeResults(runner);
    TestHelper.assertExpectedObjects(expectedResults, mergeRunner.run(QueryPlus.wrap(query)), "no-limit");
    TestHelper.assertExpectedObjects(Iterables.limit(expectedResults, 5), mergeRunner.run(QueryPlus.wrap(builder.setLimit(5).build())), "limited");
    // Now try it with an expression-based aggregator.
    List<AggregatorFactory> aggregatorSpecs = Arrays.asList(
        QueryRunnerTestHelper.ROWS_COUNT,
        new DoubleSumAggregatorFactory("idx", null, "index / 2 + indexMin", TestExprMacroTable.INSTANCE)
    );
    builder.setLimit(Integer.MAX_VALUE).setAggregatorSpecs(aggregatorSpecs);
    expectedResults = makeRows(
        builder.build(),
        new String[] { "__time", "alias", "rows", "idx" },
        new Object[] { "2011-04-01", "travel", 2L, 365.4876403808594D },
        new Object[] { "2011-04-01", "technology", 2L, 267.3737487792969D },
        new Object[] { "2011-04-01", "news", 2L, 333.3147277832031D },
        new Object[] { "2011-04-01", "health", 2L, 325.467529296875D },
        new Object[] { "2011-04-01", "entertainment", 2L, 479.916015625D },
        new Object[] { "2011-04-01", "business", 2L, 328.083740234375D },
        new Object[] { "2011-04-01", "automotive", 2L, 405.5966796875D },
        new Object[] { "2011-04-01", "premium", 6L, 6627.927734375D },
        new Object[] { "2011-04-01", "mezzanine", 6L, 6635.47998046875D }
    );
    TestHelper.assertExpectedObjects(expectedResults, mergeRunner.run(QueryPlus.wrap(builder.build())), "no-limit");
    TestHelper.assertExpectedObjects(Iterables.limit(expectedResults, 5), mergeRunner.run(QueryPlus.wrap(builder.setLimit(5).build())), "limited");
    // Now try it with an expression virtual column.
    ExpressionVirtualColumn expressionVirtualColumn = new ExpressionVirtualColumn("expr", "index / 2 + indexMin", ColumnType.FLOAT, TestExprMacroTable.INSTANCE);
    List<AggregatorFactory> aggregatorSpecs2 = Arrays.asList(QueryRunnerTestHelper.ROWS_COUNT, new DoubleSumAggregatorFactory("idx", "expr"));
    builder.setLimit(Integer.MAX_VALUE).setVirtualColumns(expressionVirtualColumn).setAggregatorSpecs(aggregatorSpecs2);
    TestHelper.assertExpectedObjects(expectedResults, mergeRunner.run(QueryPlus.wrap(builder.build())), "no-limit");
    TestHelper.assertExpectedObjects(Iterables.limit(expectedResults, 5), mergeRunner.run(QueryPlus.wrap(builder.setLimit(5).build())), "limited");
}
Also used: DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), PeriodGranularity (org.apache.druid.java.util.common.granularity.PeriodGranularity), Period (org.joda.time.Period), FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory), LongLastAggregatorFactory (org.apache.druid.query.aggregation.last.LongLastAggregatorFactory), LongFirstAggregatorFactory (org.apache.druid.query.aggregation.first.LongFirstAggregatorFactory), HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), TestBigDecimalSumAggregatorFactory (org.apache.druid.query.TestBigDecimalSumAggregatorFactory), JavaScriptAggregatorFactory (org.apache.druid.query.aggregation.JavaScriptAggregatorFactory), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory), CardinalityAggregatorFactory (org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory), ExpressionLambdaAggregatorFactory (org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory), LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory), FloatSumAggregatorFactory (org.apache.druid.query.aggregation.FloatSumAggregatorFactory), DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec), ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
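The test above computes index / 2 + indexMin in two equivalent ways; shown side by side for clarity (a sketch using only constructors that already appear in the example):

    // 1) Expression-based aggregator: the expression is evaluated inside the aggregator.
    new DoubleSumAggregatorFactory("idx", null, "index / 2 + indexMin", TestExprMacroTable.INSTANCE);
    // 2) Expression virtual column feeding a plain column-based aggregator.
    new ExpressionVirtualColumn("expr", "index / 2 + indexMin", ColumnType.FLOAT, TestExprMacroTable.INSTANCE);
    new DoubleSumAggregatorFactory("idx", "expr");

Both forms yield the same rows, which is why the test reuses the same expectedResults for the second and third assertion passes.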

Example 98 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In class IncrementalIndexTest, method constructorFeeder:

@Parameterized.Parameters(name = "{index}: {0}, {1}, deserialize={2}")
public static Collection<?> constructorFeeder() {
    DimensionsSpec dimensions = new DimensionsSpec(
        Arrays.asList(
            new StringDimensionSchema("string"),
            new FloatDimensionSchema("float"),
            new LongDimensionSchema("long"),
            new DoubleDimensionSchema("double")
        )
    );
    AggregatorFactory[] metrics = {
        new FilteredAggregatorFactory(
            new CountAggregatorFactory("cnt"),
            new SelectorDimFilter("billy", "A", null)
        )
    };
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withQueryGranularity(Granularities.MINUTE)
        .withDimensionsSpec(dimensions)
        .withMetrics(metrics)
        .build();
    return IncrementalIndexCreator.indexTypeCartesianProduct(ImmutableList.of("rollup", "plain"), ImmutableList.of(true, false), ImmutableList.of(schema));
}
Also used: FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory), DoubleDimensionSchema (org.apache.druid.data.input.impl.DoubleDimensionSchema), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), LongDimensionSchema (org.apache.druid.data.input.impl.LongDimensionSchema), SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), FloatDimensionSchema (org.apache.druid.data.input.impl.FloatDimensionSchema), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema)
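A hypothetical variant (the name cnt_total is made up; the constructors are the ones used above) that would count all rows alongside the filtered count:

    AggregatorFactory[] metrics = {
        // counts every ingested row
        new CountAggregatorFactory("cnt_total"),
        // counts only rows where dimension "billy" has value "A"
        new FilteredAggregatorFactory(new CountAggregatorFactory("cnt"), new SelectorDimFilter("billy", "A", null))
    };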

Example 99 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In class DruidParquetAvroReadSupport, method getPartialReadSchema:

/**
 * Selects the columns from the Parquet schema that are used in the schema of the ingestion job.
 *
 * @param context the context of the file to be read
 *
 * @return the partial schema containing only the columns used by the ingestion schema
 */
private MessageType getPartialReadSchema(InitContext context) {
    MessageType fullSchema = context.getFileSchema();
    String name = fullSchema.getName();
    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
    ParseSpec parseSpec = config.getParser().getParseSpec();
    // a flattenSpec may auto-discover fields, so in that case the full schema is needed
    if (parseSpec instanceof AvroParseSpec && ((AvroParseSpec) parseSpec).getFlattenSpec() != null) {
        return fullSchema;
    }
    String tsField = parseSpec.getTimestampSpec().getTimestampColumn();
    List<DimensionSchema> dimensionSchema = parseSpec.getDimensionsSpec().getDimensions();
    Set<String> dimensions = new HashSet<>();
    for (DimensionSchema dim : dimensionSchema) {
        dimensions.add(dim.getName());
    }
    Set<String> metricsFields = new HashSet<>();
    for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
        metricsFields.addAll(agg.requiredFields());
    }
    List<Type> partialFields = new ArrayList<>();
    for (Type type : fullSchema.getFields()) {
        if (tsField.equals(type.getName())
            || metricsFields.contains(type.getName())
            || (dimensions.size() > 0 && dimensions.contains(type.getName()))
            || dimensions.size() == 0) {
            partialFields.add(type);
        }
    }
    return new MessageType(name, partialFields);
}
Also used: ParseSpec (org.apache.druid.data.input.impl.ParseSpec), AvroParseSpec (org.apache.druid.data.input.avro.AvroParseSpec), ArrayList (java.util.ArrayList), HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), DimensionSchema (org.apache.druid.data.input.impl.DimensionSchema), MessageType (org.apache.parquet.schema.MessageType), Type (org.apache.parquet.schema.Type), HashSet (java.util.HashSet)
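The keep/drop rule inside the loop above can be restated as a standalone predicate (a sketch; keepField is a hypothetical helper, not part of the Druid source):

    private static boolean keepField(Type type, String tsField, Set<String> dimensions, Set<String> metricsFields)
    {
        final String col = type.getName();
        // Keep the timestamp column, any column a metric reads from, and either
        // the explicitly declared dimensions or, when none are declared
        // (schemaless dimensions), every remaining column.
        return tsField.equals(col)
            || metricsFields.contains(col)
            || dimensions.isEmpty()
            || dimensions.contains(col);
    }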

Example 100 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In class DruidParquetReadSupport, method getPartialReadSchema:

/**
 * Selects the columns from the Parquet schema that are used in the schema of the ingestion job.
 *
 * @param context the context of the file to be read
 *
 * @return the partial schema containing only the columns used by the ingestion schema
 */
private MessageType getPartialReadSchema(InitContext context) {
    MessageType fullSchema = context.getFileSchema();
    String name = fullSchema.getName();
    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
    ParseSpec parseSpec = config.getParser().getParseSpec();
    // if the parse spec has a flattenSpec, it may be auto-discovering props, so the full schema is needed
    if (parseSpec instanceof ParquetParseSpec && ((ParquetParseSpec) parseSpec).getFlattenSpec() != null) {
        return fullSchema;
    }
    String tsField = parseSpec.getTimestampSpec().getTimestampColumn();
    List<DimensionSchema> dimensionSchema = parseSpec.getDimensionsSpec().getDimensions();
    Set<String> dimensions = new HashSet<>();
    for (DimensionSchema dim : dimensionSchema) {
        dimensions.add(dim.getName());
    }
    Set<String> metricsFields = new HashSet<>();
    for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
        metricsFields.addAll(agg.requiredFields());
    }
    List<Type> partialFields = new ArrayList<>();
    for (Type type : fullSchema.getFields()) {
        if (tsField.equals(type.getName())
            || metricsFields.contains(type.getName())
            || (dimensions.size() > 0 && dimensions.contains(type.getName()))
            || dimensions.size() == 0) {
            partialFields.add(type);
        }
    }
    return new MessageType(name, partialFields);
}
Also used: ParseSpec (org.apache.druid.data.input.impl.ParseSpec), ArrayList (java.util.ArrayList), HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), DimensionSchema (org.apache.druid.data.input.impl.DimensionSchema), MessageType (org.apache.parquet.schema.MessageType), Type (org.apache.parquet.schema.Type), HashSet (java.util.HashSet)
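Note that this method differs from the Avro variant in Example 99 only in the ParseSpec subtype it checks for a flattenSpec (ParquetParseSpec instead of AvroParseSpec); the timestamp, dimension, and metric pruning logic is identical.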

Aggregations

AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 163 uses
Test (org.junit.Test): 85 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 56 uses
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 48 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 42 uses
ArrayList (java.util.ArrayList): 33 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 33 uses
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 28 uses
DataSchema (org.apache.druid.segment.indexing.DataSchema): 25 uses
Nullable (javax.annotation.Nullable): 22 uses
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 22 uses
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 22 uses
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 22 uses
HashMap (java.util.HashMap): 20 uses
List (java.util.List): 20 uses
DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory): 18 uses
LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory): 18 uses
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 18 uses
Map (java.util.Map): 17 uses
FloatFirstAggregatorFactory (org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory): 15 uses