Search in sources :

Example 91 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class TopNQueryEngine method getMapFn.

/**
 * Builds the cursor-mapping function for a TopN query by picking the TopN algorithm
 * that matches the query's dimension, aggregators and selector state.
 *
 * @param query   the TopN query to execute
 * @param adapter storage adapter queried for capabilities and dimension cardinality
 * @return a {@code TopNMapFn} wrapping the query and the selected algorithm
 */
private Function<Cursor, Result<TopNResultValue>> getMapFn(TopNQuery query, final StorageAdapter adapter) {
    final Capabilities capabilities = adapter.getCapabilities();
    final String dimension = query.getDimensionSpec().getDimension();
    final int cardinality = adapter.getDimensionCardinality(dimension);

    // Total of the maximum intermediate sizes reported by all aggregators of the query.
    int bytesPerRecord = 0;
    for (AggregatorFactory factory : query.getAggregatorSpecs()) {
        bytesPerRecord += factory.getMaxIntermediateSize();
    }

    // The query fills in the selector flags that drive the branch selection below.
    final TopNAlgorithmSelector selector = new TopNAlgorithmSelector(cardinality, bytesPerRecord);
    query.initTopNAlgorithmSelector(selector);

    final ColumnCapabilities columnCapabilities = query.getVirtualColumns().getColumnCapabilitiesWithFallback(adapter, dimension);

    final TopNAlgorithm topNAlgorithm;
    if (selector.isHasExtractionFn()) {
        // NOTE(review): the comment that used to sit on this condition was truncated; it appears to say
        // that the time-column name check stands in for "column is of type long and single-value" - confirm.
        if (dimension.equals(Column.TIME_COLUMN_NAME)) {
            // A special TimeExtractionTopNAlgorithm is required, since DimExtractionTopNAlgorithm
            // currently relies on the dimension cardinality to support lexicographic sorting
            topNAlgorithm = new TimeExtractionTopNAlgorithm(capabilities, query);
        } else {
            topNAlgorithm = new DimExtractionTopNAlgorithm(capabilities, query);
        }
    } else if (columnCapabilities != null && columnCapabilities.getType() != ValueType.STRING) {
        // force non-Strings to use DimExtraction for now, do a typed PooledTopN later
        topNAlgorithm = new DimExtractionTopNAlgorithm(capabilities, query);
    } else if (!selector.isAggregateAllMetrics()
               && (selector.isAggregateTopNMetricFirst() || query.getContextBoolean("doAggregateTopNMetricFirst", false))) {
        // Metric-first aggregation applies only when "aggregate all metrics" is off.
        topNAlgorithm = new AggregateTopNMetricFirstAlgorithm(capabilities, query, bufferPool);
    } else {
        // Either all metrics are aggregated, or nothing more specific applied.
        topNAlgorithm = new PooledTopNAlgorithm(capabilities, query, bufferPool);
    }
    return new TopNMapFn(query, topNAlgorithm);
}
Also used : ColumnCapabilities(io.druid.segment.column.ColumnCapabilities) Capabilities(io.druid.segment.Capabilities) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) ColumnCapabilities(io.druid.segment.column.ColumnCapabilities)

Example 92 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class TopNQueryQueryToolChest method makePreComputeManipulatorFn.

/**
 * Creates a function that rewrites every row of a TopN result: aggregator values are passed
 * through {@code fn}, pruned post-aggregators are copied or computed, and the dimension value
 * is stored under the dimension's output name.
 *
 * @param query the TopN query whose aggregators, post-aggregators and dimension are used
 * @param fn    manipulation applied to each aggregator value
 * @return a function producing a new result with the same timestamp and the rewritten rows
 */
@Override
public Function<Result<TopNResultValue>, Result<TopNResultValue>> makePreComputeManipulatorFn(final TopNQuery query, final MetricManipulationFn fn) {
    return new Function<Result<TopNResultValue>, Result<TopNResultValue>>() {

        private final String dimension = query.getDimensionSpec().getOutputName();

        private final List<PostAggregator> prunedAggs = prunePostAggregators(query);

        private final AggregatorFactory[] aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);

        private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());

        @Override
        public Result<TopNResultValue> apply(Result<TopNResultValue> result) {
            final List<Map<String, Object>> rows = Lists.newArrayList();
            for (DimensionAndMetricValueExtractor extractor : result.getValue()) {
                rows.add(manipulateRow(extractor));
            }
            return new Result<TopNResultValue>(result.getTimestamp(), new TopNResultValue(rows));
        }

        /** Converts one result row into a map of manipulated metrics, post-aggregates and the dimension value. */
        private Map<String, Object> manipulateRow(DimensionAndMetricValueExtractor input) {
            final int expectedSize = aggregatorFactories.length + prunedAggs.size() + 1;
            final Map<String, Object> row = Maps.newHashMapWithExpectedSize(expectedSize);

            // Aggregator values go through the manipulation function; names and factories share an index.
            int position = 0;
            for (AggregatorFactory factory : aggregatorFactories) {
                final String aggName = aggFactoryNames[position++];
                row.put(aggName, fn.manipulate(factory, input.getMetric(aggName)));
            }

            // A post-aggregate already present in the row is reused; otherwise it is computed
            // from the values collected so far.
            for (PostAggregator postAgg : prunedAggs) {
                final String name = postAgg.getName();
                final Object existing = input.getMetric(name);
                row.put(name, existing != null ? existing : postAgg.compute(row));
            }

            row.put(dimension, input.getDimensionValue(dimension));
            return row;
        }
    };
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Result(io.druid.query.Result) Function(com.google.common.base.Function) List(java.util.List) Map(java.util.Map)

Example 93 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class CachingClusteredClientTest method testOutOfOrderSequenceMerging.

/**
 * Feeds two TopN result sequences with interleaved timestamps (midnight vs. 01:00 of each day)
 * into {@code mergeCachedAndUncachedSequences} and asserts against the expected list, which is
 * ordered by timestamp.
 */
@Test
public void testOutOfOrderSequenceMerging() throws Exception {
    // Rows stamped at midnight for 2011-01-07 through 2011-01-09.
    final Sequence<Result<TopNResultValue>> midnightRows = Sequences.simple(
        makeTopNResultsWithoutRename(
            new DateTime("2011-01-07"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989,
            new DateTime("2011-01-08"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986,
            new DateTime("2011-01-09"), "a", 50, 4985, "b", 50, 4984, "c", 50, 4983
        )
    );
    // Rows stamped at 01:00 for 2011-01-06 through 2011-01-09.
    final Sequence<Result<TopNResultValue>> oneAmRows = Sequences.simple(
        makeTopNResultsWithoutRename(
            new DateTime("2011-01-06T01"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989,
            new DateTime("2011-01-07T01"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989,
            new DateTime("2011-01-08T01"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986,
            new DateTime("2011-01-09T01"), "a", 50, 4985, "b", 50, 4984, "c", 50, 4983
        )
    );
    final List<Sequence<Result<TopNResultValue>>> sequences = ImmutableList.of(midnightRows, oneAmRows);

    // Expected output: rows of both inputs, ordered by timestamp.
    final Iterable<Result<TopNResultValue>> expected = makeTopNResultsWithoutRename(
        new DateTime("2011-01-06T01"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989,
        new DateTime("2011-01-07"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989,
        new DateTime("2011-01-07T01"), "a", 50, 4991, "b", 50, 4990, "c", 50, 4989,
        new DateTime("2011-01-08"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986,
        new DateTime("2011-01-08T01"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986,
        new DateTime("2011-01-09"), "a", 50, 4985, "b", 50, 4984, "c", 50, 4983,
        new DateTime("2011-01-09T01"), "a", 50, 4985, "b", 50, 4984, "c", 50, 4983
    );

    TestHelper.assertExpectedResults(
        expected,
        client.mergeCachedAndUncachedSequences(
            new TopNQueryBuilder()
                .dataSource("test")
                .intervals("2011-01-06/2011-01-10")
                .dimension("a")
                .metric("b")
                .threshold(3)
                .aggregators(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("b")))
                .build(),
            sequences
        )
    );
}
Also used : TopNResultValue(io.druid.query.topn.TopNResultValue) TopNQueryBuilder(io.druid.query.topn.TopNQueryBuilder) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) Sequence(io.druid.java.util.common.guava.Sequence) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Example 94 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class QuantileSqlAggregatorTest method setUp.

/**
 * Prepares the fixture: builds a memory-mapped index with count, double-sum and approximate
 * histogram metrics over {@code CalciteTests.ROWS1}, registers it with a segment walker and
 * creates a planner factory whose operator table contains only the quantile SQL aggregator.
 */
@Before
public void setUp() throws Exception {
    Calcites.setSystemProperties();
    // Note: this is needed in order to properly register the serde for Histogram.
    new ApproximateHistogramDruidModule().configure(null);

    // Index builder is configured first so the temp folder is created before the schema is built.
    final IndexBuilder indexBuilder = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .indexMerger(TestHelper.getTestIndexMergerV9());
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMetrics(
            new AggregatorFactory[] {
                new CountAggregatorFactory("cnt"),
                new DoubleSumAggregatorFactory("m1", "m1"),
                new ApproximateHistogramAggregatorFactory("hist_m1", "m1", null, null, null, null)
            }
        )
        .withRollup(false)
        .build();
    final QueryableIndex index = indexBuilder
        .schema(schema)
        .rows(CalciteTests.ROWS1)
        .buildMMappedIndex();

    // The walker is constructed before the segment descriptor, then the index is added to it.
    final SpecificSegmentsQuerySegmentWalker emptyWalker =
        new SpecificSegmentsQuerySegmentWalker(CalciteTests.queryRunnerFactoryConglomerate());
    final DataSegment segment = DataSegment.builder()
        .dataSource(DATA_SOURCE)
        .interval(index.getDataInterval())
        .version("1")
        .shardSpec(new LinearShardSpec(0))
        .build();
    walker = emptyWalker.add(segment, index);

    final PlannerConfig plannerConfig = new PlannerConfig();
    final SchemaPlus rootSchema = Calcites.createRootSchema(CalciteTests.createMockSchema(walker, plannerConfig));
    final DruidOperatorTable operatorTable = new DruidOperatorTable(
        ImmutableSet.<SqlAggregator>of(new QuantileSqlAggregator()),
        ImmutableSet.<SqlExtractionOperator>of()
    );
    plannerFactory = new PlannerFactory(rootSchema, walker, operatorTable, plannerConfig);
}
Also used : DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) ApproximateHistogramDruidModule(io.druid.query.aggregation.histogram.ApproximateHistogramDruidModule) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) IndexBuilder(io.druid.segment.IndexBuilder) SchemaPlus(org.apache.calcite.schema.SchemaPlus) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) ApproximateHistogramFoldingAggregatorFactory(io.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) FilteredAggregatorFactory(io.druid.query.aggregation.FilteredAggregatorFactory) ApproximateHistogramAggregatorFactory(io.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) DruidOperatorTable(io.druid.sql.calcite.planner.DruidOperatorTable) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) SpecificSegmentsQuerySegmentWalker(io.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) QueryableIndex(io.druid.segment.QueryableIndex) PlannerConfig(io.druid.sql.calcite.planner.PlannerConfig) PlannerFactory(io.druid.sql.calcite.planner.PlannerFactory) ApproximateHistogramAggregatorFactory(io.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) Before(org.junit.Before)

Example 95 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class KafkaIndexTaskTest method countEvents.

/**
 * Runs a timeseries query with a long-sum aggregator over the "rows" metric against the task's
 * query runner and returns the resulting "rows" value.
 *
 * @param task the task whose query runner is queried
 * @return the "rows" metric of the first result, or 0 when the query returns no results
 * @throws Exception if building or running the query fails
 */
public long countEvents(final Task task) throws Exception {
    // Single all-granularity bucket over the interval "0000/3000", summing the "rows" metric.
    final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource(DATA_SCHEMA.getDataSource())
        .aggregators(ImmutableList.<AggregatorFactory>of(new LongSumAggregatorFactory("rows", "rows")))
        .granularity(Granularities.ALL)
        .intervals("0000/3000")
        .build();

    final ArrayList<Result<TimeseriesResultValue>> results = Sequences.toList(
        task.getQueryRunner(query).run(query, ImmutableMap.<String, Object>of()),
        Lists.<Result<TimeseriesResultValue>>newArrayList()
    );

    if (results.isEmpty()) {
        return 0;
    }
    return results.get(0).getValue().getLongMetric("rows");
}
Also used : TimeseriesResultValue(io.druid.query.timeseries.TimeseriesResultValue) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Result(io.druid.query.Result)

Aggregations

AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)148 Test (org.junit.Test)86 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)82 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)64 Interval (org.joda.time.Interval)45 DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory)38 DateTime (org.joda.time.DateTime)37 FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory)32 Result (io.druid.query.Result)31 DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory)27 HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory)25 Row (io.druid.data.input.Row)24 PostAggregator (io.druid.query.aggregation.PostAggregator)24 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)22 CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory)19 LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory)18 LongFirstAggregatorFactory (io.druid.query.aggregation.first.LongFirstAggregatorFactory)18 LongLastAggregatorFactory (io.druid.query.aggregation.last.LongLastAggregatorFactory)18 DimensionSpec (io.druid.query.dimension.DimensionSpec)18 TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery)17