Search in sources:

Example 1 with IncrementalIndexStorageAdapter

use of org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter in project druid by druid-io.

the class StringFirstTimeseriesQueryTest method testTimeseriesQuery.

@Test
public void testTimeseriesQuery() {
    TimeseriesQueryEngine engine = new TimeseriesQueryEngine();
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .granularity(QueryRunnerTestHelper.ALL_GRAN)
        .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
        .aggregators(ImmutableList.of(
            new StringFirstAggregatorFactory("nonfolding", CLIENT_TYPE, null, 1024),
            new StringFirstAggregatorFactory("folding", FIRST_CLIENT_TYPE, null, 1024),
            new StringFirstAggregatorFactory("nonexistent", "nonexistent", null, 1024),
            new StringFirstAggregatorFactory("numeric", "cnt", null, 1024)))
        .build();
    List<Result<TimeseriesResultValue>> expectedResults = Collections.singletonList(new Result<>(
        TIME1,
        new TimeseriesResultValue(ImmutableMap.<String, Object>builder()
            .put("nonfolding", new SerializablePairLongString(TIME1.getMillis(), "iphone"))
            .put("folding", new SerializablePairLongString(TIME1.getMillis(), "iphone"))
            .put("nonexistent", new SerializablePairLongString(DateTimes.MAX.getMillis(), null))
            .put("numeric", new SerializablePairLongString(DateTimes.MAX.getMillis(), null))
            .build())));
    final Iterable<Result<TimeseriesResultValue>> iiResults = engine.process(query, new IncrementalIndexStorageAdapter(incrementalIndex)).toList();
    final Iterable<Result<TimeseriesResultValue>> qiResults = engine.process(query, new QueryableIndexStorageAdapter(queryableIndex)).toList();
    TestHelper.assertExpectedResults(expectedResults, iiResults, "incremental index");
    TestHelper.assertExpectedResults(expectedResults, qiResults, "queryable index");
}
Also used: TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) SerializablePairLongString(org.apache.druid.query.aggregation.SerializablePairLongString) Result(org.apache.druid.query.Result) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
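
The test relies on incrementalIndex and queryableIndex fixtures built elsewhere in the class. A minimal sketch of what the incremental-index side could look like, reusing the OnheapIncrementalIndex builder API shown in Example 5 below; the schema and row here are illustrative, not the test's actual setup:

IncrementalIndex incrementalIndex = new OnheapIncrementalIndex.Builder()
    .setIndexSchema(new IncrementalIndexSchema.Builder()
        .withQueryGranularity(Granularities.SECOND)
        .withMetrics(new CountAggregatorFactory("cnt"))
        .build())
    .setMaxRowCount(1000)
    .build();
// Illustrative row; the real test ingests rows carrying the CLIENT_TYPE and FIRST_CLIENT_TYPE columns.
incrementalIndex.add(new MapBasedInputRow(
    TIME1.getMillis(),
    Collections.singletonList(CLIENT_TYPE),
    ImmutableMap.of(CLIENT_TYPE, "iphone")));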

Example 2 with IncrementalIndexStorageAdapter

use of org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter in project druid by druid-io.

the class GroupByStrategyV1 method processSubqueryResult.

@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
    final Set<AggregatorFactory> aggs = new HashSet<>();
    // Nested group-bys work by first running the inner query and then materializing the results in an incremental
    // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
    // the aggregators for the outer query to define the column names so that the index will match the query. If
    // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
    // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
    // subsequent ones) and return an error if the aggregator types are different.
    final Set<String> dimensionNames = new HashSet<>();
    for (DimensionSpec dimension : subquery.getDimensions()) {
        dimensionNames.add(dimension.getOutputName());
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
            if (dimensionNames.contains(transferAgg.getName())) {
                // The subquery's dimensions already provide a column with this name, so skip the
                // transfer aggregator rather than trying to create a second column of the same name.
                continue;
            }
            if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {

                @Override
                public boolean apply(AggregatorFactory agg) {
                    return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
                }
            })) {
                throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
            }
            aggs.add(transferAgg);
        }
    }
    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
        .setAggregatorSpecs(ImmutableList.copyOf(aggs))
        .setInterval(subquery.getIntervals())
        .setPostAggregatorSpecs(new ArrayList<>())
        .build();
    final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
        .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
        .build();
    final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)),
        subquery,
        configSupplier.get(),
        subqueryResult);
    // The outer query might have multiple intervals, but they are expected to be non-overlapping and
    // sorted, which is ensured by QuerySegmentSpec.
    // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    // and concatenate the results.
    final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        outerQuery,
        null,
        configSupplier.get(),
        Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<ResultRow>>() {

            @Override
            public Sequence<ResultRow> apply(Interval interval) {
                return process(
                    outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))),
                    new IncrementalIndexStorageAdapter(innerQueryResultIndex));
            }
        })));
    innerQueryResultIndex.close();
    return Sequences.withBaggage(outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
Also used: DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) ArrayList(java.util.ArrayList) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)
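
The duplicate-aggregator check above uses Guava's Iterables.any with an anonymous Predicate. On Java 8+ the same test collapses to a stream; an equivalent sketch of just that branch:

boolean conflicting = aggs.stream()
    .anyMatch(agg -> agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg));
if (conflicting) {
    // Same failure mode as the Iterables.any version above.
    throw new IAE(
        "Inner aggregator can currently only be referenced by a single type of outer aggregator for '%s'",
        transferAgg.getName());
}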

Example 3 with IncrementalIndexStorageAdapter

use of org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter in project druid by druid-io.

the class IncrementalIndexReadBenchmark method read.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void read(Blackhole blackhole) {
    IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
    Sequence<Cursor> cursors = makeCursors(sa, null);
    Cursor cursor = cursors.limit(1).toList().get(0);
    List<DimensionSelector> selectors = new ArrayList<>();
    selectors.add(makeDimensionSelector(cursor, "dimSequential"));
    selectors.add(makeDimensionSelector(cursor, "dimZipf"));
    selectors.add(makeDimensionSelector(cursor, "dimUniform"));
    selectors.add(makeDimensionSelector(cursor, "dimSequentialHalfNull"));
    cursor.reset();
    while (!cursor.isDone()) {
        for (DimensionSelector selector : selectors) {
            IndexedInts row = selector.getRow();
            blackhole.consume(selector.lookupName(row.get(0)));
        }
        cursor.advance();
    }
}
Also used: DimensionSelector(org.apache.druid.segment.DimensionSelector) IndexedInts(org.apache.druid.segment.data.IndexedInts) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) ArrayList(java.util.ArrayList) Cursor(org.apache.druid.segment.Cursor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
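
The benchmark leans on two helpers, makeCursors and makeDimensionSelector, defined elsewhere in the class. Plausible shapes for both, assuming this Druid version's StorageAdapter.makeCursors signature; the interval choice and the DefaultDimensionSpec usage are assumptions:

private Sequence<Cursor> makeCursors(IncrementalIndexStorageAdapter sa, DimFilter filter)
{
    return sa.makeCursors(
        // A DimFilter compiles down to a low-level Filter; null means unfiltered.
        filter == null ? null : filter.toFilter(),
        // Assumed: scan the adapter's whole time interval.
        sa.getInterval(),
        VirtualColumns.EMPTY,
        Granularities.ALL,
        false,  // ascending time order
        null);  // no query metrics
}

private static DimensionSelector makeDimensionSelector(Cursor cursor, String dim)
{
    return cursor.getColumnSelectorFactory().makeDimensionSelector(DefaultDimensionSpec.of(dim));
}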

Example 4 with IncrementalIndexStorageAdapter

use of org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter in project druid by druid-io.

the class IncrementalIndexReadBenchmark method readWithFilters.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readWithFilters(Blackhole blackhole) {
    DimFilter filter = new OrDimFilter(Arrays.asList(
        new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC),
        new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getEnabledInstance()),
        new RegexDimFilter("dimSequential", "X", null),
        new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null),
        new InDimFilter("dimSequential", Collections.singletonList("X"), null)));
    IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
    Sequence<Cursor> cursors = makeCursors(sa, filter);
    Cursor cursor = cursors.limit(1).toList().get(0);
    List<DimensionSelector> selectors = new ArrayList<>();
    selectors.add(makeDimensionSelector(cursor, "dimSequential"));
    selectors.add(makeDimensionSelector(cursor, "dimZipf"));
    selectors.add(makeDimensionSelector(cursor, "dimUniform"));
    selectors.add(makeDimensionSelector(cursor, "dimSequentialHalfNull"));
    cursor.reset();
    while (!cursor.isDone()) {
        for (DimensionSelector selector : selectors) {
            IndexedInts row = selector.getRow();
            blackhole.consume(selector.lookupName(row.get(0)));
        }
        cursor.advance();
    }
}
Also used: RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) DimensionSelector(org.apache.druid.segment.DimensionSelector) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) ArrayList(java.util.ArrayList) Cursor(org.apache.druid.segment.Cursor) IndexedInts(org.apache.druid.segment.data.IndexedInts) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
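
Either benchmark can be driven directly with the standard JMH Runner API instead of a packaged benchmark jar. A minimal sketch; the runner class name and the fork/iteration settings are illustrative:

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public class IncrementalIndexReadBenchmarkMain
{
    public static void main(String[] args) throws RunnerException
    {
        Options opt = new OptionsBuilder()
            // Match the benchmark class by its simple name.
            .include(IncrementalIndexReadBenchmark.class.getSimpleName())
            .forks(1)
            .warmupIterations(3)
            .measurementIterations(5)
            .build();
        new Runner(opt).run();
    }
}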

Example 5 with IncrementalIndexStorageAdapter

use of org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter in project druid by druid-io.

the class DistinctCountTopNQueryTest method testTopNWithDistinctCountAgg.

@Test
public void testTopNWithDistinctCountAgg() throws Exception {
    TopNQueryEngine engine = new TopNQueryEngine(pool);
    IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(new IncrementalIndexSchema.Builder()
            .withQueryGranularity(Granularities.SECOND)
            .withMetrics(new CountAggregatorFactory("cnt"))
            .build())
        .setMaxRowCount(1000)
        .build();
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    DateTime time = DateTimes.of("2016-03-04T00:00:00.000Z");
    long timestamp = time.getMillis();
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "2", client_type, "android")));
    TopNQuery query = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .granularity(QueryRunnerTestHelper.ALL_GRAN)
        .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
        .dimension(client_type)
        .metric("UV")
        .threshold(10)
        .aggregators(QueryRunnerTestHelper.ROWS_COUNT, new DistinctCountAggregatorFactory("UV", visitor_id, null))
        .build();
    final Iterable<Result<TopNResultValue>> results =
        engine.query(query, new IncrementalIndexStorageAdapter(index), null).toList();
    List<Result<TopNResultValue>> expectedResults = Collections.singletonList(new Result<>(
        time,
        new TopNResultValue(Arrays.<Map<String, Object>>asList(
            ImmutableMap.of(client_type, "iphone", "UV", 2L, "rows", 2L),
            ImmutableMap.of(client_type, "android", "UV", 1L, "rows", 1L)))));
    TestHelper.assertExpectedResults(expectedResults, results);
}
Also used: TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) TopNResultValue(org.apache.druid.query.topn.TopNResultValue) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) DateTime(org.joda.time.DateTime) TopNQueryEngine(org.apache.druid.query.topn.TopNQueryEngine) Result(org.apache.druid.query.Result) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) TopNQuery(org.apache.druid.query.topn.TopNQuery) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
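
TopNQueryEngine requires a ByteBuffer pool (the pool field) for its aggregation buffers. One plausible setup, assuming Druid's org.apache.druid.collections.CloseableStupidPool; the buffer sizing is illustrative:

CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>(
    "TopNQueryEngine-bufferPool",
    () -> ByteBuffer.allocate(10 * 1024 * 1024));  // 10 MiB per pooled buffer, illustrative sizing
TopNQueryEngine engine = new TopNQueryEngine(pool);
// The pool should be closed (e.g. in an @After method) once the test is done with it.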

Aggregations

IncrementalIndexStorageAdapter (org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter): 12
QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter): 6
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 6
Test (org.junit.Test): 6
ArrayList (java.util.ArrayList): 5
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 5
Result (org.apache.druid.query.Result): 4
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 4
Cursor (org.apache.druid.segment.Cursor): 4
DimensionSelector (org.apache.druid.segment.DimensionSelector): 4
QueryableIndex (org.apache.druid.segment.QueryableIndex): 4
StorageAdapter (org.apache.druid.segment.StorageAdapter): 4
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 4
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 3
Sequence (org.apache.druid.java.util.common.guava.Sequence): 3
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 3
TimeseriesQuery (org.apache.druid.query.timeseries.TimeseriesQuery): 3
TimeseriesQueryEngine (org.apache.druid.query.timeseries.TimeseriesQueryEngine): 3
TimeseriesResultValue (org.apache.druid.query.timeseries.TimeseriesResultValue): 3
ImmutableList (com.google.common.collect.ImmutableList): 2