
Example 6 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

The class SpatialFilterTest, method makeQueryableIndex.

private static QueryableIndex makeQueryableIndex(IndexSpec indexSpec) throws IOException {
    IncrementalIndex theIndex = makeIncrementalIndex();
    // Create a unique temp path, then convert it into a directory for the persisted segment.
    File tmpFile = File.createTempFile("billy", "yay");
    tmpFile.delete();
    tmpFile.mkdirs();
    tmpFile.deleteOnExit();
    // Persist the in-memory index to disk and reload it as an immutable QueryableIndex.
    INDEX_MERGER.persist(theIndex, tmpFile, indexSpec);
    return INDEX_IO.loadIndex(tmpFile);
}
Also used:
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)
File (java.io.File)
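The helper makeIncrementalIndex() is not part of this snippet. As a hedged sketch, such a helper could build an on-heap index with the same constructor that Example 10 below uses; the metric name "rows" is an assumption, and the real SpatialFilterTest helper also configures spatial dimensions, which are omitted here:

// Hypothetical sketch only; the actual SpatialFilterTest helper also registers spatial dimensions.
private static IncrementalIndex makeIncrementalIndex() {
    return new OnheapIncrementalIndex(
        0L,                                                              // minimum timestamp
        Granularities.NONE,                                              // no time-based rollup
        new AggregatorFactory[] { new CountAggregatorFactory("rows") },  // assumed metric
        1000                                                             // max in-memory rows
    );
}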

Example 7 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

The class TopNTypeInterfaceBenchmark, method setup.

@Setup
public void setup() throws IOException {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());
    // Register the hyperUnique complex metric serde once per JVM.
    if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
        ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
    }
    executorService = Execs.multiThreaded(numSegments, "TopNThreadPool");
    setupQueries();
    // Build one TopN query per dimension type (string, long, float) from the "basic" schema.
    schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic");
    queryBuilder = SCHEMA_QUERY_MAP.get("basic").get("string");
    queryBuilder.threshold(threshold);
    stringQuery = queryBuilder.build();
    TopNQueryBuilder longBuilder = SCHEMA_QUERY_MAP.get("basic").get("long");
    longBuilder.threshold(threshold);
    longQuery = longBuilder.build();
    TopNQueryBuilder floatBuilder = SCHEMA_QUERY_MAP.get("basic").get("float");
    floatBuilder.threshold(threshold);
    floatQuery = floatBuilder.build();
    // Generate in-memory incremental indexes, one per segment.
    incIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
        log.info("Generating rows for segment " + i);
        BenchmarkDataGenerator gen = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + i, schemaInfo.getDataInterval(), rowsPerSegment);
        IncrementalIndex incIndex = makeIncIndex();
        for (int j = 0; j < rowsPerSegment; j++) {
            InputRow row = gen.nextRow();
            if (j % 10000 == 0) {
                log.info(j + " rows generated.");
            }
            incIndex.add(row);
        }
        incIndexes.add(incIndex);
    }
    // Persist each incremental index and reload it as an immutable QueryableIndex.
    File tmpDir = Files.createTempDir();
    log.info("Using temp dir: " + tmpDir.getAbsolutePath());
    tmpDir.deleteOnExit();
    qIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
        File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec());
        QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
        qIndexes.add(qIndex);
    }
    // Off-heap compute buffer pool plus query toolchest for running TopN queries.
    factory = new TopNQueryRunnerFactory(
        new StupidPool<>(
            "TopNBenchmark-compute-bufferPool",
            new OffheapBufferGenerator("compute", 250000000),
            0,
            Integer.MAX_VALUE
        ),
        new TopNQueryQueryToolChest(new TopNQueryConfig(), QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()),
        QueryBenchmarkUtil.NOOP_QUERYWATCHER
    );
}
Also used:
TopNQueryBuilder (io.druid.query.topn.TopNQueryBuilder)
IndexSpec (io.druid.segment.IndexSpec)
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator)
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde)
OffheapBufferGenerator (io.druid.offheap.OffheapBufferGenerator)
TopNQueryConfig (io.druid.query.topn.TopNQueryConfig)
QueryableIndex (io.druid.segment.QueryableIndex)
InputRow (io.druid.data.input.InputRow)
TopNQueryRunnerFactory (io.druid.query.topn.TopNQueryRunnerFactory)
StupidPool (io.druid.collections.StupidPool)
TopNQueryQueryToolChest (io.druid.query.topn.TopNQueryQueryToolChest)
File (java.io.File)
Setup (org.openjdk.jmh.annotations.Setup)
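The benchmark bodies that consume this setup are not shown above. A minimal sketch of running one of the prepared queries against a persisted segment, assuming the generic QueryRunnerFactory.createRunner API; the segment identifier "qIndex0" and the result collection are illustrative, not the benchmark's actual code:

// Hedged sketch; follows the runner pattern shown in Example 10 below.
QueryRunner<Result<TopNResultValue>> runner =
    factory.createRunner(new QueryableIndexSegment("qIndex0", qIndexes.get(0)));
List<Result<TopNResultValue>> results = Sequences.toList(
    runner.run(stringQuery, Maps.<String, Object>newHashMap()),
    Lists.<Result<TopNResultValue>>newArrayList());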

Example 8 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

The class GroupByQueryHelper, method createIndexAccumulatorPair.

public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, final GroupByQueryConfig config, StupidPool<ByteBuffer> bufferPool, final boolean combine) {
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final Granularity gran = query.getGranularity();
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    // Align the index's minimum timestamp with the start of the first granularity bucket.
    long granTimeStart = timeStart;
    if (!(Granularities.ALL.equals(gran))) {
        granTimeStart = gran.bucketStart(new DateTime(timeStart)).getMillis();
    }
    final List<AggregatorFactory> aggs;
    if (combine) {
        aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {

            @Override
            public AggregatorFactory apply(AggregatorFactory input) {
                return input.getCombiningFactory();
            }
        });
    } else {
        aggs = query.getAggregatorSpecs();
    }
    final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    });
    final IncrementalIndex index;
    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = Lists.newArrayList();
    for (DimensionSpec dimension : query.getDimensions()) {
        dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
    }
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(new DimensionsSpec(dimensionSchemas, null, null))
        .withMetrics(aggs.toArray(new AggregatorFactory[aggs.size()]))
        .withQueryGranularity(gran)
        .withMinTimestamp(granTimeStart)
        .build();
    // Choose off-heap or on-heap index storage based on the query context.
    if (query.getContextValue("useOffheap", false)) {
        index = new OffheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults(), bufferPool);
    } else {
        index = new OnheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults());
    }
    Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            if (in instanceof MapBasedRow) {
                try {
                    MapBasedRow row = (MapBasedRow) in;
                    accumulated.add(new MapBasedInputRow(row.getTimestamp(), dimensions, row.getEvent()));
                } catch (IndexSizeExceededException e) {
                    throw new ResourceLimitExceededException(e.getMessage());
                }
            } else {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, accumulator);
}
Also used:
Accumulator (io.druid.java.util.common.guava.Accumulator)
DimensionSpec (io.druid.query.dimension.DimensionSpec)
OffheapIncrementalIndex (io.druid.segment.incremental.OffheapIncrementalIndex)
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)
Granularity (io.druid.java.util.common.granularity.Granularity)
StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema)
DimensionSchema (io.druid.data.input.impl.DimensionSchema)
DateTime (org.joda.time.DateTime)
MapBasedRow (io.druid.data.input.MapBasedRow)
Function (com.google.common.base.Function)
ISE (io.druid.java.util.common.ISE)
MapBasedInputRow (io.druid.data.input.MapBasedInputRow)
IncrementalIndexSchema (io.druid.segment.incremental.IncrementalIndexSchema)
Pair (io.druid.java.util.common.Pair)
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)
ResourceLimitExceededException (io.druid.query.ResourceLimitExceededException)
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec)
IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException)
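A short sketch of how the returned pair is typically consumed: the accumulator folds a Sequence<Row> of results into the incremental index, which is essentially what GroupByQueryHelper.makeIncrementalIndex (used in Example 9 below) wraps. The rows variable here is an assumption standing in for a sequence of subquery results:

// Illustrative usage; "rows" stands in for a Sequence<Row> of results to materialize.
Pair<IncrementalIndex, Accumulator<IncrementalIndex, Row>> indexAccumulatorPair =
    GroupByQueryHelper.createIndexAccumulatorPair(query, config, bufferPool, false);
IncrementalIndex index = rows.accumulate(indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);
// Callers must close() the index once its results have been consumed.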

Example 9 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

The class GroupByStrategyV1, method processSubqueryResult.

@Override
public Sequence<Row> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<Row> subqueryResult) {
    final Set<AggregatorFactory> aggs = Sets.newHashSet();
    // Nested group-bys work by first running the inner query and then materializing the results in an incremental
    // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
    // the aggregators for the outer query to define the column names so that the index will match the query. If
    // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
    // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
    // subsequent ones) and return an error if the aggregator types are different.
    final Set<String> dimensionNames = Sets.newHashSet();
    for (DimensionSpec dimension : subquery.getDimensions()) {
        dimensionNames.add(dimension.getOutputName());
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
            if (dimensionNames.contains(transferAgg.getName())) {
                // The required column is already materialized as a dimension of the subquery, and a plain
                // dimension column doesn't have this problem.
                continue;
            }
            if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {

                @Override
                public boolean apply(AggregatorFactory agg) {
                    return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
                }
            })) {
                throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
            }
            aggs.add(transferAgg);
        }
    }
    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
        .setAggregatorSpecs(Lists.newArrayList(aggs))
        .setInterval(subquery.getIntervals())
        .setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList())
        .build();
    final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
        .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
        .build();
    final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(innerQuery.withOverriddenContext(ImmutableMap.<String, Object>of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)), configSupplier.get(), bufferPool, subqueryResult, false);
    // The outer query might have multiple intervals, but they are expected to be non-overlapping and
    // sorted, which is ensured by QuerySegmentSpec.
    // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    // and concatenate the results.
    final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(outerQuery, configSupplier.get(), bufferPool, Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<Row>>() {

        @Override
        public Sequence<Row> apply(Interval interval) {
            return process(outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))), new IncrementalIndexStorageAdapter(innerQueryResultIndex));
        }
    })), true);
    innerQueryResultIndex.close();
    return Sequences.withBaggage(outerQuery.applyLimit(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
Also used:
DimensionSpec (io.druid.query.dimension.DimensionSpec)
PostAggregator (io.druid.query.aggregation.PostAggregator)
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)
MultipleIntervalSegmentSpec (io.druid.query.spec.MultipleIntervalSegmentSpec)
Sequence (io.druid.java.util.common.guava.Sequence)
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)
IAE (io.druid.java.util.common.IAE)
GroupByQuery (io.druid.query.groupby.GroupByQuery)
IncrementalIndexStorageAdapter (io.druid.segment.incremental.IncrementalIndexStorageAdapter)
Interval (org.joda.time.Interval)
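For orientation, a hedged sketch of the kind of nested groupBy that exercises this code path, with the inner query set as the outer query's data source; the data source, dimension, metric names, and interval are all illustrative assumptions:

// Hypothetical nested groupBy; names and interval are made up for illustration.
GroupByQuery innerQuery = GroupByQuery.builder()
    .setDataSource("testing")
    .setInterval("2011-01-01/2011-01-02")
    .setGranularity(Granularities.ALL)
    .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("dim", "dim")))
    .setAggregatorSpecs(Lists.<AggregatorFactory>newArrayList(new CountAggregatorFactory("cnt")))
    .build();
GroupByQuery outerQuery = GroupByQuery.builder()
    .setDataSource(innerQuery)  // the outer query runs over the inner query's results
    .setInterval("2011-01-01/2011-01-02")
    .setGranularity(Granularities.ALL)
    .setAggregatorSpecs(Lists.<AggregatorFactory>newArrayList(new LongSumAggregatorFactory("total", "cnt")))
    .build();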

Example 10 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

The class DataSourceMetadataQueryTest, method testMaxIngestedEventTime.

@Test
public void testMaxIngestedEventTime() throws Exception {
    final IncrementalIndex rtIndex = new OnheapIncrementalIndex(0L, Granularities.NONE, new AggregatorFactory[] { new CountAggregatorFactory("count") }, 1000);
    final QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner((QueryRunnerFactory) new DataSourceMetadataQueryRunnerFactory(QueryRunnerTestHelper.NOOP_QUERYWATCHER), new IncrementalIndexSegment(rtIndex, "test"), null);
    DateTime timestamp = new DateTime(System.currentTimeMillis());
    rtIndex.add(new MapBasedInputRow(timestamp.getMillis(), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "x")));
    DataSourceMetadataQuery dataSourceMetadataQuery = Druids.newDataSourceMetadataQueryBuilder().dataSource("testing").build();
    Map<String, Object> context = new MapMaker().makeMap();
    context.put(Result.MISSING_SEGMENTS_KEY, Lists.newArrayList());
    Iterable<Result<DataSourceMetadataResultValue>> results = Sequences.toList(runner.run(dataSourceMetadataQuery, context), Lists.<Result<DataSourceMetadataResultValue>>newArrayList());
    DataSourceMetadataResultValue val = results.iterator().next().getValue();
    DateTime maxIngestedEventTime = val.getMaxIngestedEventTime();
    Assert.assertEquals(timestamp, maxIngestedEventTime);
}
Also used:
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)
IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment)
MapMaker (com.google.common.collect.MapMaker)
QueryRunner (io.druid.query.QueryRunner)
DateTime (org.joda.time.DateTime)
Result (io.druid.query.Result)
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)
MapBasedInputRow (io.druid.data.input.MapBasedInputRow)
Test (org.junit.Test)

Aggregations

IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 89 usages
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 81 usages
File (java.io.File): 49 usages
Test (org.junit.Test): 37 usages
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 33 usages
IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest): 26 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 25 usages
DateTime (org.joda.time.DateTime): 21 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 16 usages
QueryableIndex (io.druid.segment.QueryableIndex): 16 usages
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 13 usages
IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment): 13 usages
IndexSpec (io.druid.segment.IndexSpec): 13 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 12 usages
IncrementalIndexAdapter (io.druid.segment.incremental.IncrementalIndexAdapter): 12 usages
Interval (org.joda.time.Interval): 11 usages
InputRow (io.druid.data.input.InputRow): 10 usages
IOException (java.io.IOException): 10 usages
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator): 8 usages
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde): 8 usages