Examples with OnheapIncrementalIndex - io.druid.segment.incremental.OnheapIncrementalIndex

Example 36 with OnheapIncrementalIndex

use of io.druid.segment.incremental.OnheapIncrementalIndex in project druid by druid-io.

the class Sink method makeNewCurrIndex.

private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) {
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder().withMinTimestamp(minTimestamp).withTimestampSpec(schema.getParser()).withQueryGranularity(schema.getGranularitySpec().getQueryGranularity()).withDimensionsSpec(schema.getParser()).withMetrics(schema.getAggregators()).withRollup(schema.getGranularitySpec().isRollup()).build();
    final IncrementalIndex newIndex = new OnheapIncrementalIndex(indexSchema, reportParseExceptions, maxRowsInMemory);
    final FireHydrant old;
    synchronized (hydrantLock) {
        if (writable) {
            old = currHydrant;
            int newCount = 0;
            int numHydrants = hydrants.size();
            if (numHydrants > 0) {
                FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
                newCount = lastHydrant.getCount() + 1;
                if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
                    Map<String, ColumnCapabilitiesImpl> oldCapabilities;
                    if (lastHydrant.hasSwapped()) {
                        oldCapabilities = Maps.newHashMap();
                        QueryableIndex oldIndex = lastHydrant.getSegment().asQueryableIndex();
                        for (String dim : oldIndex.getAvailableDimensions()) {
                            dimOrder.add(dim);
                            oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumn(dim).getCapabilities());
                        }
                    } else {
                        IncrementalIndex oldIndex = lastHydrant.getIndex();
                        dimOrder.addAll(oldIndex.getDimensionOrder());
                        oldCapabilities = oldIndex.getColumnCapabilities();
                    }
                    newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
                }
            }
            currHydrant = new FireHydrant(newIndex, newCount, getSegment().getIdentifier());
            if (old != null) {
                numRowsExcludingCurrIndex.addAndGet(old.getIndex().size());
            }
            hydrants.add(currHydrant);
        } else {
            // Oops, someone called finishWriting while we were making this new index.
            newIndex.close();
            throw new ISE("finishWriting() called during swap");
        }
    }
    return old;
}

Also used : IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) QueryableIndex(io.druid.segment.QueryableIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) ISE(io.druid.java.util.common.ISE) FireHydrant(io.druid.segment.realtime.FireHydrant) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl)

Example 37 with OnheapIncrementalIndex

use of io.druid.segment.incremental.OnheapIncrementalIndex in project druid by druid-io.

the class SpatialFilterTest method makeMergedQueryableIndex.

private static QueryableIndex makeMergedQueryableIndex(IndexSpec indexSpec) {
    try {
        IncrementalIndex first = new OnheapIncrementalIndex(new IncrementalIndexSchema.Builder().withMinTimestamp(DATA_INTERVAL.getStartMillis()).withQueryGranularity(Granularities.DAY).withMetrics(METRIC_AGGS).withDimensionsSpec(new DimensionsSpec(null, null, Arrays.asList(new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")), new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))))).build(), false, 1000);
        IncrementalIndex second = new OnheapIncrementalIndex(new IncrementalIndexSchema.Builder().withMinTimestamp(DATA_INTERVAL.getStartMillis()).withQueryGranularity(Granularities.DAY).withMetrics(METRIC_AGGS).withDimensionsSpec(new DimensionsSpec(null, null, Arrays.asList(new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")), new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))))).build(), false, 1000);
        IncrementalIndex third = new OnheapIncrementalIndex(new IncrementalIndexSchema.Builder().withMinTimestamp(DATA_INTERVAL.getStartMillis()).withQueryGranularity(Granularities.DAY).withMetrics(METRIC_AGGS).withDimensionsSpec(new DimensionsSpec(null, null, Arrays.asList(new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")), new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))))).build(), false, NUM_POINTS);
        first.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "foo", "lat", 0.0f, "long", 0.0f, "val", 17L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-02").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-02").toString(), "dim", "foo", "lat", 1.0f, "long", 3.0f, "val", 29L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-03").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-03").toString(), "dim", "foo", "lat", 4.0f, "long", 2.0f, "val", 13L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", "_mmx.unknown", "long", "_mmx.unknown", "val", 101L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "dim.geo", "_mmx.unknown", "val", 501L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-04").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-04").toString(), "dim", "foo", "lat", 7.0f, "long", 3.0f, "val", 91L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", 8.0f, "long", 6.0f, "val", 47L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "lat2", 0.0f, "long2", 0.0f, "val", 13L)));
        // Add a bunch of random points
        Random rand = new Random();
        for (int i = 8; i < NUM_POINTS; i++) {
            third.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "boo", "lat", (float) (rand.nextFloat() * 10 + 10.0), "long", (float) (rand.nextFloat() * 10 + 10.0), "val", i)));
        }
        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();
        File firstFile = new File(tmpFile, "first");
        File secondFile = new File(tmpFile, "second");
        File thirdFile = new File(tmpFile, "third");
        File mergedFile = new File(tmpFile, "merged");
        firstFile.mkdirs();
        firstFile.deleteOnExit();
        secondFile.mkdirs();
        secondFile.deleteOnExit();
        thirdFile.mkdirs();
        thirdFile.deleteOnExit();
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();
        INDEX_MERGER.persist(first, DATA_INTERVAL, firstFile, indexSpec);
        INDEX_MERGER.persist(second, DATA_INTERVAL, secondFile, indexSpec);
        INDEX_MERGER.persist(third, DATA_INTERVAL, thirdFile, indexSpec);
        QueryableIndex mergedRealtime = INDEX_IO.loadIndex(INDEX_MERGER.mergeQueryableIndex(Arrays.asList(INDEX_IO.loadIndex(firstFile), INDEX_IO.loadIndex(secondFile), INDEX_IO.loadIndex(thirdFile)), true, METRIC_AGGS, mergedFile, indexSpec));
        return mergedRealtime;
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

Also used : IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IOException(java.io.IOException) DateTime(org.joda.time.DateTime) Random(java.util.Random) QueryableIndex(io.druid.segment.QueryableIndex) SpatialDimensionSchema(io.druid.data.input.impl.SpatialDimensionSchema) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) File(java.io.File)

Example 38 with OnheapIncrementalIndex

use of io.druid.segment.incremental.OnheapIncrementalIndex in project druid by druid-io.

the class DistinctCountGroupByQueryTest method testGroupByWithDistinctCountAgg.

@Test
public void testGroupByWithDistinctCountAgg() throws Exception {
    final GroupByQueryConfig config = new GroupByQueryConfig();
    config.setMaxIntermediateRows(10000);
    final GroupByQueryRunnerFactory factory = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);
    IncrementalIndex index = new OnheapIncrementalIndex(0, Granularities.SECOND, new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, 1000);
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    long timestamp = System.currentTimeMillis();
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp + 1, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp + 2, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "2", client_type, "android")));
    GroupByQuery query = new GroupByQuery.Builder().setDataSource(QueryRunnerTestHelper.dataSource).setGranularity(QueryRunnerTestHelper.allGran).setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec(client_type, client_type))).setInterval(QueryRunnerTestHelper.fullOnInterval).setLimitSpec(new DefaultLimitSpec(Lists.newArrayList(new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)), 10)).setAggregatorSpecs(Lists.newArrayList(QueryRunnerTestHelper.rowsCount, new DistinctCountAggregatorFactory("UV", visitor_id, null))).build();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, factory.createRunner(incrementalIndexSegment), query);
    List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L));
    TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}

Also used : GroupByQueryRunnerFactory(io.druid.query.groupby.GroupByQueryRunnerFactory) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Segment(io.druid.segment.Segment) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Row(io.druid.data.input.Row) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Example 39 with OnheapIncrementalIndex

use of io.druid.segment.incremental.OnheapIncrementalIndex in project druid by druid-io.

the class DistinctCountTimeseriesQueryTest method testTopNWithDistinctCountAgg.

@Test
public void testTopNWithDistinctCountAgg() throws Exception {
    TimeseriesQueryEngine engine = new TimeseriesQueryEngine();
    IncrementalIndex index = new OnheapIncrementalIndex(0, Granularities.SECOND, new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, 1000);
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    DateTime time = new DateTime("2016-03-04T00:00:00.000Z");
    long timestamp = time.getMillis();
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "2", client_type, "android")));
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).intervals(QueryRunnerTestHelper.fullOnInterval).aggregators(Lists.newArrayList(QueryRunnerTestHelper.rowsCount, new DistinctCountAggregatorFactory("UV", visitor_id, null))).build();
    final Iterable<Result<TimeseriesResultValue>> results = Sequences.toList(engine.process(query, new IncrementalIndexStorageAdapter(index)), Lists.<Result<TimeseriesResultValue>>newLinkedList());
    List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(new Result<>(time, new TimeseriesResultValue(ImmutableMap.<String, Object>of("UV", 3, "rows", 3L))));
    TestHelper.assertExpectedResults(expectedResults, results);
}

Also used : TimeseriesResultValue(io.druid.query.timeseries.TimeseriesResultValue) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) TimeseriesQueryEngine(io.druid.query.timeseries.TimeseriesQueryEngine) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexStorageAdapter(io.druid.segment.incremental.IncrementalIndexStorageAdapter) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Example 40 with OnheapIncrementalIndex

use of io.druid.segment.incremental.OnheapIncrementalIndex in project druid by druid-io.

the class DistinctCountTopNQueryTest method testTopNWithDistinctCountAgg.

@Test
public void testTopNWithDistinctCountAgg() throws Exception {
    TopNQueryEngine engine = new TopNQueryEngine(new StupidPool<ByteBuffer>("TopNQueryEngine-bufferPool", new Supplier<ByteBuffer>() {

        @Override
        public ByteBuffer get() {
            return ByteBuffer.allocate(1024 * 1024);
        }
    }));
    IncrementalIndex index = new OnheapIncrementalIndex(0, Granularities.SECOND, new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, 1000);
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    DateTime time = new DateTime("2016-03-04T00:00:00.000Z");
    long timestamp = time.getMillis();
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "2", client_type, "android")));
    TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).intervals(QueryRunnerTestHelper.fullOnInterval).dimension(client_type).metric("UV").threshold(10).aggregators(Lists.newArrayList(QueryRunnerTestHelper.rowsCount, new DistinctCountAggregatorFactory("UV", visitor_id, null))).build();
    final Iterable<Result<TopNResultValue>> results = Sequences.toList(engine.query(query, new IncrementalIndexStorageAdapter(index)), Lists.<Result<TopNResultValue>>newLinkedList());
    List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<>(time, new TopNResultValue(Arrays.<Map<String, Object>>asList(ImmutableMap.<String, Object>of(client_type, "iphone", "UV", 2L, "rows", 2L), ImmutableMap.<String, Object>of(client_type, "android", "UV", 1L, "rows", 1L)))));
    TestHelper.assertExpectedResults(expectedResults, results);
}

Also used : TopNQueryBuilder(io.druid.query.topn.TopNQueryBuilder) TopNResultValue(io.druid.query.topn.TopNResultValue) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) ByteBuffer(java.nio.ByteBuffer) DateTime(org.joda.time.DateTime) TopNQueryEngine(io.druid.query.topn.TopNQueryEngine) Result(io.druid.query.Result) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexStorageAdapter(io.druid.segment.incremental.IncrementalIndexStorageAdapter) TopNQuery(io.druid.query.topn.TopNQuery) Supplier(com.google.common.base.Supplier) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Aggregations

OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)41 IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)33 MapBasedInputRow (io.druid.data.input.MapBasedInputRow)27 File (java.io.File)17 DateTime (org.joda.time.DateTime)17 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)16 Test (org.junit.Test)16 DimensionsSpec (io.druid.data.input.impl.DimensionsSpec)15 IncrementalIndexSchema (io.druid.segment.incremental.IncrementalIndexSchema)12 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)8 IOException (java.io.IOException)8 SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema)7 IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest)7 TimestampSpec (io.druid.data.input.impl.TimestampSpec)6 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)6 Random (java.util.Random)6 StringInputRowParser (io.druid.data.input.impl.StringInputRowParser)5 IndexSpec (io.druid.segment.IndexSpec)5 Result (io.druid.query.Result)4 InputRow (io.druid.data.input.InputRow)3