
Example 21 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class SpatialFilterTest, method makeMergedQueryableIndex:

private static QueryableIndex makeMergedQueryableIndex(IndexSpec indexSpec) {
    try {
        // All three indexes use an identical (immutable) schema: two spatial dimensions,
        // "dim.geo" assembled from (lat, long) and "spatialIsRad" from (lat2, long2).
        IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
            .withMinTimestamp(DATA_INTERVAL.getStartMillis())
            .withQueryGranularity(Granularities.DAY)
            .withMetrics(METRIC_AGGS)
            .withDimensionsSpec(
                DimensionsSpec.builder()
                    .setSpatialDimensions(Arrays.asList(
                        new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")),
                        new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))))
                    .build())
            .build();
        IncrementalIndex first = new OnheapIncrementalIndex.Builder().setIndexSchema(schema).setMaxRowCount(1000).build();
        IncrementalIndex second = new OnheapIncrementalIndex.Builder().setIndexSchema(schema).setMaxRowCount(1000).build();
        IncrementalIndex third = new OnheapIncrementalIndex.Builder().setIndexSchema(schema).setMaxRowCount(NUM_POINTS).build();
        first.add(new MapBasedInputRow(DateTimes.of("2013-01-01").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-01").toString(), "dim", "foo", "lat", 0.0f, "long", 0.0f, "val", 17L)));
        first.add(new MapBasedInputRow(DateTimes.of("2013-01-02").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-02").toString(), "dim", "foo", "lat", 1.0f, "long", 3.0f, "val", 29L)));
        first.add(new MapBasedInputRow(DateTimes.of("2013-01-03").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-03").toString(), "dim", "foo", "lat", 4.0f, "long", 2.0f, "val", 13L)));
        first.add(new MapBasedInputRow(DateTimes.of("2013-01-05").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-05").toString(), "dim", "foo", "lat", "_mmx.unknown", "long", "_mmx.unknown", "val", 101L)));
        first.add(new MapBasedInputRow(DateTimes.of("2013-01-05").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-05").toString(), "dim", "foo", "dim.geo", "_mmx.unknown", "val", 501L)));
        second.add(new MapBasedInputRow(DateTimes.of("2013-01-04").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-04").toString(), "dim", "foo", "lat", 7.0f, "long", 3.0f, "val", 91L)));
        second.add(new MapBasedInputRow(DateTimes.of("2013-01-05").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-05").toString(), "dim", "foo", "lat", 8.0f, "long", 6.0f, "val", 47L)));
        second.add(new MapBasedInputRow(DateTimes.of("2013-01-05").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-05").toString(), "lat2", 0.0f, "long2", 0.0f, "val", 13L)));
        // Add a bunch of random points
        Random rand = ThreadLocalRandom.current();
        for (int i = 8; i < NUM_POINTS; i++) {
            third.add(new MapBasedInputRow(DateTimes.of("2013-01-01").getMillis(), DIMS,
                ImmutableMap.of("timestamp", DateTimes.of("2013-01-01").toString(), "dim", "boo",
                    "lat", (float) (rand.nextFloat() * 10 + 10.0), "long", (float) (rand.nextFloat() * 10 + 10.0), "val", i)));
        }
        // createTempFile yields a unique path; delete the file so the same path
        // can serve as the parent directory for the per-index segment directories.
        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();
        File firstFile = new File(tmpFile, "first");
        File secondFile = new File(tmpFile, "second");
        File thirdFile = new File(tmpFile, "third");
        File mergedFile = new File(tmpFile, "merged");
        FileUtils.mkdirp(firstFile);
        FileUtils.mkdirp(secondFile);
        FileUtils.mkdirp(thirdFile);
        FileUtils.mkdirp(mergedFile);
        firstFile.deleteOnExit();
        secondFile.deleteOnExit();
        thirdFile.deleteOnExit();
        mergedFile.deleteOnExit();
        INDEX_MERGER.persist(first, DATA_INTERVAL, firstFile, indexSpec, null);
        INDEX_MERGER.persist(second, DATA_INTERVAL, secondFile, indexSpec, null);
        INDEX_MERGER.persist(third, DATA_INTERVAL, thirdFile, indexSpec, null);
        return INDEX_IO.loadIndex(
            INDEX_MERGER.mergeQueryableIndex(
                Arrays.asList(INDEX_IO.loadIndex(firstFile), INDEX_IO.loadIndex(secondFile), INDEX_IO.loadIndex(thirdFile)),
                true, METRIC_AGGS, mergedFile, indexSpec, null, -1));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used: Random (java.util.Random), ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom), IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex), QueryableIndex (org.apache.druid.segment.QueryableIndex), SpatialDimensionSchema (org.apache.druid.data.input.impl.SpatialDimensionSchema), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), IOException (java.io.IOException), File (java.io.File)
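
The flow above is: build several in-memory indexes, persist each one to its own directory, then merge the on-disk segments into a single QueryableIndex. Here is a condensed sketch of just that flow, assuming INDEX_MERGER, INDEX_IO, METRIC_AGGS, DATA_INTERVAL, and indexSpec are configured as in the test; baseDir and mergedDir are hypothetical directories, and the trailing arguments of mergeQueryableIndex simply mirror the test's call.

// Minimal persist-then-merge sketch (assumptions noted above).
List<QueryableIndex> parts = new ArrayList<>();
for (IncrementalIndex in : Arrays.asList(first, second, third)) {
    File dir = new File(baseDir, "part-" + parts.size());  // baseDir: hypothetical parent dir
    FileUtils.mkdirp(dir);
    INDEX_MERGER.persist(in, DATA_INTERVAL, dir, indexSpec, null);
    parts.add(INDEX_IO.loadIndex(dir));  // memory-map the segment just persisted
}
QueryableIndex merged = INDEX_IO.loadIndex(
    INDEX_MERGER.mergeQueryableIndex(parts, true, METRIC_AGGS, mergedDir, indexSpec, null, -1));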

Example 22 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class SpatialFilterTest, method makeIncrementalIndex:

private static IncrementalIndex makeIncrementalIndex() throws IOException {
    IncrementalIndex theIndex = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(new IncrementalIndexSchema.Builder()
            .withMinTimestamp(DATA_INTERVAL.getStartMillis())
            .withQueryGranularity(Granularities.DAY)
            .withMetrics(METRIC_AGGS)
            .withDimensionsSpec(
                DimensionsSpec.builder()
                    .setSpatialDimensions(Arrays.asList(
                        new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")),
                        new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))))
                    .build())
            .build())
        .setMaxRowCount(NUM_POINTS)
        .build();
    theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-01").getMillis(), DIMS,
        ImmutableMap.of("timestamp", DateTimes.of("2013-01-01").toString(), "dim", "foo", "lat", 0.0f, "long", 0.0f, "val", 17L)));
    theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-02").getMillis(), DIMS,
        ImmutableMap.of("timestamp", DateTimes.of("2013-01-02").toString(), "dim", "foo", "lat", 1.0f, "long", 3.0f, "val", 29L)));
    theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-03").getMillis(), DIMS,
        ImmutableMap.of("timestamp", DateTimes.of("2013-01-03").toString(), "dim", "foo", "lat", 4.0f, "long", 2.0f, "val", 13L)));
    theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-04").getMillis(), DIMS,
        ImmutableMap.of("timestamp", DateTimes.of("2013-01-04").toString(), "dim", "foo", "lat", 7.0f, "long", 3.0f, "val", 91L)));
    theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-05").getMillis(), DIMS,
        ImmutableMap.of("timestamp", DateTimes.of("2013-01-05").toString(), "dim", "foo", "lat", 8.0f, "long", 6.0f, "val", 47L)));
    theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-05").getMillis(), DIMS,
        ImmutableMap.of("timestamp", DateTimes.of("2013-01-05").toString(), "dim", "foo", "lat", "_mmx.unknown", "long", "_mmx.unknown", "val", 101L)));
    theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-05").getMillis(), DIMS,
        ImmutableMap.of("timestamp", DateTimes.of("2013-01-05").toString(), "dim", "foo", "dim.geo", "_mmx.unknown", "val", 501L)));
    theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-05").getMillis(), DIMS,
        ImmutableMap.of("timestamp", DateTimes.of("2013-01-05").toString(), "lat2", 0.0f, "long2", 0.0f, "val", 13L)));
    // Add a bunch of random points
    Random rand = ThreadLocalRandom.current();
    for (int i = 8; i < NUM_POINTS; i++) {
        theIndex.add(new MapBasedInputRow(DateTimes.of("2013-01-01").getMillis(), DIMS,
            ImmutableMap.of("timestamp", DateTimes.of("2013-01-01").toString(), "dim", "boo",
                "lat", (float) (rand.nextFloat() * 10 + 10.0), "long", (float) (rand.nextFloat() * 10 + 10.0), "val", i)));
    }
    return theIndex;
}
Also used: Random (java.util.Random), ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom), IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex), SpatialDimensionSchema (org.apache.druid.data.input.impl.SpatialDimensionSchema), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)
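
Because these snippets omit imports, here is a self-contained minimal sketch of the same builder pattern. The import paths reflect recent Druid versions, and the metric name and row values are illustrative rather than taken from the test.

import java.util.Arrays;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.SpatialDimensionSchema;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;

public class SpatialIndexSketch {
    public static IncrementalIndex buildSpatialIndex() throws Exception {
        // "dim.geo" is a spatial dimension assembled from the "lat" and "long" fields.
        IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
            .withMinTimestamp(DateTimes.of("2013-01-01").getMillis())
            .withQueryGranularity(Granularities.DAY)
            .withMetrics(new CountAggregatorFactory("rows"))  // illustrative metric
            .withDimensionsSpec(
                DimensionsSpec.builder()
                    .setSpatialDimensions(Arrays.asList(
                        new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long"))))
                    .build())
            .build();
        IncrementalIndex index = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(schema)
            .setMaxRowCount(10_000)
            .build();
        index.add(new MapBasedInputRow(
            DateTimes.of("2013-01-01").getMillis(),
            Arrays.asList("dim"),
            ImmutableMap.of("dim", "foo", "lat", 0.0f, "long", 0.0f)));
        return index;
    }
}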

Example 23 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class DataGeneratorTest, method testToIndex:

@Test
public void testToIndex() {
    List<GeneratorColumnSchema> schemas = new ArrayList<>();
    schemas.add(GeneratorColumnSchema.makeSequential("dimA", ValueType.STRING, false, 1, null, 10, 20));
    schemas.add(GeneratorColumnSchema.makeEnumeratedSequential("dimB", ValueType.STRING, false, 1, null, Arrays.asList("Hello", "World", "Foo", "Bar")));
    schemas.add(GeneratorColumnSchema.makeSequential("dimC", ValueType.STRING, false, 1, 0.50, 30, 40));
    DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
    DimensionsSpec dimensions = new DimensionsSpec(Arrays.asList(
        new StringDimensionSchema("dimA"),
        new StringDimensionSchema("dimB"),
        new StringDimensionSchema("dimC")));
    AggregatorFactory[] metrics = { new CountAggregatorFactory("cnt") };
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withQueryGranularity(Granularities.MINUTE)
        .withDimensionsSpec(dimensions)
        .withMetrics(metrics)
        .withRollup(false)
        .build();
    IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setSortFacts(false)
        .setMaxRowCount(1_000_000)
        .build();
    dataGenerator.addToIndex(index, 100);
    Assert.assertEquals(100, index.size());
}
Also used: IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex), ArrayList (java.util.ArrayList), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
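
addToIndex(index, 100) generates and inserts 100 rows in one call. When the rows are needed individually (for assertions or custom routing), DataGenerator also exposes a nextRow() method in the versions I have seen; treat that method name as an assumption and check it against your Druid version.

// Hedged equivalent of dataGenerator.addToIndex(index, 100), pulling rows one
// at a time; assumes DataGenerator.nextRow() returns an InputRow.
for (int i = 0; i < 100; i++) {
    // index.add can throw IndexSizeExceededException; declare or catch it.
    index.add(dataGenerator.nextRow());
}
Assert.assertEquals(100, index.size());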

Example 24 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class BroadcastSegmentIndexedTableTest, method setup:

@Before
public void setup() throws IOException, SegmentLoadingException {
    final ObjectMapper mapper = new DefaultObjectMapper();
    mapper.registerModule(new SegmentizerModule());
    final IndexIO indexIO = new IndexIO(mapper, () -> 0);
    mapper.setInjectableValues(new InjectableValues.Std()
        .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
        .addValue(ObjectMapper.class.getName(), mapper)
        .addValue(IndexIO.class, indexIO)
        .addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT));
    final IndexMerger indexMerger = new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance());
    Interval testInterval = Intervals.of("2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z");
    IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
    File segment = new File(temporaryFolder.newFolder(), "segment");
    File persisted = indexMerger.persist(data, testInterval, segment, new IndexSpec(), null);
    File factoryJson = new File(persisted, "factory.json");
    Assert.assertTrue(factoryJson.exists());
    SegmentizerFactory factory = mapper.readValue(factoryJson, SegmentizerFactory.class);
    Assert.assertTrue(factory instanceof MMappedQueryableSegmentizerFactory);
    DataSegment dataSegment = new DataSegment(DATASOURCE, testInterval, DateTimes.nowUtc().toString(),
        ImmutableMap.of(), columnNames, ImmutableList.of(), null, null, segment.getTotalSpace());
    backingSegment = (QueryableIndexSegment) factory.factorize(dataSegment, segment, false, SegmentLazyLoadFailCallback.NOOP);
    columnNames = ImmutableList.<String>builder()
        .add(ColumnHolder.TIME_COLUMN_NAME)
        .addAll(backingSegment.asQueryableIndex().getColumnNames())
        .build();
    broadcastTable = new BroadcastSegmentIndexedTable(backingSegment, keyColumns, dataSegment.getVersion());
}
Also used: IndexMerger (org.apache.druid.segment.IndexMerger), IndexSpec (org.apache.druid.segment.IndexSpec), IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), IndexMergerV9 (org.apache.druid.segment.IndexMergerV9), MMappedQueryableSegmentizerFactory (org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory), SegmentizerFactory (org.apache.druid.segment.loading.SegmentizerFactory), InjectableValues (com.fasterxml.jackson.databind.InjectableValues), DataSegment (org.apache.druid.timeline.DataSegment), TestExprMacroTable (org.apache.druid.query.expression.TestExprMacroTable), ExprMacroTable (org.apache.druid.math.expr.ExprMacroTable), IndexIO (org.apache.druid.segment.IndexIO), SegmentizerModule (org.apache.druid.jackson.SegmentizerModule), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), File (java.io.File), Interval (org.joda.time.Interval), Before (org.junit.Before)
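
Stripped of the mapper wiring, the core round trip in this setup is: persist the IncrementalIndex, read back the factory.json that persist writes into the segment directory, and let the resulting SegmentizerFactory materialize the segment. A sketch under those assumptions (indexMerger, mapper, data, testInterval, and dataSegment prepared as above; segmentDir is a hypothetical output directory):

// Persist writes the segment files plus a factory.json describing how to load them.
File persisted = indexMerger.persist(data, testInterval, segmentDir, new IndexSpec(), null);
SegmentizerFactory factory = mapper.readValue(new File(persisted, "factory.json"), SegmentizerFactory.class);
// factorize maps the on-disk segment back into a queryable Segment.
Segment segment = factory.factorize(dataSegment, segmentDir, false, SegmentLazyLoadFailCallback.NOOP);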

Example 25 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class IngestSegmentFirehoseTest, method createTestIndex:

private void createTestIndex(File segmentDir) throws Exception {
    final List<String> rows = Lists.newArrayList(
        "2014102200\thost1\t10\t0\t1",
        "2014102200\thost2\t20\t1\t0",
        "2014102200\thost3\t30\t1\t1",
        "2014102201\thost1\t10\t1\t1",
        "2014102201\thost2\t20\t1\t1",
        "2014102201\thost3\t30\t1\t1",
        "2014102202\thost1\t10\t1\t1",
        "2014102202\thost2\t20\t1\t1",
        "2014102202\thost3\t30\t1\t1");
    final StringInputRowParser parser = new StringInputRowParser(
        new DelimitedParseSpec(
            new TimestampSpec("timestamp", "yyyyMMddHH", null),
            DIMENSIONS_SPEC,
            "\t",
            null,
            ImmutableList.of("timestamp", "host", "visited", "x", "y", "spatial"),
            false,
            0),
        StandardCharsets.UTF_8.toString());
    try (final IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(new IncrementalIndexSchema.Builder()
            .withDimensionsSpec(parser.getParseSpec().getDimensionsSpec())
            .withMetrics(AGGREGATORS.toArray(new AggregatorFactory[0]))
            .build())
        .setMaxRowCount(5000)
        .build()) {
        for (String line : rows) {
            index.add(parser.parse(line));
        }
        indexMerger.persist(index, segmentDir, new IndexSpec(), null);
    }
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex), DelimitedParseSpec (org.apache.druid.data.input.impl.DelimitedParseSpec), StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)
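
The positional constructor of DelimitedParseSpec is easy to misread, so the sketch below spells out one plausible reading of the arguments used above. The per-argument comments are my interpretation; verify them against the constructor in your Druid version.

new DelimitedParseSpec(
    new TimestampSpec("timestamp", "yyyyMMddHH", null),  // timestamp column and format
    DIMENSIONS_SPEC,                                     // dimension declarations
    "\t",                                                // field delimiter
    null,                                                // list delimiter (null = default)
    ImmutableList.of("timestamp", "host", "visited", "x", "y", "spatial"),  // ordered column names
    false,                                               // hasHeaderRow (assumed)
    0);                                                  // header rows to skip (assumed)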

Aggregations

IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 109 uses
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 85 uses
File (java.io.File): 59 uses
Test (org.junit.Test): 51 uses
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 46 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 46 uses
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 26 uses
IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest): 26 uses
ArrayList (java.util.ArrayList): 25 uses
IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema): 25 uses
IndexSpec (org.apache.druid.segment.IndexSpec): 19 uses
QueryableIndex (org.apache.druid.segment.QueryableIndex): 19 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 17 uses
InputRow (org.apache.druid.data.input.InputRow): 15 uses
IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 14 uses
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 12 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 11 uses
IOException (java.io.IOException): 10 uses
Before (org.junit.Before): 10 uses
Interval (org.joda.time.Interval): 9 uses