Use of org.apache.druid.segment.IndexSpec in project druid by druid-io: the createIndex method of the AggregationTestHelper class.
public void createIndex(
    Iterator rows,
    InputRowParser parser,
    final AggregatorFactory[] metrics,
    File outDir,
    long minTimestamp,
    Granularity gran,
    boolean deserializeComplexMetrics,
    int maxRowCount,
    boolean rollup
) throws Exception
{
  IncrementalIndex index = null;
  List<File> toMerge = new ArrayList<>();
  try {
    index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(
            new IncrementalIndexSchema.Builder()
                .withMinTimestamp(minTimestamp)
                .withDimensionsSpec(parser.getParseSpec().getDimensionsSpec())
                .withQueryGranularity(gran)
                .withMetrics(metrics)
                .withRollup(rollup)
                .build()
        )
        .setDeserializeComplexMetrics(deserializeComplexMetrics)
        .setMaxRowCount(maxRowCount)
        .build();
    while (rows.hasNext()) {
      Object row = rows.next();
      if (!index.canAppendRow()) {
        // Spill the full in-memory index to a temporary folder and start a fresh one.
        File tmp = tempFolder.newFolder();
        toMerge.add(tmp);
        indexMerger.persist(index, tmp, new IndexSpec(), null);
        index.close();
        index = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(
                new IncrementalIndexSchema.Builder()
                    .withMinTimestamp(minTimestamp)
                    .withDimensionsSpec(parser.getParseSpec().getDimensionsSpec())
                    .withQueryGranularity(gran)
                    .withMetrics(metrics)
                    .withRollup(rollup)
                    .build()
            )
            .setDeserializeComplexMetrics(deserializeComplexMetrics)
            .setMaxRowCount(maxRowCount)
            .build();
      }
      if (row instanceof String && parser instanceof StringInputRowParser) {
        // Note: this is required because StringInputRowParser is InputRowParser<ByteBuffer> as opposed to
        // InputRowParser<String>
        index.add(((StringInputRowParser) parser).parse((String) row));
      } else {
        index.add(((List<InputRow>) parser.parseBatch(row)).get(0));
      }
    }
    if (toMerge.size() > 0) {
      // At least one spill happened: persist the last index, then merge all persisted pieces into outDir.
      File tmp = tempFolder.newFolder();
      toMerge.add(tmp);
      indexMerger.persist(index, tmp, new IndexSpec(), null);
      List<QueryableIndex> indexes = new ArrayList<>(toMerge.size());
      for (File file : toMerge) {
        indexes.add(indexIO.loadIndex(file));
      }
      indexMerger.mergeQueryableIndex(indexes, rollup, metrics, outDir, new IndexSpec(), null, -1);
      for (QueryableIndex qi : indexes) {
        qi.close();
      }
    } else {
      // Everything fit in memory: persist the single index directly to outDir.
      indexMerger.persist(index, outDir, new IndexSpec(), null);
    }
  } finally {
    if (index != null) {
      index.close();
    }
  }
}
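As a usage illustration (not taken from the test itself), the call below sketches how this helper might be driven; `helper` stands for an already-constructed AggregationTestHelper, `rows` for an iterator of input events, and the parser, metric, and output directory names are assumptions:

// Hypothetical invocation sketch; parser, metric, rows, and outDir are illustrative assumptions.
InputRowParser parser = new MapInputRowParser(
    new TimeAndDimsParseSpec(
        new TimestampSpec("ts", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.singletonList("dim")))
    )
);
AggregatorFactory[] metrics = new AggregatorFactory[]{new CountAggregatorFactory("cnt")};
helper.createIndex(
    rows,                        // Iterator of input rows (e.g. Map<String, Object> events)
    parser,
    metrics,
    new File("/tmp/agg-index"),  // output directory for the merged segment
    0,                           // minTimestamp
    Granularities.NONE,          // query granularity
    true,                        // deserializeComplexMetrics
    5000,                        // maxRowCount per in-memory index before a spill
    true                         // rollup
);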
Use of org.apache.druid.segment.IndexSpec in project druid by druid-io: the setup method of the GroupByLimitPushDownInsufficientBufferTest class.
@Before
public void setup() throws Exception
{
  tmpDir = FileUtils.createTempDir();
  InputRow row;
  List<String> dimNames = Arrays.asList("dimA", "metA");
  Map<String, Object> event;

  final IncrementalIndex indexA = makeIncIndex(false);
  incrementalIndices.add(indexA);

  event = new HashMap<>();
  event.put("dimA", "hello");
  event.put("metA", 100);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexA.add(row);

  event = new HashMap<>();
  event.put("dimA", "mango");
  event.put("metA", 95);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexA.add(row);

  event = new HashMap<>();
  event.put("dimA", "world");
  event.put("metA", 75);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexA.add(row);

  event = new HashMap<>();
  event.put("dimA", "fubaz");
  event.put("metA", 75);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexA.add(row);

  event = new HashMap<>();
  event.put("dimA", "zortaxx");
  event.put("metA", 999);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexA.add(row);

  event = new HashMap<>();
  event.put("dimA", "blarg");
  event.put("metA", 125);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexA.add(row);

  event = new HashMap<>();
  event.put("dimA", "blerg");
  event.put("metA", 130);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexA.add(row);

  final File fileA = INDEX_MERGER_V9.persist(
      indexA,
      new File(tmpDir, "A"),
      new IndexSpec(),
      OffHeapMemorySegmentWriteOutMediumFactory.instance()
  );
  QueryableIndex qindexA = INDEX_IO.loadIndex(fileA);

  final IncrementalIndex indexB = makeIncIndex(false);
  incrementalIndices.add(indexB);

  event = new HashMap<>();
  event.put("dimA", "foo");
  event.put("metA", 200);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexB.add(row);

  event = new HashMap<>();
  event.put("dimA", "world");
  event.put("metA", 75);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexB.add(row);

  event = new HashMap<>();
  event.put("dimA", "mango");
  event.put("metA", 95);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexB.add(row);

  event = new HashMap<>();
  event.put("dimA", "zebra");
  event.put("metA", 180);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexB.add(row);

  event = new HashMap<>();
  event.put("dimA", "blorg");
  event.put("metA", 120);
  row = new MapBasedInputRow(1000, dimNames, event);
  indexB.add(row);

  final File fileB = INDEX_MERGER_V9.persist(
      indexB,
      new File(tmpDir, "B"),
      new IndexSpec(),
      OffHeapMemorySegmentWriteOutMediumFactory.instance()
  );
  QueryableIndex qindexB = INDEX_IO.loadIndex(fileB);

  groupByIndices = Arrays.asList(qindexA, qindexB);
  resourceCloser = Closer.create();
  setupGroupByFactory();
}
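The makeIncIndex helper is not shown in this excerpt. A minimal sketch of what it plausibly builds, assuming dimA and metA are both ingested as dimensions (as the dimNames list above suggests) and the rollup flag is passed through:

// Sketch only; the actual private helper in the test class may differ in detail.
private IncrementalIndex makeIncIndex(boolean withRollup)
{
  return new OnheapIncrementalIndex.Builder()
      .setIndexSchema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(
                  new DimensionsSpec(
                      Arrays.asList(
                          new StringDimensionSchema("dimA"),
                          new LongDimensionSchema("metA")
                      )
                  )
              )
              .withRollup(withRollup)
              .build()
      )
      .setMaxRowCount(1000)
      .build();
}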
Use of org.apache.druid.segment.IndexSpec in project druid by druid-io: the testSerdeWithDefaults method of the RealtimeTuningConfigTest class.
@Test
public void testSerdeWithDefaults() throws Exception
{
  String jsonStr = "{\"type\":\"realtime\"}";
  ObjectMapper mapper = TestHelper.makeJsonMapper();
  RealtimeTuningConfig config = (RealtimeTuningConfig) mapper.readValue(
      mapper.writeValueAsString(mapper.readValue(jsonStr, TuningConfig.class)),
      TuningConfig.class
  );
  Assert.assertNotNull(config.getBasePersistDirectory());
  Assert.assertEquals(new OnheapIncrementalIndex.Spec(), config.getAppendableIndexSpec());
  Assert.assertEquals(0, config.getHandoffConditionTimeout());
  Assert.assertEquals(0, config.getAlertTimeout());
  Assert.assertEquals(new IndexSpec(), config.getIndexSpec());
  Assert.assertEquals(new IndexSpec(), config.getIndexSpecForIntermediatePersists());
  Assert.assertEquals(new Period("PT10M"), config.getIntermediatePersistPeriod());
  Assert.assertEquals(new NumberedShardSpec(0, 1), config.getShardSpec());
  Assert.assertEquals(0, config.getMaxPendingPersists());
  Assert.assertEquals(1000000, config.getMaxRowsInMemory());
  Assert.assertEquals(0, config.getMergeThreadPriority());
  Assert.assertEquals(0, config.getPersistThreadPriority());
  Assert.assertEquals(new Period("PT10M"), config.getWindowPeriod());
  Assert.assertEquals(false, config.isReportParseExceptions());
}
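Both serde tests in this class round-trip the config through JSON in a single nested expression. Written out step by step, the equivalent form (a readability sketch, not a change to the test) is:

// Equivalent, step-by-step form of the nested round-trip call used in the test.
TuningConfig deserialized = mapper.readValue(jsonStr, TuningConfig.class);       // JSON -> config
String reserialized = mapper.writeValueAsString(deserialized);                   // config -> JSON
RealtimeTuningConfig config =
    (RealtimeTuningConfig) mapper.readValue(reserialized, TuningConfig.class);   // JSON -> config again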
Use of org.apache.druid.segment.IndexSpec in project druid by druid-io: the testSerdeWithNonDefaults method of the RealtimeTuningConfigTest class.
@Test
public void testSerdeWithNonDefaults() throws Exception
{
  String jsonStr = "{\n"
      + " \"type\": \"realtime\",\n"
      + " \"maxRowsInMemory\": 100,\n"
      + " \"intermediatePersistPeriod\": \"PT1H\",\n"
      + " \"windowPeriod\": \"PT1H\",\n"
      + " \"basePersistDirectory\": \"/tmp/xxx\",\n"
      + " \"maxPendingPersists\": 100,\n"
      + " \"persistThreadPriority\": 100,\n"
      + " \"mergeThreadPriority\": 100,\n"
      + " \"reportParseExceptions\": true,\n"
      + " \"handoffConditionTimeout\": 100,\n"
      + " \"alertTimeout\": 70,\n"
      + " \"indexSpec\": { \"metricCompression\" : \"NONE\" },\n"
      + " \"indexSpecForIntermediatePersists\": { \"dimensionCompression\" : \"uncompressed\" },\n"
      + " \"appendableIndexSpec\": { \"type\" : \"onheap\" }\n"
      + "}";
  ObjectMapper mapper = TestHelper.makeJsonMapper();
  RealtimeTuningConfig config = (RealtimeTuningConfig) mapper.readValue(
      mapper.writeValueAsString(mapper.readValue(jsonStr, TuningConfig.class)),
      TuningConfig.class
  );
  Assert.assertEquals("/tmp/xxx", config.getBasePersistDirectory().toString());
  Assert.assertEquals(new OnheapIncrementalIndex.Spec(), config.getAppendableIndexSpec());
  Assert.assertEquals(100, config.getHandoffConditionTimeout());
  Assert.assertEquals(70, config.getAlertTimeout());
  Assert.assertEquals(new Period("PT1H"), config.getIntermediatePersistPeriod());
  Assert.assertEquals(new NumberedShardSpec(0, 1), config.getShardSpec());
  Assert.assertEquals(100, config.getMaxPendingPersists());
  Assert.assertEquals(100, config.getMaxRowsInMemory());
  Assert.assertEquals(100, config.getMergeThreadPriority());
  Assert.assertEquals(100, config.getPersistThreadPriority());
  Assert.assertEquals(new Period("PT1H"), config.getWindowPeriod());
  Assert.assertEquals(true, config.isReportParseExceptions());
  Assert.assertEquals(new IndexSpec(null, null, CompressionStrategy.NONE, null), config.getIndexSpec());
  Assert.assertEquals(new IndexSpec(null, CompressionStrategy.UNCOMPRESSED, null, null), config.getIndexSpecForIntermediatePersists());
}
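The four-argument IndexSpec constructor used in the last two assertions corresponds positionally, as the JSON in this test suggests, to the bitmap serde factory, dimension compression, metric compression, and long encoding. A small sketch of producing the same specs directly from their JSON fragments, assuming the standard Jackson bindings used elsewhere in the test:

// Sketch: deserializing the same index specs straight from their JSON fragments.
IndexSpec metricNone = mapper.readValue("{\"metricCompression\":\"NONE\"}", IndexSpec.class);
IndexSpec dimUncompressed = mapper.readValue("{\"dimensionCompression\":\"uncompressed\"}", IndexSpec.class);
// Expected to match the values asserted above:
//   metricNone       -> new IndexSpec(null, null, CompressionStrategy.NONE, null)
//   dimUncompressed  -> new IndexSpec(null, CompressionStrategy.UNCOMPRESSED, null, null)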
Use of org.apache.druid.segment.IndexSpec in project druid by druid-io: the testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentDimensions method of the NewestSegmentFirstPolicyTest class.
@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentDimensions()
{
  // Same indexSpec as what is set in the auto compaction config
  Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {});
  // Same partitionsSpec as what is set in the auto compaction config
  PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(
      ClientCompactionTaskQueryTuningConfig.from(null, null)
  );
  // Create segments that were compacted (CompactionState != null) and have
  // Dimensions=["foo", "bar"] for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
  // Dimensions=["foo"] for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
  // Dimensions=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (dimensions was not set during last compaction)
  // and dimensionsSpec=null for interval 2017-10-04T00:00:00/2017-10-05T00:00:00 (dimensionsSpec was not set during last compaction)
  final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
      new SegmentGenerateSpec(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), new Period("P1D"), null, new CompactionState(partitionsSpec, new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), null, null, indexSpec, null)),
      new SegmentGenerateSpec(Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"), new Period("P1D"), null, new CompactionState(partitionsSpec, new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))), null, null, indexSpec, null)),
      new SegmentGenerateSpec(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), new Period("P1D"), null, new CompactionState(partitionsSpec, DimensionsSpec.EMPTY, null, null, indexSpec, null)),
      new SegmentGenerateSpec(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), new Period("P1D"), null, new CompactionState(partitionsSpec, null, null, null, indexSpec, null))
  );
  // Auto compaction config sets Dimensions=["foo"]
  CompactionSegmentIterator iterator = policy.reset(
      ImmutableMap.of(
          DATA_SOURCE,
          createCompactionConfig(130000, new Period("P0D"), null, new UserCompactionTaskDimensionsConfig(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))), null, null)
      ),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  // We should get interval 2017-10-01T00:00:00/2017-10-02T00:00:00, interval 2017-10-04T00:00:00/2017-10-05T00:00:00, and interval 2017-10-03T00:00:00/2017-10-04T00:00:00.
  Assert.assertTrue(iterator.hasNext());
  List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  Assert.assertTrue(iterator.hasNext());
  expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  Assert.assertTrue(iterator.hasNext());
  expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  // No more
  Assert.assertFalse(iterator.hasNext());
  // Auto compaction config sets Dimensions=null
  iterator = policy.reset(
      ImmutableMap.of(
          DATA_SOURCE,
          createCompactionConfig(130000, new Period("P0D"), null, new UserCompactionTaskDimensionsConfig(null), null, null)
      ),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  // No more
  Assert.assertFalse(iterator.hasNext());
}
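Conceptually, the test exercises a comparison between the dimensions recorded in each segment's CompactionState and the dimensions requested by the auto compaction config. A rough illustration of that decision, consistent with the assertions above but not the actual Druid implementation:

// Rough illustration only; the real check lives inside the compaction iterator.
static boolean needsRecompaction(DimensionsSpec configured, DimensionsSpec lastCompacted)
{
  // If the auto compaction config does not pin dimensions, previously compacted
  // segments are left alone; otherwise any mismatch (including a missing or empty
  // dimensionsSpec recorded by the last compaction) triggers recompaction.
  return configured != null && !configured.equals(lastCompacted);
}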