Example 16 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From the class BroadcastJoinableMMappedQueryableSegmentizerFactoryTest, the method testSegmentizer:

@Test
public void testSegmentizer() throws IOException, SegmentLoadingException {
    final ObjectMapper mapper = new DefaultObjectMapper();
    mapper.registerModule(new SegmentizerModule());
    final IndexIO indexIO = new IndexIO(mapper, () -> 0);
    mapper.setInjectableValues(
        new InjectableValues.Std()
            .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
            .addValue(ObjectMapper.class.getName(), mapper)
            .addValue(IndexIO.class, indexIO)
            .addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT));
    IndexMerger indexMerger = new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance());
    SegmentizerFactory expectedFactory = new BroadcastJoinableMMappedQueryableSegmentizerFactory(indexIO, KEY_COLUMNS);
    Interval testInterval = Intervals.of("2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z");
    IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
    List<String> columnNames = data.getColumnNames();
    File segment = new File(temporaryFolder.newFolder(), "segment");
    // Persist with an IndexSpec whose fifth argument is the custom segmentizer factory.
    File persistedSegmentRoot = indexMerger.persist(
        data, testInterval, segment, new IndexSpec(null, null, null, null, expectedFactory), null);
    // The factory is serialized beside the segment files as factory.json.
    File factoryJson = new File(persistedSegmentRoot, "factory.json");
    Assert.assertTrue(factoryJson.exists());
    SegmentizerFactory factory = mapper.readValue(factoryJson, SegmentizerFactory.class);
    Assert.assertTrue(factory instanceof BroadcastJoinableMMappedQueryableSegmentizerFactory);
    Assert.assertEquals(expectedFactory, factory);
    // Load the segment back through the deserialized factory and expose it as an indexed table.
    final DataSegment dataSegment = new DataSegment(
        TABLE_NAME, testInterval, DateTimes.nowUtc().toString(), ImmutableMap.of(), columnNames,
        ImmutableList.of(), null, null, persistedSegmentRoot.getTotalSpace());
    final Segment loaded = factory.factorize(dataSegment, persistedSegmentRoot, false, SegmentLazyLoadFailCallback.NOOP);
    final BroadcastSegmentIndexedTable table = (BroadcastSegmentIndexedTable) loaded.as(IndexedTable.class);
    Assert.assertNotNull(table);
}
Also used: IndexMerger (org.apache.druid.segment.IndexMerger), IndexSpec (org.apache.druid.segment.IndexSpec), IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), IndexMergerV9 (org.apache.druid.segment.IndexMergerV9), BroadcastSegmentIndexedTable (org.apache.druid.segment.join.table.BroadcastSegmentIndexedTable), IndexedTable (org.apache.druid.segment.join.table.IndexedTable), InjectableValues (com.fasterxml.jackson.databind.InjectableValues), DataSegment (org.apache.druid.timeline.DataSegment), TestExprMacroTable (org.apache.druid.query.expression.TestExprMacroTable), ExprMacroTable (org.apache.druid.math.expr.ExprMacroTable), Segment (org.apache.druid.segment.Segment), IndexIO (org.apache.druid.segment.IndexIO), SegmentizerModule (org.apache.druid.jackson.SegmentizerModule), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), File (java.io.File), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), Interval (org.joda.time.Interval), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
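
The essence of this test is that IndexSpec's fifth constructor argument is a SegmentizerFactory, and IndexMergerV9.persist writes that factory out as factory.json beside the segment files. Below is a minimal sketch of just that step, assuming the merger, IndexIO, data, interval, output directory, and key columns are already set up as in the test; the helper name is hypothetical:

import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.IndexMerger;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.loading.BroadcastJoinableMMappedQueryableSegmentizerFactory;
import org.apache.druid.segment.loading.SegmentizerFactory;
import org.joda.time.Interval;

// Hypothetical helper: persist an index so it can later be loaded as a broadcast-join table.
static File persistAsBroadcastJoinable(
    IndexMerger merger, IndexIO indexIO, IncrementalIndex data,
    Interval interval, File outDir, List<String> keyColumns) throws IOException
{
    SegmentizerFactory factory = new BroadcastJoinableMMappedQueryableSegmentizerFactory(indexIO, keyColumns);
    // The first four IndexSpec arguments (bitmap serde, dimension compression,
    // metric compression, long encoding) are null here, so the defaults apply.
    return merger.persist(data, interval, outDir, new IndexSpec(null, null, null, null, factory), null);
}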

Example 17 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From the class SegmentManagerBroadcastJoinIndexedTableTest, the method createSegment:

private DataSegment createSegment(IncrementalIndex data, String interval, String version) throws IOException {
    final DataSegment tmpSegment = new DataSegment(
        TABLE_NAME, Intervals.of(interval), version, Collections.emptyMap(), Collections.emptyList(),
        Collections.emptyList(), new NumberedShardSpec(0, 0), 9, 100);
    final String storageDir = DataSegmentPusher.getDefaultStorageDir(tmpSegment, false);
    final File segmentDir = new File(segmentDeepStorageDir, storageDir);
    FileUtils.mkdirp(segmentDir);
    IndexMerger indexMerger = new IndexMergerV9(objectMapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance());
    SegmentizerFactory factory = new BroadcastJoinableMMappedQueryableSegmentizerFactory(indexIO, KEY_COLUMNS);
    // Persist with the factory in the IndexSpec, then write factory.json explicitly as well.
    indexMerger.persist(data, Intervals.of(interval), segmentDir, new IndexSpec(null, null, null, null, factory), null);
    final File factoryJson = new File(segmentDir, "factory.json");
    objectMapper.writeValue(factoryJson, factory);
    // Point the load spec at the local directory so the segment can be loaded from there.
    return tmpSegment.withLoadSpec(ImmutableMap.of("type", "local", "path", segmentDir.getAbsolutePath()));
}
Also used: IndexMerger (org.apache.druid.segment.IndexMerger), IndexSpec (org.apache.druid.segment.IndexSpec), IndexMergerV9 (org.apache.druid.segment.IndexMergerV9), BroadcastJoinableMMappedQueryableSegmentizerFactory (org.apache.druid.segment.loading.BroadcastJoinableMMappedQueryableSegmentizerFactory), SegmentizerFactory (org.apache.druid.segment.loading.SegmentizerFactory), DataSegment (org.apache.druid.timeline.DataSegment), File (java.io.File), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec)
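
Given the behavior shown in Example 16, the persist call here already writes factory.json, so the explicit objectMapper.writeValue appears to overwrite it with equivalent content. A short sketch of how the file can be read back for a sanity check, assuming objectMapper has the SegmentizerModule registered as in Example 16 and segmentDir is the directory from the method above:

import java.io.File;
import org.apache.druid.segment.loading.BroadcastJoinableMMappedQueryableSegmentizerFactory;
import org.apache.druid.segment.loading.SegmentizerFactory;

// Read the persisted factory back; polymorphic deserialization resolves the
// concrete class from the "type" field in factory.json.
SegmentizerFactory fromDisk = objectMapper.readValue(new File(segmentDir, "factory.json"), SegmentizerFactory.class);
Assert.assertTrue(fromDisk instanceof BroadcastJoinableMMappedQueryableSegmentizerFactory);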

Example 18 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From the class MultiValuedDimensionTest, the method setup:

@Before
public void setup() throws Exception {
    incrementalIndex = new OnheapIncrementalIndex.Builder().setSimpleTestingIndexSchema(new CountAggregatorFactory("count")).setMaxRowCount(5000).build();
    // "\t" is the CSVParseSpec list delimiter, so tags and othertags parse as multi-value dimensions.
    StringInputRowParser parser = new StringInputRowParser(
        new CSVParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags", "othertags"))),
            "\t", ImmutableList.of("timestamp", "product", "tags", "othertags"), false, 0),
        "UTF-8");
    String[] rows = new String[] {
        "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3,u1\tu2",
        "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5,u3\tu4",
        "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7,u1\tu5",
        "2011-01-14T00:00:00.000Z,product_4,\"\",u2"
    };
    for (String row : rows) {
        incrementalIndex.add(parser.parse(row));
    }
    persistedSegmentDir = FileUtils.createTempDir();
    TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory).persist(incrementalIndex, persistedSegmentDir, new IndexSpec(), null);
    queryableIndex = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDir);
    StringInputRowParser parserNullSampler = new StringInputRowParser(
        new JSONParseSpec(
            new TimestampSpec("time", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags", "othertags")))),
        "UTF-8");
    incrementalIndexNullSampler = new OnheapIncrementalIndex.Builder().setSimpleTestingIndexSchema(new CountAggregatorFactory("count")).setMaxRowCount(5000).build();
    String[] rowsNullSampler = new String[] {
        "{\"time\":\"2011-01-13T00:00:00.000Z\",\"product\":\"product_1\",\"tags\":[],\"othertags\":[\"u1\", \"u2\"]}",
        "{\"time\":\"2011-01-12T00:00:00.000Z\",\"product\":\"product_2\",\"othertags\":[\"u3\", \"u4\"]}",
        "{\"time\":\"2011-01-14T00:00:00.000Z\",\"product\":\"product_3\",\"tags\":[\"\"],\"othertags\":[\"u1\", \"u5\"]}",
        "{\"time\":\"2011-01-15T00:00:00.000Z\",\"product\":\"product_4\",\"tags\":[\"t1\", \"t2\", \"\"],\"othertags\":[\"u6\", \"u7\"]}",
        "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_5\",\"tags\":[],\"othertags\":[]}",
        "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_6\"}",
        "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_7\",\"othertags\":[]}",
        "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_8\",\"tags\":[\"\"],\"othertags\":[]}"
    };
    for (String row : rowsNullSampler) {
        incrementalIndexNullSampler.add(parserNullSampler.parse(row));
    }
    persistedSegmentDirNullSampler = FileUtils.createTempDir();
    TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory).persist(incrementalIndexNullSampler, persistedSegmentDirNullSampler, new IndexSpec(), null);
    queryableIndexNullSampler = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDirNullSampler);
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), TopNQueryBuilder (org.apache.druid.query.topn.TopNQueryBuilder), CSVParseSpec (org.apache.druid.data.input.impl.CSVParseSpec), StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec), Before (org.junit.Before)
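
This example persists with the no-argument constructor, new IndexSpec(), so every field takes its default. One way to inspect those defaults is to serialize one; a runnable sketch (the exact default bitmap type depends on the Druid version):

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.segment.IndexSpec;

public class ShowDefaultIndexSpec {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new DefaultObjectMapper();
        // Prints the default bitmap serde, dimensionCompression, metricCompression, and longEncoding.
        System.out.println(mapper.writeValueAsString(new IndexSpec()));
    }
}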

Example 19 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From the class CompactionTaskTest, the method testGetTuningConfigWithParallelIndexTuningConfig:

@Test
public void testGetTuningConfigWithParallelIndexTuningConfig() {
    // Identical IndexSpec for both configs: explicit bitmap, compression, and encoding choices.
    final IndexSpec indexSpec = new IndexSpec(
        new RoaringBitmapSerdeFactory(true), CompressionStrategy.LZ4, CompressionStrategy.LZF, LongEncodingStrategy.LONGS);
    ParallelIndexTuningConfig parallelIndexTuningConfig = new ParallelIndexTuningConfig(
        null, // null to compute maxRowsPerSegment automatically
        null, null, 500000, 1000000L, null, null, null, null, null,
        indexSpec,
        null, null, true, false, 5000L,
        null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);
    CompactionTask.CompactionTuningConfig compactionTuningConfig = new CompactionTask.CompactionTuningConfig(
        null, // null to compute maxRowsPerSegment automatically
        null, null, 500000, 1000000L, null, null, null, null, null,
        indexSpec,
        null, null, true, false, 5000L,
        null, null, null, null, null, null, null, null, null, null, null, null, null, null);
    Assert.assertEquals(compactionTuningConfig, CompactionTask.getTuningConfig(parallelIndexTuningConfig));
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory), ParallelIndexTuningConfig (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig), Test (org.junit.Test)
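
The four-argument constructor used above sets every codec explicitly. The same spec with its arguments labeled, as a sketch:

import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.data.CompressionFactory.LongEncodingStrategy;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;

IndexSpec spec = new IndexSpec(
    new RoaringBitmapSerdeFactory(true), // roaring bitmap indexes; true = compress runs on serialization
    CompressionStrategy.LZ4,             // dimension column compression
    CompressionStrategy.LZF,             // metric column compression
    LongEncodingStrategy.LONGS           // store long columns as plain 64-bit values
);

Passing null for any of these falls back to that field's default, which is what Examples 16 and 17 relied on while setting only the segmentizer factory.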

Example 20 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From the class CompactionTaskRunTest, the method testCompactionWithFilterInTransformSpec:

@Test
public void testCompactionWithFilterInTransformSpec() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    // day segmentGranularity
    final CompactionTask compactionTask = builder
        .interval(Intervals.of("2014-01-01/2014-01-02"))
        .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null))
        .transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null)))
        .build();
    Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(1, segments.size());
    Assert.assertEquals(Intervals.of("2014-01-01/2014-01-02"), segments.get(0).getInterval());
    Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(0).getShardSpec());
    ObjectMapper mapper = new DefaultObjectMapper();
    Map<String, String> expectedLongSumMetric = new HashMap<>();
    expectedLongSumMetric.put("type", "longSum");
    expectedLongSumMetric.put("name", "val");
    expectedLongSumMetric.put("fieldName", "val");
    expectedLongSumMetric.put("expression", null);
    // CompactionState stores each spec as a Map, so every spec is round-tripped through JSON.
    CompactionState expectedCompactionState = new CompactionState(
        new DynamicPartitionsSpec(5000000, Long.MAX_VALUE),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
        ImmutableList.of(expectedLongSumMetric),
        getObjectMapper().readValue(getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()), Map.class),
        mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class),
        mapper.readValue(mapper.writeValueAsString(new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, true, ImmutableList.of(Intervals.of("2014-01-01T00:00:00/2014-01-01T03:00:00")))), Map.class));
    Assert.assertEquals(expectedCompactionState, segments.get(0).getLastCompactionState());
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), ClientCompactionTaskTransformSpec (org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec), ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec), TaskStatus (org.apache.druid.indexer.TaskStatus), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), CompactionState (org.apache.druid.timeline.CompactionState), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), Map (java.util.Map), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), Test (org.junit.Test)
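
As the test shows, CompactionState takes each spec as a plain Map rather than a typed object, which is why every spec is serialized and read back with Map.class. The idiom in isolation, as a sketch:

import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.segment.IndexSpec;

ObjectMapper mapper = new DefaultObjectMapper();
// Round-trip a spec through JSON to obtain the Map form stored in CompactionState.
Map<?, ?> indexSpecAsMap = mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class);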

Aggregations

IndexSpec (org.apache.druid.segment.IndexSpec): 89 uses
Test (org.junit.Test): 59 uses
File (java.io.File): 29 uses
Period (org.joda.time.Period): 28 uses
RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory): 25 uses
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 24 uses
ArrayList (java.util.ArrayList): 21 uses
Map (java.util.Map): 20 uses
DataSegment (org.apache.druid.timeline.DataSegment): 20 uses
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 19 uses
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 19 uses
CompactionState (org.apache.druid.timeline.CompactionState): 16 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 15 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 14 uses
PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec): 14 uses
QueryableIndex (org.apache.druid.segment.QueryableIndex): 14 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 13 uses
InputRow (org.apache.druid.data.input.InputRow): 13 uses
Duration (org.joda.time.Duration): 13 uses
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 10 uses