
Example 66 with UniformGranularitySpec

Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class CompactionTaskRunTest, method runIndexTask:

private Pair<TaskStatus, List<DataSegment>> runIndexTask(@Nullable CountDownLatch readyLatchToCountDown, @Nullable CountDownLatch latchToAwaitBeforeRun, boolean appendToExisting) throws Exception {
    File tmpDir = temporaryFolder.newFolder();
    File tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        for (String testRow : TEST_ROWS) {
            writer.write(testRow);
        }
    }
    IndexTask indexTask = new IndexTask(
        null,
        null,
        IndexTaskTest.createIngestionSpec(
            getObjectMapper(), tmpDir, DEFAULT_PARSE_SPEC, null,
            new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
            IndexTaskTest.createTuningConfig(2, 2, null, 2L, null, false, true),
            appendToExisting, false),
        null);
    return runTask(indexTask, readyLatchToCountDown, latchToAwaitBeforeRun);
}
Also used: UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), File (java.io.File), BufferedWriter (java.io.BufferedWriter)
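For reference, the granularity spec built inline above can be constructed on its own. A minimal sketch, using only the constructor shown in runIndexTask; the import path for Granularities and the note about the rollup default are assumptions, not taken from the snippet:

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;

class GranularitySpecSketch {
    static UniformGranularitySpec hourlySegmentsMinuteQuery() {
        // segmentGranularity (HOUR) controls how the index task buckets rows into
        // segments; queryGranularity (MINUTE) is the truncation applied to row
        // timestamps at ingest time. Passing null for the intervals lets the task
        // infer them from the data. Rollup is not passed here; the later
        // getDefaultCompactionState example passes it explicitly as true, which is
        // assumed to match the default.
        return new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null);
    }
}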

Example 67 with UniformGranularitySpec

Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class CompactionTaskRunTest, method testRunRegularIndexTaskWithIngestSegmentFirehose:

/**
 * Run a regular index task that's equivalent to the compaction task in {@link #testRunWithDynamicPartitioning()},
 * using {@link IngestSegmentFirehoseFactory}.
 *
 * This is not entirely CompactionTask related, but it's similar conceptually and it requires
 * similar setup to what this test suite already has.
 *
 * It could be moved to a separate test class if needed.
 */
@Test
public void testRunRegularIndexTaskWithIngestSegmentFirehose() throws Exception {
    runIndexTask();
    IndexTask indexTask = new IndexTask(
        null,
        null,
        new IndexTask.IndexIngestionSpec(
            new DataSchema(
                "test",
                getObjectMapper().convertValue(new StringInputRowParser(DEFAULT_PARSE_SPEC, null), Map.class),
                new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
                new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
                null,
                getObjectMapper()),
            new IndexTask.IndexIOConfig(
                new IngestSegmentFirehoseFactory(
                    DATA_SOURCE, Intervals.of("2014-01-01/2014-01-02"), null, null, null, null, null,
                    getIndexIO(), coordinatorClient, segmentCacheManagerFactory, RETRY_POLICY_FACTORY),
                false, false),
            IndexTaskTest.createTuningConfig(5000000, null, null, Long.MAX_VALUE, null, false, true)),
        null);
    // This is a regular index so we need to explicitly add this context to store the CompactionState
    indexTask.addToContext(Tasks.STORE_COMPACTION_STATE_KEY, true);
    final Pair<TaskStatus, List<DataSegment>> resultPair = runTask(indexTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    final List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(3, segments.size());
    for (int i = 0; i < 3; i++) {
        Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1), segments.get(i).getInterval());
        Assert.assertEquals(getDefaultCompactionState(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of()), segments.get(i).getLastCompactionState());
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(new NumberedOverwriteShardSpec(32768, 0, 2, (short) 1, (short) 1), segments.get(i).getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(i).getShardSpec());
        }
    }
}
Also used: LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), TaskStatus (org.apache.druid.indexer.TaskStatus), DataSegment (org.apache.druid.timeline.DataSegment), DataSchema (org.apache.druid.segment.indexing.DataSchema), IngestSegmentFirehoseFactory (org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec), Map (java.util.Map), HashMap (java.util.HashMap), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), Test (org.junit.Test)
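The shard-spec branch at the end of this test is the main difference between the two lock modes. A minimal sketch of the two expected shard specs in isolation; the meaning of the NumberedOverwriteShardSpec arguments and the significance of 32768 are assumptions, not confirmed by the snippet:

import org.apache.druid.timeline.partition.NumberedOverwriteShardSpec;
import org.apache.druid.timeline.partition.NumberedShardSpec;

class ExpectedShardSpecSketch {
    static void expectedShardSpecs() {
        // Time-chunk locking: an ordinary first-generation segment (partition 0 of 1).
        NumberedShardSpec timeChunkLock = new NumberedShardSpec(0, 1);

        // Segment locking: the overwriting segment lives in the non-root-generation
        // partition-id space. 32768 in the assertion above is assumed to be the start
        // of that space, and the remaining arguments are assumed to be
        // (startRootPartitionId, endRootPartitionId, minorVersion, atomicUpdateGroupSize).
        NumberedOverwriteShardSpec segmentLock =
            new NumberedOverwriteShardSpec(32768, 0, 2, (short) 1, (short) 1);
    }
}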

Example 68 with UniformGranularitySpec

Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class CompactionTaskRunTest, method testRunWithHashPartitioning:

@Test
public void testRunWithHashPartitioning() throws Exception {
    // Hash partitioning is not supported with segment lock yet
    if (lockGranularity == LockGranularity.SEGMENT) {
        return;
    }
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .interval(Intervals.of("2014-01-01/2014-01-02"))
        .tuningConfig(new ParallelIndexTuningConfig(null, null, null, null, null, null, null, null, null, new HashedPartitionsSpec(null, 3, null), null, null, null, true, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null))
        .build();
    final Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    final List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(6, segments.size());
    for (int i = 0; i < 3; i++) {
        final Interval interval = Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1);
        for (int j = 0; j < 2; j++) {
            final int segmentIdx = i * 2 + j;
            Assert.assertEquals(interval, segments.get(segmentIdx).getInterval());
            Map<String, String> expectedLongSumMetric = new HashMap<>();
            expectedLongSumMetric.put("type", "longSum");
            expectedLongSumMetric.put("name", "val");
            expectedLongSumMetric.put("fieldName", "val");
            expectedLongSumMetric.put("expression", null);
            CompactionState expectedState = new CompactionState(
                new HashedPartitionsSpec(null, 3, null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
                ImmutableList.of(expectedLongSumMetric),
                null,
                compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
                getObjectMapper().readValue(
                    getObjectMapper().writeValueAsString(
                        new UniformGranularitySpec(
                            Granularities.HOUR,
                            Granularities.MINUTE,
                            true,
                            ImmutableList.of(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1)))),
                    Map.class));
            Assert.assertEquals(expectedState, segments.get(segmentIdx).getLastCompactionState());
            Assert.assertSame(HashBasedNumberedShardSpec.class, segments.get(segmentIdx).getShardSpec().getClass());
        }
    }
    List<String> rowsFromSegment = getCSVFormatRowsFromSegments(segments);
    rowsFromSegment.sort(Ordering.natural());
    Assert.assertEquals(TEST_ROWS, rowsFromSegment);
}
Also used: HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec), HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), TaskStatus (org.apache.druid.indexer.TaskStatus), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), CompactionState (org.apache.druid.timeline.CompactionState), ParallelIndexTuningConfig (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig), Map (java.util.Map), Interval (org.joda.time.Interval), Test (org.junit.Test)
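The only non-default piece of the long ParallelIndexTuningConfig above is the partitions spec. A minimal sketch isolating it; the interpretation of the three constructor arguments as maxRowsPerSegment, numShards, and partitionDimensions is an assumption drawn from the (null, 3, null) usage above:

import org.apache.druid.indexer.partitions.HashedPartitionsSpec;

class HashPartitioningSketch {
    static HashedPartitionsSpec threeShardsPerTimeChunk() {
        // Hash-partition each time chunk into a fixed number of shards, hashing on
        // all dimensions (no explicit partition dimensions, no per-segment row limit).
        return new HashedPartitionsSpec(null, 3, null);
    }
}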

Example 69 with UniformGranularitySpec

Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class CompactionTaskRunTest, method getDefaultCompactionState:

public static CompactionState getDefaultCompactionState(Granularity segmentGranularity, Granularity queryGranularity, List<Interval> intervals) throws JsonProcessingException {
    ObjectMapper mapper = new DefaultObjectMapper();
    // Expected compaction state to exist after compaction as we store compaction state by default
    Map<String, String> expectedLongSumMetric = new HashMap<>();
    expectedLongSumMetric.put("type", "longSum");
    expectedLongSumMetric.put("name", "val");
    expectedLongSumMetric.put("fieldName", "val");
    expectedLongSumMetric.put("expression", null);
    return new CompactionState(
        new DynamicPartitionsSpec(5000000, Long.MAX_VALUE),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
        ImmutableList.of(expectedLongSumMetric),
        null,
        mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class),
        mapper.readValue(
            mapper.writeValueAsString(new UniformGranularitySpec(segmentGranularity, queryGranularity, true, intervals)),
            Map.class));
}
Also used: UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), IndexSpec (org.apache.druid.segment.IndexSpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), HashMap (java.util.HashMap), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), Map (java.util.Map), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)
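getDefaultCompactionState round-trips the IndexSpec and the UniformGranularitySpec through Jackson because the last two CompactionState arguments are passed as plain Maps rather than typed specs, as the Map.class targets above show. A minimal sketch of that round trip on its own (the Granularities import path and the null intervals are assumptions made for brevity):

import java.io.IOException;
import java.util.Map;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;

class GranularitySpecAsMapSketch {
    static Map<String, Object> granularitySpecAsMap() throws IOException {
        ObjectMapper mapper = new DefaultObjectMapper();
        String json = mapper.writeValueAsString(
            new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, true, null));
        // Read the JSON back as a generic Map, exactly as the helper above does
        // before handing the spec to CompactionState.
        return mapper.readValue(json, new TypeReference<Map<String, Object>>() {});
    }
}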

Example 70 with UniformGranularitySpec

Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class CompactionTaskRunTest, method testCompactionWithNewMetricInMetricsSpec:

@Test
public void testCompactionWithNewMetricInMetricsSpec() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    // day segmentGranularity
    final CompactionTask compactionTask = builder
        .interval(Intervals.of("2014-01-01/2014-01-02"))
        .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null))
        .metricsSpec(new AggregatorFactory[] { new CountAggregatorFactory("cnt"), new LongSumAggregatorFactory("val", "val") })
        .build();
    Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(1, segments.size());
    Assert.assertEquals(Intervals.of("2014-01-01/2014-01-02"), segments.get(0).getInterval());
    Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(0).getShardSpec());
    ObjectMapper mapper = new DefaultObjectMapper();
    Map<String, String> expectedCountMetric = new HashMap<>();
    expectedCountMetric.put("type", "count");
    expectedCountMetric.put("name", "cnt");
    Map<String, String> expectedLongSumMetric = new HashMap<>();
    expectedLongSumMetric.put("type", "longSum");
    expectedLongSumMetric.put("name", "val");
    expectedLongSumMetric.put("fieldName", "val");
    expectedLongSumMetric.put("expression", null);
    CompactionState expectedCompactionState = new CompactionState(
        new DynamicPartitionsSpec(5000000, Long.MAX_VALUE),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
        ImmutableList.of(expectedCountMetric, expectedLongSumMetric),
        getObjectMapper().readValue(getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()), Map.class),
        mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class),
        mapper.readValue(
            mapper.writeValueAsString(
                new UniformGranularitySpec(
                    Granularities.DAY,
                    Granularities.MINUTE,
                    true,
                    ImmutableList.of(Intervals.of("2014-01-01T00:00:00/2014-01-01T03:00:00")))),
            Map.class));
    Assert.assertEquals(expectedCompactionState, segments.get(0).getLastCompactionState());
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), TaskStatus (org.apache.druid.indexer.TaskStatus), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), CompactionState (org.apache.druid.timeline.CompactionState), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), Map (java.util.Map), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), Test (org.junit.Test)
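In this example only the segment granularity is changed by compaction; the expected CompactionState still records MINUTE query granularity and rollup from the original ingestion. A minimal sketch of just the granularity override; the interpretation of the three constructor arguments as segmentGranularity, queryGranularity, and rollup, and the Granularities import path, are assumptions:

import org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec;
import org.apache.druid.java.util.common.granularity.Granularities;

class CompactionGranularityOverrideSketch {
    static ClientCompactionTaskGranularitySpec dayOverride() {
        // Only the segment granularity is set; the two nulls are assumed to mean
        // "keep the query granularity and rollup of the existing segments", which is
        // consistent with the expected state above (DAY segments, MINUTE query
        // granularity, rollup=true).
        return new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null);
    }
}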

Aggregations

UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 91 uses
Test (org.junit.Test): 60 uses
DataSchema (org.apache.druid.segment.indexing.DataSchema): 49 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 36 uses
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 35 uses
DataSegment (org.apache.druid.timeline.DataSegment): 33 uses
File (java.io.File): 25 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 24 uses
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 22 uses
Map (java.util.Map): 20 uses
Interval (org.joda.time.Interval): 18 uses
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 17 uses
ArrayList (java.util.ArrayList): 15 uses
HashMap (java.util.HashMap): 14 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 14 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 12 uses
Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 12 uses
GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec): 12 uses
CompactionState (org.apache.druid.timeline.CompactionState): 12 uses
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 12 uses