Example 41 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

Source: class CompactionTuningConfigTest, method testSerdeWithNonZeroAwaitSegmentAvailabilityTimeoutMillis.

@Test
public void testSerdeWithNonZeroAwaitSegmentAvailabilityTimeoutMillis() {
    expectedException.expect(IllegalArgumentException.class);
    // "Compcation" is expected verbatim: the test asserts the exact message the constructor throws.
    expectedException.expectMessage("awaitSegmentAvailabilityTimeoutMillis is not supported for Compcation Task");
    final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig(
        null, null, null, 10, 1000L, null, null, null, null,
        new DynamicPartitionsSpec(100, 100L),
        new IndexSpec(new RoaringBitmapSerdeFactory(true), CompressionStrategy.UNCOMPRESSED, CompressionStrategy.LZF, LongEncodingStrategy.LONGS),
        new IndexSpec(),
        1, false, true, 10000L,
        OffHeapMemorySegmentWriteOutMediumFactory.instance(),
        null, 250, 100, 20L,
        new Duration(3600), 128, null, null, false, null, null, null,
        // the nonzero awaitSegmentAvailabilityTimeoutMillis (last argument) triggers the rejection
        5L);
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory), Duration (org.joda.time.Duration), Test (org.junit.Test)

Example 42 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

Source: class CompactionTuningConfigTest, method testSerdeWithZeroAwaitSegmentAvailabilityTimeoutMillis.

@Test
public void testSerdeWithZeroAwaitSegmentAvailabilityTimeoutMillis() {
    final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig(
        null, null, null, 10, 1000L, null, null, null, null,
        new DynamicPartitionsSpec(100, 100L),
        new IndexSpec(new RoaringBitmapSerdeFactory(true), CompressionStrategy.UNCOMPRESSED, CompressionStrategy.LZF, LongEncodingStrategy.LONGS),
        new IndexSpec(),
        1, false, true, 10000L,
        OffHeapMemorySegmentWriteOutMediumFactory.instance(),
        null, 250, 100, 20L,
        new Duration(3600), 128, null, null, false, null, null, null,
        // zero disables the availability wait and is the only value a compaction task accepts
        0L);
    Assert.assertEquals(0L, tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis());
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory), Duration (org.joda.time.Duration), Test (org.junit.Test)
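
The two tests above exercise the same constructor guard from opposite sides: any nonzero awaitSegmentAvailabilityTimeoutMillis is rejected, while zero (the "disabled" value) is accepted. The sketch below illustrates that validation pattern in isolation; ValidatingTuningConfig is a hypothetical stand-in, not Druid's actual class.

public class ValidatingTuningConfig {
    private final long awaitSegmentAvailabilityTimeoutMillis;

    public ValidatingTuningConfig(Long awaitSegmentAvailabilityTimeoutMillis) {
        // Compaction tasks do not support waiting for segment availability, so any
        // value other than null or 0 is a configuration error. The "Compcation" typo
        // mirrors the exact message the tests above expect.
        if (awaitSegmentAvailabilityTimeoutMillis != null && awaitSegmentAvailabilityTimeoutMillis != 0L) {
            throw new IllegalArgumentException("awaitSegmentAvailabilityTimeoutMillis is not supported for Compcation Task");
        }
        // Only null or 0 can reach this point, so the stored value is always 0.
        this.awaitSegmentAvailabilityTimeoutMillis = 0L;
    }

    public long getAwaitSegmentAvailabilityTimeoutMillis() {
        return awaitSegmentAvailabilityTimeoutMillis;
    }
}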

Example 43 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

Source: class ClientCompactionTaskQuerySerdeTest, method testClientCompactionTaskQueryToCompactionTask.

@Test
public void testClientCompactionTaskQueryToCompactionTask() throws IOException {
    final ObjectMapper mapper = setupInjectablesInObjectMapper(new DefaultObjectMapper());
    final ClientCompactionTaskQuery query = new ClientCompactionTaskQuery(
        "id",
        "datasource",
        new ClientCompactionIOConfig(
            new ClientCompactionIntervalSpec(Intervals.of("2019/2020"), "testSha256OfSortedSegmentIds"),
            true),
        new ClientCompactionTaskQueryTuningConfig(
            null, 40000, 2000L, null,
            new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), 10),
            new DynamicPartitionsSpec(100, 30000L),
            new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.LZF, LongEncodingStrategy.LONGS),
            new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.UNCOMPRESSED, LongEncodingStrategy.AUTO),
            2, 1000L,
            TmpFileSegmentWriteOutMediumFactory.instance(),
            100, 5, 1000L,
            new Duration(3000L),
            7, 1000, 100),
        new ClientCompactionTaskGranularitySpec(Granularities.DAY, Granularities.HOUR, true),
        new ClientCompactionTaskDimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
        new AggregatorFactory[] { new CountAggregatorFactory("cnt") },
        new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim1", "foo", null)),
        ImmutableMap.of("key", "value"));
    final byte[] json = mapper.writeValueAsBytes(query);
    final CompactionTask task = (CompactionTask) mapper.readValue(json, Task.class);
    Assert.assertEquals(query.getId(), task.getId());
    Assert.assertEquals(query.getDataSource(), task.getDataSource());
    Assert.assertTrue(task.getIoConfig().getInputSpec() instanceof CompactionIntervalSpec);
    Assert.assertEquals(query.getIoConfig().getInputSpec().getInterval(), ((CompactionIntervalSpec) task.getIoConfig().getInputSpec()).getInterval());
    Assert.assertEquals(query.getIoConfig().getInputSpec().getSha256OfSortedSegmentIds(), ((CompactionIntervalSpec) task.getIoConfig().getInputSpec()).getSha256OfSortedSegmentIds());
    Assert.assertEquals(query.getTuningConfig().getMaxRowsInMemory().intValue(), task.getTuningConfig().getMaxRowsInMemory());
    Assert.assertEquals(query.getTuningConfig().getMaxBytesInMemory().longValue(), task.getTuningConfig().getMaxBytesInMemory());
    Assert.assertEquals(query.getTuningConfig().getSplitHintSpec(), task.getTuningConfig().getSplitHintSpec());
    Assert.assertEquals(query.getTuningConfig().getPartitionsSpec(), task.getTuningConfig().getPartitionsSpec());
    Assert.assertEquals(query.getTuningConfig().getIndexSpec(), task.getTuningConfig().getIndexSpec());
    Assert.assertEquals(query.getTuningConfig().getIndexSpecForIntermediatePersists(), task.getTuningConfig().getIndexSpecForIntermediatePersists());
    Assert.assertEquals(query.getTuningConfig().getPushTimeout().longValue(), task.getTuningConfig().getPushTimeout());
    Assert.assertEquals(query.getTuningConfig().getSegmentWriteOutMediumFactory(), task.getTuningConfig().getSegmentWriteOutMediumFactory());
    Assert.assertEquals(query.getTuningConfig().getMaxNumConcurrentSubTasks().intValue(), task.getTuningConfig().getMaxNumConcurrentSubTasks());
    Assert.assertEquals(query.getTuningConfig().getMaxRetry().intValue(), task.getTuningConfig().getMaxRetry());
    Assert.assertEquals(query.getTuningConfig().getTaskStatusCheckPeriodMs().longValue(), task.getTuningConfig().getTaskStatusCheckPeriodMs());
    Assert.assertEquals(query.getTuningConfig().getChatHandlerTimeout(), task.getTuningConfig().getChatHandlerTimeout());
    Assert.assertEquals(query.getTuningConfig().getMaxNumSegmentsToMerge().intValue(), task.getTuningConfig().getMaxNumSegmentsToMerge());
    Assert.assertEquals(query.getTuningConfig().getTotalNumMergeTasks().intValue(), task.getTuningConfig().getTotalNumMergeTasks());
    Assert.assertEquals(query.getGranularitySpec(), task.getGranularitySpec());
    Assert.assertEquals(query.getGranularitySpec().getQueryGranularity(), task.getGranularitySpec().getQueryGranularity());
    Assert.assertEquals(query.getGranularitySpec().getSegmentGranularity(), task.getGranularitySpec().getSegmentGranularity());
    Assert.assertEquals(query.getGranularitySpec().isRollup(), task.getGranularitySpec().isRollup());
    Assert.assertEquals(query.getIoConfig().isDropExisting(), task.getIoConfig().isDropExisting());
    Assert.assertEquals(query.getContext(), task.getContext());
    Assert.assertEquals(query.getDimensionsSpec().getDimensions(), task.getDimensionsSpec().getDimensions());
    Assert.assertEquals(query.getTransformSpec().getFilter(), task.getTransformSpec().getFilter());
    Assert.assertArrayEquals(query.getMetricsSpec(), task.getMetricsSpec());
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), ClientCompactionIOConfig (org.apache.druid.client.indexing.ClientCompactionIOConfig), ClientCompactionIntervalSpec (org.apache.druid.client.indexing.ClientCompactionIntervalSpec), Duration (org.joda.time.Duration), ClientCompactionTaskTransformSpec (org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec), ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec), ClientCompactionTaskQueryTuningConfig (org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig), SegmentsSplitHintSpec (org.apache.druid.data.input.SegmentsSplitHintSpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), DefaultBitmapSerdeFactory (org.apache.druid.segment.data.BitmapSerde.DefaultBitmapSerdeFactory), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter), ClientCompactionTaskQuery (org.apache.druid.client.indexing.ClientCompactionTaskQuery), ClientCompactionTaskDimensionsSpec (org.apache.druid.client.indexing.ClientCompactionTaskDimensionsSpec), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), HumanReadableBytes (org.apache.druid.java.util.common.HumanReadableBytes), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), Test (org.junit.Test)
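
The round trip above relies on polymorphic JSON serde: a PartitionsSpec is serialized with a type discriminator, so the task side can reconstruct the same DynamicPartitionsSpec the client sent. Below is a minimal sketch of that mechanism in isolation, assuming a plain Jackson ObjectMapper can resolve the subtype from the annotations on PartitionsSpec (the test itself uses Druid's injectable mapper for the full task round trip):

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.junit.Assert;
import org.junit.Test;

public class DynamicPartitionsSpecSerdeSketch {
    @Test
    public void testRoundTrip() throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        DynamicPartitionsSpec original = new DynamicPartitionsSpec(100, 30000L);
        String json = mapper.writeValueAsString(original);
        // Expected shape (assumption): {"type":"dynamic","maxRowsPerSegment":100,"maxTotalRows":30000}
        PartitionsSpec roundTripped = mapper.readValue(json, PartitionsSpec.class);
        // DynamicPartitionsSpec defines value-based equals(), which is what the
        // assertEquals on getPartitionsSpec() in the test above depends on.
        Assert.assertEquals(original, roundTripped);
    }
}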

Example 44 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

Source: class InputSourceProcessor, method process.

/**
 * This method opens the given {@link InputSource} and processes its data via {@link InputSourceReader}.
 * All rows read are consumed by a {@link BatchAppenderatorDriver}, which creates new segments.
 * Any segments still open are pushed once all input data has been processed successfully.
 *
 * @return {@link SegmentsAndCommitMetadata} for the pushed segments
 */
public static SegmentsAndCommitMetadata process(
    DataSchema dataSchema,
    BatchAppenderatorDriver driver,
    PartitionsSpec partitionsSpec,
    InputSource inputSource,
    @Nullable InputFormat inputFormat,
    File tmpDir,
    SequenceNameFunction sequenceNameFunction,
    IndexTaskInputRowIteratorBuilder inputRowIteratorBuilder,
    RowIngestionMeters buildSegmentsMeters,
    ParseExceptionHandler parseExceptionHandler,
    long pushTimeout
) throws IOException, InterruptedException, ExecutionException, TimeoutException {
    @Nullable final DynamicPartitionsSpec dynamicPartitionsSpec = partitionsSpec instanceof DynamicPartitionsSpec ? (DynamicPartitionsSpec) partitionsSpec : null;
    final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
    try (final CloseableIterator<InputRow> inputRowIterator = AbstractBatchIndexTask.inputSourceReader(tmpDir, dataSchema, inputSource, inputFormat, AbstractBatchIndexTask.defaultRowFilter(granularitySpec), buildSegmentsMeters, parseExceptionHandler);
        final HandlingInputRowIterator iterator = inputRowIteratorBuilder.delegate(inputRowIterator).granularitySpec(granularitySpec).build()) {
        while (iterator.hasNext()) {
            final InputRow inputRow = iterator.next();
            if (inputRow == null) {
                continue;
            }
            // IndexTaskInputRowIteratorBuilder.absentBucketIntervalConsumer() ensures the interval will be present here
            Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
            @SuppressWarnings("OptionalGetWithoutIsPresent") final Interval interval = optInterval.get();
            final String sequenceName = sequenceNameFunction.getSequenceName(interval, inputRow);
            final AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName);
            if (addResult.isOk()) {
                // Incremental segment publishing is allowed only when rollup doesn't have to be perfect.
                if (dynamicPartitionsSpec != null) {
                    final boolean isPushRequired = addResult.isPushRequired(dynamicPartitionsSpec.getMaxRowsPerSegment(), dynamicPartitionsSpec.getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                    if (isPushRequired) {
                        // Some segments may be waiting to be pushed even though no more rows will
                        // ever be added to them. If they are not pushed here, they keep occupying
                        // space in the appenderator, leaving less room for open segments and
                        // potentially producing smaller segments.
                        final SegmentsAndCommitMetadata pushed = driver.pushAllAndClear(pushTimeout);
                        LOG.debugSegments(pushed.getSegments(), "Pushed segments");
                    }
                }
            } else {
                throw new ISE("Failed to add a row with timestamp[%s]", inputRow.getTimestamp());
            }
        }
        final SegmentsAndCommitMetadata pushed = driver.pushAllAndClear(pushTimeout);
        LOG.debugSegments(pushed.getSegments(), "Pushed segments");
        return pushed;
    }
}
Also used: SegmentsAndCommitMetadata (org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata), HandlingInputRowIterator (org.apache.druid.data.input.HandlingInputRowIterator), AppenderatorDriverAddResult (org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec), InputRow (org.apache.druid.data.input.InputRow), ISE (org.apache.druid.java.util.common.ISE), Nullable (javax.annotation.Nullable), Interval (org.joda.time.Interval)
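
The dynamicPartitionsSpec branch above is what makes best-effort-rollup ingestion incremental: segments are pushed as soon as either row limit is hit instead of only at the end of the job. The sketch below models just that decision; the class and parameter names are hypothetical stand-ins for the appenderator plumbing, not Druid's API.

// Simplified, self-contained model of the push decision made inside the ingestion loop above.
final class PushDecision {
    static boolean isPushRequired(int rowsInLargestOpenSegment, long totalRowsHeld,
                                  Integer maxRowsPerSegment, Long maxTotalRows) {
        // Push when any open segment has reached its per-segment row cap...
        final boolean segmentFull = maxRowsPerSegment != null && rowsInLargestOpenSegment >= maxRowsPerSegment;
        // ...or when the appenderator holds too many rows across all open segments.
        final boolean appenderatorFull = maxTotalRows != null && totalRowsHeld >= maxTotalRows;
        return segmentFull || appenderatorFull;
    }
}

Pushing eagerly frees appenderator space for segments that are still open; deferring every push to the final pushAllAndClear would squeeze open segments into less memory and tend to produce smaller segments, as the in-line comment notes.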

Example 45 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

Source: class CompactionTaskParallelRunTest, method testRunCompactionWithNewMetricsShouldStoreInState.

@Test
public void testRunCompactionWithNewMetricsShouldStoreInState() throws Exception {
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .metricsSpec(new AggregatorFactory[] { new CountAggregatorFactory("cnt"), new LongSumAggregatorFactory("val", "val") })
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    Assert.assertEquals(3, compactedSegments.size());
    for (DataSegment segment : compactedSegments) {
        Assert.assertSame(lockGranularity == LockGranularity.TIME_CHUNK ? NumberedShardSpec.class : NumberedOverwriteShardSpec.class, segment.getShardSpec().getClass());
        Map<String, String> expectedCountMetric = new HashMap<>();
        expectedCountMetric.put("type", "count");
        expectedCountMetric.put("name", "cnt");
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        CompactionState expectedState = new CompactionState(
            new DynamicPartitionsSpec(null, Long.MAX_VALUE),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedCountMetric, expectedLongSumMetric),
            getObjectMapper().readValue(getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()), Map.class),
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(
                    new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, true, ImmutableList.of(segment.getInterval()))),
                Map.class));
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used: HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec), CompactionState (org.apache.druid.timeline.CompactionState), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
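
Note the expected spec in this test: new DynamicPartitionsSpec(null, Long.MAX_VALUE). A null maxRowsPerSegment falls back to the spec's default, and a maxTotalRows of Long.MAX_VALUE effectively disables the total-row push trigger; this is how a compaction task records "no explicit dynamic limits" in its CompactionState. A small sketch of reading those fields back (the default constants named below are assumptions; check PartitionsSpec and DynamicPartitionsSpec for the authoritative values):

import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.junit.Assert;
import org.junit.Test;

public class DynamicPartitionsSpecDefaultsSketch {
    @Test
    public void testNullMaxRowsFallsBackToDefault() {
        DynamicPartitionsSpec spec = new DynamicPartitionsSpec(null, Long.MAX_VALUE);
        // A null maxRowsPerSegment is assumed to resolve to the default in the constructor
        // (PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT; 5,000,000 at the time of writing).
        Assert.assertNotNull(spec.getMaxRowsPerSegment());
        // getMaxTotalRowsOr() returns the configured value when present, else the supplied
        // default; Long.MAX_VALUE here means the total-row limit never forces a push.
        Assert.assertEquals(Long.MAX_VALUE, spec.getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
    }
}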

Aggregations

DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 52
Test (org.junit.Test): 34
IndexSpec (org.apache.druid.segment.IndexSpec): 19
List (java.util.List): 15
Map (java.util.Map): 15
ImmutableList (com.google.common.collect.ImmutableList): 13
StringUtils (org.apache.druid.java.util.common.StringUtils): 13
DataSegment (org.apache.druid.timeline.DataSegment): 13
ImmutableMap (com.google.common.collect.ImmutableMap): 12
HashMap (java.util.HashMap): 11
Function (java.util.function.Function): 11
Pair (org.apache.druid.java.util.common.Pair): 11
Closeable (java.io.Closeable): 10
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 10
RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory): 10
Duration (org.joda.time.Duration): 10
Interval (org.joda.time.Interval): 10
ArrayList (java.util.ArrayList): 9
UUID (java.util.UUID): 9
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 9