Search in sources :

Example 11 with ParallelIndexIngestionSpec

use of org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec in project druid by druid-io.

the class CompactionTaskTest method testCreateIngestionSchemaWithCustomMetricsSpec.

@Test
public void testCreateIngestionSchemaWithCustomMetricsSpec() throws IOException, SegmentLoadingException {
    final AggregatorFactory[] customMetricsSpec = new AggregatorFactory[] { new CountAggregatorFactory("custom_count"), new LongSumAggregatorFactory("custom_long_sum", "agg_1"), new FloatMinAggregatorFactory("custom_float_min", "agg_3"), new DoubleMaxAggregatorFactory("custom_double_max", "agg_4") };
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(TUNING_CONFIG), null, null, customMetricsSpec, null, COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    final List<DimensionsSpec> expectedDimensionsSpec = getExpectedDimensionsSpecForAutoGeneration();
    ingestionSpecs.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getDataSchema().getGranularitySpec().inputIntervals().get(0), s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)));
    Assert.assertEquals(6, ingestionSpecs.size());
    assertIngestionSchema(ingestionSpecs, expectedDimensionsSpec, Arrays.asList(customMetricsSpec), SEGMENT_INTERVALS, Granularities.MONTH, Granularities.NONE, IOConfig.DEFAULT_DROP_EXISTING);
}
Also used : DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test)

Example 12 with ParallelIndexIngestionSpec

use of org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec in project druid by druid-io.

the class CompactionTaskTest method testGranularitySpecWithNullRollup.

@Test
public void testGranularitySpecWithNullRollup() throws IOException, SegmentLoadingException {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(TUNING_CONFIG), null, null, null, new ClientCompactionTaskGranularitySpec(null, null, null), COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    Assert.assertEquals(6, ingestionSpecs.size());
    for (ParallelIndexIngestionSpec indexIngestionSpec : ingestionSpecs) {
        // Expect false since rollup value in metadata of existing segments are null
        Assert.assertFalse(indexIngestionSpec.getDataSchema().getGranularitySpec().isRollup());
    }
}
Also used : PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) Test(org.junit.Test)

Example 13 with ParallelIndexIngestionSpec

use of org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec in project druid by druid-io.

the class CompactionTaskTest method testCreateIngestionSchemaWithMaxTotalRows.

@Test
public void testCreateIngestionSchemaWithMaxTotalRows() throws IOException, SegmentLoadingException {
    final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig(null, null, null, 500000, 1000000L, null, 1000000L, null, null, null, new IndexSpec(new RoaringBitmapSerdeFactory(true), CompressionStrategy.LZ4, CompressionStrategy.LZF, LongEncodingStrategy.LONGS), null, null, false, false, 5000L, null, null, null, null, null, null, null, null, null, null, null, null, null, null);
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(tuningConfig), null, null, null, null, COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    final List<DimensionsSpec> expectedDimensionsSpec = getExpectedDimensionsSpecForAutoGeneration();
    ingestionSpecs.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getDataSchema().getGranularitySpec().inputIntervals().get(0), s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)));
    Assert.assertEquals(6, ingestionSpecs.size());
    assertIngestionSchema(ingestionSpecs, expectedDimensionsSpec, AGGREGATORS.stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()), SEGMENT_INTERVALS, tuningConfig, Granularities.MONTH, Granularities.NONE, IOConfig.DEFAULT_DROP_EXISTING);
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test)

Example 14 with ParallelIndexIngestionSpec

use of org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec in project druid by druid-io.

the class CompactionTask method runTask.

@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = createIngestionSchema(toolbox, getTaskLockHelper().getLockGranularityToUse(), segmentProvider, partitionConfigurationManager, dimensionsSpec, transformSpec, metricsSpec, granularitySpec, toolbox.getCoordinatorClient(), segmentCacheManagerFactory, retryPolicyFactory, ioConfig.isDropExisting());
    final List<ParallelIndexSupervisorTask> indexTaskSpecs = IntStream.range(0, ingestionSpecs.size()).mapToObj(i -> {
        // The ID of SubtaskSpecs is used as the base sequenceName in segment allocation protocol.
        // The indexing tasks generated by the compaction task should use different sequenceNames
        // so that they can allocate valid segment IDs with no duplication.
        ParallelIndexIngestionSpec ingestionSpec = ingestionSpecs.get(i);
        final String baseSequenceName = createIndexTaskSpecId(i);
        return newTask(baseSequenceName, ingestionSpec);
    }).collect(Collectors.toList());
    if (indexTaskSpecs.isEmpty()) {
        String msg = StringUtils.format("Can't find segments from inputSpec[%s], nothing to do.", ioConfig.getInputSpec());
        log.warn(msg);
        return TaskStatus.failure(getId(), msg);
    } else {
        registerResourceCloserOnAbnormalExit(currentSubTaskHolder);
        final int totalNumSpecs = indexTaskSpecs.size();
        log.info("Generated [%d] compaction task specs", totalNumSpecs);
        int failCnt = 0;
        for (ParallelIndexSupervisorTask eachSpec : indexTaskSpecs) {
            final String json = toolbox.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(eachSpec);
            if (!currentSubTaskHolder.setTask(eachSpec)) {
                String errMsg = "Task was asked to stop. Finish as failed.";
                log.info(errMsg);
                return TaskStatus.failure(getId(), errMsg);
            }
            try {
                if (eachSpec.isReady(toolbox.getTaskActionClient())) {
                    log.info("Running indexSpec: " + json);
                    final TaskStatus eachResult = eachSpec.run(toolbox);
                    if (!eachResult.isSuccess()) {
                        failCnt++;
                        log.warn("Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
                    }
                } else {
                    failCnt++;
                    log.warn("indexSpec is not ready: [%s].\nTrying the next indexSpec.", json);
                }
            } catch (Exception e) {
                failCnt++;
                log.warn(e, "Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
            }
        }
        String msg = StringUtils.format("Ran [%d] specs, [%d] succeeded, [%d] failed", totalNumSpecs, totalNumSpecs - failCnt, failCnt);
        log.info(msg);
        return failCnt == 0 ? TaskStatus.success(getId()) : TaskStatus.failure(getId(), msg);
    }
}
Also used : Verify(org.apache.curator.shaded.com.google.common.base.Verify) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Comparators(org.apache.druid.java.util.common.guava.Comparators) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) IAE(org.apache.druid.java.util.common.IAE) MultiValueHandling(org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling) BiMap(com.google.common.collect.BiMap) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) Property(org.apache.druid.indexer.Property) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) Segments(org.apache.druid.indexing.overlord.Segments) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) LockGranularity(org.apache.druid.indexing.common.LockGranularity) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) Intervals(org.apache.druid.java.util.common.Intervals) Duration(org.joda.time.Duration) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) HashMap(java.util.HashMap) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) CompactSegments(org.apache.druid.server.coordinator.duty.CompactSegments) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) Nonnull(javax.annotation.Nonnull) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) RE(org.apache.druid.java.util.common.RE) NonnullPair(org.apache.druid.java.util.common.NonnullPair) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IOException(java.io.IOException) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) File(java.io.File) HashBiMap(com.google.common.collect.HashBiMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) DimensionHandler(org.apache.druid.segment.DimensionHandler) TreeMap(java.util.TreeMap) Checks(org.apache.druid.indexer.Checks) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) TaskStatus(org.apache.druid.indexer.TaskStatus) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException)

Example 15 with ParallelIndexIngestionSpec

use of org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec in project druid by druid-io.

the class CompactionTaskTest method testSegmentGranularityAndNullQueryGranularity.

@Test
public void testSegmentGranularityAndNullQueryGranularity() throws IOException, SegmentLoadingException {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(TUNING_CONFIG), null, null, null, new ClientCompactionTaskGranularitySpec(new PeriodGranularity(Period.months(3), null, null), null, null), COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    final List<DimensionsSpec> expectedDimensionsSpec = ImmutableList.of(new DimensionsSpec(getDimensionSchema(new DoubleDimensionSchema("string_to_double"))));
    ingestionSpecs.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getDataSchema().getGranularitySpec().inputIntervals().get(0), s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)));
    Assert.assertEquals(1, ingestionSpecs.size());
    assertIngestionSchema(ingestionSpecs, expectedDimensionsSpec, AGGREGATORS.stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()), Collections.singletonList(COMPACTION_INTERVAL), new PeriodGranularity(Period.months(3), null, null), Granularities.NONE, IOConfig.DEFAULT_DROP_EXISTING);
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) Test(org.junit.Test)

Aggregations

ParallelIndexIngestionSpec (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec)18 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)15 PartitionConfigurationManager (org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager)14 SegmentProvider (org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider)14 Test (org.junit.Test)14 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)13 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)13 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)12 DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory)12 FloatMinAggregatorFactory (org.apache.druid.query.aggregation.FloatMinAggregatorFactory)12 LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory)12 FloatFirstAggregatorFactory (org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory)12 DoubleLastAggregatorFactory (org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory)12 ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec)8 DoubleDimensionSchema (org.apache.druid.data.input.impl.DoubleDimensionSchema)4 IndexSpec (org.apache.druid.segment.IndexSpec)4 DataSchema (org.apache.druid.segment.indexing.DataSchema)4 ArrayList (java.util.ArrayList)3 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)3 ParallelIndexIOConfig (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig)3