Example 6 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From the class NewestSegmentFirstPolicyTest, the method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentQueryGranularity.

@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentQueryGranularity() {
    // Same indexSpec as what is set in the auto compaction config
    Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {
    });
    // Same partitionsSpec as what is set in the auto compaction config
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have
    // queryGranularity=DAY for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
    // queryGranularity=MINUTE for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
    // and queryGranularity=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (queryGranularity was not set during last compaction)
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("queryGranularity", "day"))
        ),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("queryGranularity", "minute"))
        ),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of())
        )
    );
    // Auto compaction config sets queryGranularity=MINUTE
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(
            DATA_SOURCE,
            createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(null, Granularities.MINUTE, null))
        ),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap()
    );
    // We should get interval 2017-10-03T00:00:00/2017-10-04T00:00:00 first (newest first; its queryGranularity was never set)
    // and then interval 2017-10-01T00:00:00/2017-10-02T00:00:00 (queryGranularity=DAY differs from the configured MINUTE).
    // Interval 2017-10-02T00:00:00/2017-10-03T00:00:00 already matches the config and is skipped.
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    Assert.assertTrue(iterator.hasNext());
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) ArrayList(java.util.ArrayList) Period(org.joda.time.Period) DataSegment(org.apache.druid.timeline.DataSegment) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
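A side note on the partitionsSpec used above: ClientCompactionTaskQueryTuningConfig.from(null, null) carries no partitioning overrides, so findPartitionsSpecFromConfig is expected to fall back to dynamic partitioning. A minimal, hypothetical sketch of that assumption (not the production code path):

import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;

public class DefaultPartitionsSpecSketch {
    public static void main(String[] args) {
        // Assumption: with no tuning overrides, auto compaction behaves as if a
        // DynamicPartitionsSpec had been configured; the null arguments are
        // expected to resolve to the library-default row limits internally.
        PartitionsSpec spec = new DynamicPartitionsSpec(null, null);
        System.out.println(spec);
    }
}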

Example 7 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From the class NewestSegmentFirstPolicyTest, the method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentRollup.

@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentRollup() {
    // Same indexSpec as what is set in the auto compaction config
    Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {
    });
    // Same partitionsSpec as what is set in the auto compaction config
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have
    // rollup=false for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
    // rollup=true for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
    // and rollup=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (rollup was not set during last compaction)
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("rollup", "false"))
        ),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("rollup", "true"))
        ),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of())
        )
    );
    // Auto compaction config sets rollup=true
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(
            DATA_SOURCE,
            createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(null, null, true))
        ),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap()
    );
    // We should get interval 2017-10-03T00:00:00/2017-10-04T00:00:00 first (newest first; rollup was never set)
    // and then interval 2017-10-01T00:00:00/2017-10-02T00:00:00 (rollup=false differs from the configured true).
    // Interval 2017-10-02T00:00:00/2017-10-03T00:00:00 already matches the config and is skipped.
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    Assert.assertTrue(iterator.hasNext());
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) ArrayList(java.util.ArrayList) Period(org.joda.time.Period) DataSegment(org.apache.druid.timeline.DataSegment) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
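The decision logic these two tests exercise can be summarized in a small, simplified sketch (illustrative only, not Druid's actual implementation): a granularitySpec field recorded in the segment's last CompactionState is compared against the auto compaction config, and a value that differs, or was never recorded, marks the interval for recompaction.

public class GranularityCheckSketch {
    // Simplified stand-in for the per-field comparison: nothing configured means
    // nothing to enforce; otherwise any mismatch (including a missing recorded
    // value) flags the segments for recompaction.
    static boolean needsRecompaction(Object configured, Object lastCompacted) {
        if (configured == null) {
            return false;
        }
        return !configured.equals(lastCompacted);
    }

    public static void main(String[] args) {
        System.out.println(needsRecompaction("minute", "day"));    // true: the 2017-10-01 interval is offered
        System.out.println(needsRecompaction("minute", "minute")); // false: the 2017-10-02 interval is skipped
        System.out.println(needsRecompaction("minute", null));     // true: the 2017-10-03 interval is offered
    }
}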

Example 8 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From the class HadoopIngestionSpecTest, the method testPartitionsSpecMaxPartitionSize.

@Test
public void testPartitionsSpecMaxPartitionSize() {
    final HadoopIngestionSpec schema;
    try {
        schema = jsonReadWriteRead(
            "{\n"
            + "    \"tuningConfig\": {\n"
            + "        \"type\": \"hadoop\",\n"
            + "        \"partitionsSpec\": {\n"
            + "            \"type\": \"dimension\",\n"
            + "            \"targetPartitionSize\": 100,\n"
            + "            \"maxPartitionSize\" : null,\n"
            + "            \"partitionDimension\" : \"foo\"\n"
            + "        }\n"
            + "    }\n"
            + "}",
            HadoopIngestionSpec.class
        );
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    PartitionsSpec partitionsSpec = schema.getTuningConfig().getPartitionsSpec();
    Assert.assertTrue("partitionsSpec", partitionsSpec instanceof SingleDimensionPartitionsSpec);
    SingleDimensionPartitionsSpec singleDimensionPartitionsSpec = (SingleDimensionPartitionsSpec) partitionsSpec;
    Assert.assertTrue("isDeterminingPartitions", singleDimensionPartitionsSpec.needsDeterminePartitions(true));
    Assert.assertEquals("getTargetPartitionSize", 100, singleDimensionPartitionsSpec.getTargetRowsPerSegment().intValue());
    Assert.assertEquals("getMaxPartitionSize", 150, singleDimensionPartitionsSpec.getMaxRowsPerSegment().intValue());
    Assert.assertEquals("getPartitionDimension", "foo", singleDimensionPartitionsSpec.getPartitionDimension());
}
Also used : HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Test(org.junit.Test)
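The 150 in the assertion above is worth calling out: the spec's maxPartitionSize is null, and the effective maximum appears to be derived as 1.5 times the target. A minimal sketch of that assumption, constructing the spec directly rather than through the Hadoop tuning config:

import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;

public class MaxPartitionSizeSketch {
    public static void main(String[] args) {
        // targetRowsPerSegment=100, maxRowsPerSegment=null, partitionDimension="foo", assumeGrouped=false
        SingleDimensionPartitionsSpec spec =
            new SingleDimensionPartitionsSpec(100, null, "foo", false);
        System.out.println(spec.getTargetRowsPerSegment()); // 100
        // Expected to print 150 if the 1.5x-of-target default holds.
        System.out.println(spec.getMaxRowsPerSegment());
    }
}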

Example 9 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From the class ParallelIndexSupervisorTask, the method runHashPartitionMultiPhaseParallel.

@VisibleForTesting
TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception {
    TaskState state;
    ParallelIndexIngestionSpec ingestionSchemaToUse = ingestionSchema;
    if (!(ingestionSchema.getTuningConfig().getPartitionsSpec() instanceof HashedPartitionsSpec)) {
        // only range and hash partitioning is supported for multiphase parallel ingestion, see runMultiPhaseParallel()
        throw new ISE("forceGuaranteedRollup is set but partitionsSpec [%s] is not a single_dim or hash partition spec.", ingestionSchema.getTuningConfig().getPartitionsSpec());
    }
    final Map<Interval, Integer> intervalToNumShards;
    HashedPartitionsSpec partitionsSpec = (HashedPartitionsSpec) ingestionSchema.getTuningConfig().getPartitionsSpec();
    final boolean needsInputSampling = partitionsSpec.getNumShards() == null || ingestionSchemaToUse.getDataSchema().getGranularitySpec().inputIntervals().isEmpty();
    if (needsInputSampling) {
        // 0. need to determine intervals and numShards by scanning the data
        LOG.info("Needs to determine intervals or numShards, beginning %s phase.", PartialDimensionCardinalityTask.TYPE);
        ParallelIndexTaskRunner<PartialDimensionCardinalityTask, DimensionCardinalityReport> cardinalityRunner = createRunner(toolbox, this::createPartialDimensionCardinalityRunner);
        state = runNextPhase(cardinalityRunner);
        if (state.isFailure()) {
            String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, cardinalityRunner.getName());
            return TaskStatus.failure(getId(), errMsg);
        }
        if (cardinalityRunner.getReports().isEmpty()) {
            String msg = "No valid rows for hash partitioning." + " All rows may have invalid timestamps or have been filtered out.";
            LOG.warn(msg);
            return TaskStatus.success(getId(), msg);
        }
        if (partitionsSpec.getNumShards() == null) {
            int effectiveMaxRowsPerSegment = partitionsSpec.getMaxRowsPerSegment() == null ? PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT : partitionsSpec.getMaxRowsPerSegment();
            LOG.info("effective maxRowsPerSegment is: " + effectiveMaxRowsPerSegment);
            intervalToNumShards = determineNumShardsFromCardinalityReport(cardinalityRunner.getReports().values(), effectiveMaxRowsPerSegment);
        } else {
            intervalToNumShards = CollectionUtils.mapValues(mergeCardinalityReports(cardinalityRunner.getReports().values()), k -> partitionsSpec.getNumShards());
        }
        ingestionSchemaToUse = rewriteIngestionSpecWithIntervalsIfMissing(ingestionSchemaToUse, intervalToNumShards.keySet());
    } else {
        // numShards will be determined in PartialHashSegmentGenerateTask
        intervalToNumShards = null;
    }
    // 1. Partial segment generation phase
    final ParallelIndexIngestionSpec segmentCreateIngestionSpec = ingestionSchemaToUse;
    ParallelIndexTaskRunner<PartialHashSegmentGenerateTask, GeneratedPartitionsReport> indexingRunner = createRunner(toolbox, f -> createPartialHashSegmentGenerateRunner(toolbox, segmentCreateIngestionSpec, intervalToNumShards));
    state = runNextPhase(indexingRunner);
    if (state.isFailure()) {
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, indexingRunner.getName());
        return TaskStatus.failure(getId(), errMsg);
    }
    // 2. Partial segment merge phase
    // partition (interval, partitionId) -> partition locations
    Map<Partition, List<PartitionLocation>> partitionToLocations = getPartitionToLocations(indexingRunner.getReports());
    final List<PartialSegmentMergeIOConfig> ioConfigs = createGenericMergeIOConfigs(ingestionSchema.getTuningConfig().getTotalNumMergeTasks(), partitionToLocations);
    final ParallelIndexIngestionSpec segmentMergeIngestionSpec = ingestionSchemaToUse;
    final ParallelIndexTaskRunner<PartialGenericSegmentMergeTask, PushedSegmentsReport> mergeRunner = createRunner(toolbox, tb -> createPartialGenericSegmentMergeRunner(tb, ioConfigs, segmentMergeIngestionSpec));
    state = runNextPhase(mergeRunner);
    TaskStatus taskStatus;
    if (state.isSuccess()) {
        // noinspection ConstantConditions
        publishSegments(toolbox, mergeRunner.getReports());
        if (awaitSegmentAvailabilityTimeoutMillis > 0) {
            waitForSegmentAvailability(mergeRunner.getReports());
        }
        taskStatus = TaskStatus.success(getId());
    } else {
        // there is only success or failure after running....
        Preconditions.checkState(state.isFailure(), "Unrecognized state after task is complete[%s]", state);
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, mergeRunner.getName());
        taskStatus = TaskStatus.failure(getId(), errMsg);
    }
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports(taskStatus, segmentAvailabilityConfirmationCompleted));
    return taskStatus;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) PartitionBoundaries(org.apache.druid.timeline.partition.PartitionBoundaries) Produces(javax.ws.rs.Produces) IngestionState(org.apache.druid.indexer.IngestionState) Pair(org.apache.druid.java.util.common.Pair) MediaType(javax.ws.rs.core.MediaType) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) FiniteFirehoseFactory(org.apache.druid.data.input.FiniteFirehoseFactory) Map(java.util.Map) StringDistribution(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution) AbstractBatchIndexTask(org.apache.druid.indexing.common.task.AbstractBatchIndexTask) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) TaskState(org.apache.druid.indexer.TaskState) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) IndexTaskUtils(org.apache.druid.indexing.common.task.IndexTaskUtils) Granularity(org.apache.druid.java.util.common.granularity.Granularity) GET(javax.ws.rs.GET) Tasks(org.apache.druid.indexing.common.task.Tasks) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) IndexTask(org.apache.druid.indexing.common.task.IndexTask) Interval(org.joda.time.Interval) HttpServletRequest(javax.servlet.http.HttpServletRequest) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) StringSketchMerger(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketchMerger) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) BuildingShardSpec(org.apache.druid.timeline.partition.BuildingShardSpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) Throwables(com.google.common.base.Throwables) StringDistributionMerger(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistributionMerger) IOException(java.io.IOException) TreeMap(java.util.TreeMap) ChatHandlers(org.apache.druid.segment.realtime.firehose.ChatHandlers) Preconditions(com.google.common.base.Preconditions) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) SubTaskSpecStatus(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTaskRunner.SubTaskSpecStatus) HllSketch(org.apache.datasketches.hll.HllSketch) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) Memory(org.apache.datasketches.memory.Memory) TaskResource(org.apache.druid.indexing.common.task.TaskResource) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) QueryParam(javax.ws.rs.QueryParam) Consumes(javax.ws.rs.Consumes) Union(org.apache.datasketches.hll.Union) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Task(org.apache.druid.indexing.common.task.Task) SmileMediaTypes(com.fasterxml.jackson.jaxrs.smile.SmileMediaTypes) Context(javax.ws.rs.core.Context) ImmutableMap(com.google.common.collect.ImmutableMap) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Action(org.apache.druid.server.security.Action) Collectors(java.util.stream.Collectors) MaxAllowedLocksExceededException(org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException) Objects(java.util.Objects) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) List(java.util.List) Response(javax.ws.rs.core.Response) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) CurrentSubTaskHolder(org.apache.druid.indexing.common.task.CurrentSubTaskHolder) Logger(org.apache.druid.java.util.common.logger.Logger) PathParam(javax.ws.rs.PathParam) CollectionUtils(org.apache.druid.utils.CollectionUtils) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) Function(java.util.function.Function) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) HashSet(java.util.HashSet) InputSource(org.apache.druid.data.input.InputSource) RowIngestionMetersTotals(org.apache.druid.segment.incremental.RowIngestionMetersTotals) Status(javax.ws.rs.core.Response.Status) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) POST(javax.ws.rs.POST) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) DateTime(org.joda.time.DateTime) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) IntermediaryDataManager(org.apache.druid.indexing.worker.shuffle.IntermediaryDataManager) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections)
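When numShards is not given, the shard count per interval is derived from the cardinality reports and the effective maxRowsPerSegment. A simplified, illustrative sketch of that derivation (using plain interval strings and made-up cardinality estimates, not Druid's actual report types):

import java.util.HashMap;
import java.util.Map;

public class NumShardsSketch {
    // Shard count per interval: distinct-key estimate divided by the effective
    // maxRowsPerSegment, rounded up and floored at one shard.
    static Map<String, Integer> determineNumShards(Map<String, Long> cardinalityPerInterval,
                                                   int maxRowsPerSegment) {
        Map<String, Integer> intervalToNumShards = new HashMap<>();
        cardinalityPerInterval.forEach((interval, cardinality) ->
            intervalToNumShards.put(
                interval,
                Math.max(1, (int) Math.ceil((double) cardinality / maxRowsPerSegment))
            )
        );
        return intervalToNumShards;
    }

    public static void main(String[] args) {
        Map<String, Long> estimates = Map.of(
            "2017-10-01/2017-10-02", 12_000_000L,
            "2017-10-02/2017-10-03", 800_000L
        );
        // With a maxRowsPerSegment of 5,000,000: 3 shards and 1 shard respectively.
        System.out.println(determineNumShards(estimates, 5_000_000));
    }
}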

Example 10 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From the class PartialSegmentGenerateTask, the method generateSegments.

private List<DataSegment> generateSegments(final TaskToolbox toolbox, final ParallelIndexSupervisorTaskClient taskClient, final InputSource inputSource, final File tmpDir) throws IOException, InterruptedException, ExecutionException, TimeoutException {
    final DataSchema dataSchema = ingestionSchema.getDataSchema();
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    final RowIngestionMeters buildSegmentsMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    toolbox.addMonitor(new RealtimeMetricsMonitor(Collections.singletonList(fireDepartmentForMetrics), Collections.singletonMap(DruidMetrics.TASK_ID, new String[] { getId() })));
    final ParallelIndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig();
    final PartitionsSpec partitionsSpec = tuningConfig.getGivenOrDefaultPartitionsSpec();
    final long pushTimeout = tuningConfig.getPushTimeout();
    final SegmentAllocatorForBatch segmentAllocator = createSegmentAllocator(toolbox, taskClient);
    final SequenceNameFunction sequenceNameFunction = segmentAllocator.getSequenceNameFunction();
    final ParseExceptionHandler parseExceptionHandler = new ParseExceptionHandler(buildSegmentsMeters, tuningConfig.isLogParseExceptions(), tuningConfig.getMaxParseExceptions(), tuningConfig.getMaxSavedParseExceptions());
    final boolean useMaxMemoryEstimates = getContextValue(Tasks.USE_MAX_MEMORY_ESTIMATES, Tasks.DEFAULT_USE_MAX_MEMORY_ESTIMATES);
    final Appenderator appenderator = BatchAppenderators.newAppenderator(getId(), toolbox.getAppenderatorsManager(), fireDepartmentMetrics, toolbox, dataSchema, tuningConfig, new ShuffleDataSegmentPusher(supervisorTaskId, getId(), toolbox.getIntermediaryDataManager()), buildSegmentsMeters, parseExceptionHandler, useMaxMemoryEstimates);
    boolean exceptionOccurred = false;
    try (final BatchAppenderatorDriver driver = BatchAppenderators.newDriver(appenderator, toolbox, segmentAllocator)) {
        driver.startJob();
        final SegmentsAndCommitMetadata pushed = InputSourceProcessor.process(dataSchema, driver, partitionsSpec, inputSource, inputSource.needsFormat() ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) : null, tmpDir, sequenceNameFunction, inputRowIteratorBuilder, buildSegmentsMeters, parseExceptionHandler, pushTimeout);
        return pushed.getSegments();
    } catch (Exception e) {
        exceptionOccurred = true;
        throw e;
    } finally {
        if (exceptionOccurred) {
            appenderator.closeNow();
        } else {
            appenderator.close();
        }
    }
}
Also used : RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) ShuffleDataSegmentPusher(org.apache.druid.indexing.worker.shuffle.ShuffleDataSegmentPusher) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) BatchAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.BatchAppenderatorDriver) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) DataSchema(org.apache.druid.segment.indexing.DataSchema) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) SegmentAllocatorForBatch(org.apache.druid.indexing.common.task.SegmentAllocatorForBatch) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) RealtimeMetricsMonitor(org.apache.druid.segment.realtime.RealtimeMetricsMonitor) SequenceNameFunction(org.apache.druid.indexing.common.task.SequenceNameFunction) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters)
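One detail worth noting in generateSegments is the cleanup split in the finally block: a graceful close on success versus an immediate closeNow when an exception escaped. A minimal sketch of the same pattern with a hypothetical Appenderator interface (names are illustrative stand-ins, not Druid's API surface):

public class CloseNowPatternSketch {
    interface Appenderator {
        void close();    // graceful: let in-flight persists finish, then release resources
        void closeNow(); // abort: discard in-flight work and release resources immediately
    }

    static void runJob(Appenderator appenderator, Runnable work) {
        boolean exceptionOccurred = false;
        try {
            work.run();
        } catch (RuntimeException e) {
            // Remember that we are on the failure path, then rethrow.
            exceptionOccurred = true;
            throw e;
        } finally {
            if (exceptionOccurred) {
                appenderator.closeNow();
            } else {
                appenderator.close();
            }
        }
    }
}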

Aggregations

PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec): 34 usages
Test (org.junit.Test): 19 usages
Map (java.util.Map): 17 usages
ArrayList (java.util.ArrayList): 16 usages
DataSegment (org.apache.druid.timeline.DataSegment): 16 usages
Period (org.joda.time.Period): 16 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 15 usages
HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec): 15 usages
IndexSpec (org.apache.druid.segment.IndexSpec): 15 usages
CompactionState (org.apache.druid.timeline.CompactionState): 14 usages
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 11 usages
UserCompactionTaskGranularityConfig (org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig): 11 usages
SingleDimensionPartitionsSpec (org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec): 10 usages
StringUtils (org.apache.druid.java.util.common.StringUtils): 9 usages
Function (java.util.function.Function): 8 usages
IOException (java.io.IOException): 7 usages
List (java.util.List): 7 usages
Pair (org.apache.druid.java.util.common.Pair): 5 usages
Interval (org.joda.time.Interval): 5 usages
Test (org.testng.annotations.Test): 5 usages