Search in sources :

Example 1 with Segments

use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinatorTest method testAnnounceHistoricalSegments.

@Test
public void testAnnounceHistoricalSegments() throws IOException {
    Set<DataSegment> segments = new HashSet<>();
    for (int i = 0; i < 105; i++) {
        segments.add(new DataSegment("fooDataSource", Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "version", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(i), 9, 100));
    }
    coordinator.announceHistoricalSegments(segments);
    for (DataSegment segment : segments) {
        Assert.assertArrayEquals(mapper.writeValueAsString(segment).getBytes(StandardCharsets.UTF_8), derbyConnector.lookup(derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(), "id", "payload", segment.getId().toString()));
    }
    List<String> segmentIds = segments.stream().map(segment -> segment.getId().toString()).collect(Collectors.toList());
    segmentIds.sort(Comparator.naturalOrder());
    Assert.assertEquals(segmentIds, retrieveUsedSegmentIds());
    // Should not update dataSource metadata.
    Assert.assertEquals(0, metadataUpdateCounter.get());
}
Also used : Arrays(java.util.Arrays) DimensionRangeShardSpec(org.apache.druid.timeline.partition.DimensionRangeShardSpec) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Assertions(org.assertj.core.api.Assertions) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) HashBasedNumberedPartialShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec) DateTimes(org.apache.druid.java.util.common.DateTimes) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) SegmentPublishResult(org.apache.druid.indexing.overlord.SegmentPublishResult) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) StringMapper(org.skife.jdbi.v2.util.StringMapper) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) DataSegment(org.apache.druid.timeline.DataSegment) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) StringTuple(org.apache.druid.data.input.StringTuple) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) Iterables(com.google.common.collect.Iterables) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) ObjectMetadata(org.apache.druid.indexing.overlord.ObjectMetadata) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) ExpectedException(org.junit.rules.ExpectedException) Before(org.junit.Before) NumberedOverwritePartialShardSpec(org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Test(org.junit.Test) IOException(java.io.IOException) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) AtomicLong(java.util.concurrent.atomic.AtomicLong) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) Handle(org.skife.jdbi.v2.Handle) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 2 with Segments

use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.

the class CompactionTask method createDimensionsSpec.

private static DimensionsSpec createDimensionsSpec(List<NonnullPair<QueryableIndex, DataSegment>> queryableIndices) {
    final BiMap<String, Integer> uniqueDims = HashBiMap.create();
    final Map<String, DimensionSchema> dimensionSchemaMap = new HashMap<>();
    // Here, we try to retain the order of dimensions as they were specified since the order of dimensions may be
    // optimized for performance.
    // Dimensions are extracted from the recent segments to olders because recent segments are likely to be queried more
    // frequently, and thus the performance should be optimized for recent ones rather than old ones.
    // sort timelineSegments in order of interval, see https://github.com/apache/druid/pull/9905
    queryableIndices.sort((o1, o2) -> Comparators.intervalsByStartThenEnd().compare(o1.rhs.getInterval(), o2.rhs.getInterval()));
    int index = 0;
    for (NonnullPair<QueryableIndex, DataSegment> pair : Lists.reverse(queryableIndices)) {
        final QueryableIndex queryableIndex = pair.lhs;
        final Map<String, DimensionHandler> dimensionHandlerMap = queryableIndex.getDimensionHandlers();
        for (String dimension : queryableIndex.getAvailableDimensions()) {
            final ColumnHolder columnHolder = Preconditions.checkNotNull(queryableIndex.getColumnHolder(dimension), "Cannot find column for dimension[%s]", dimension);
            if (!uniqueDims.containsKey(dimension)) {
                final DimensionHandler dimensionHandler = Preconditions.checkNotNull(dimensionHandlerMap.get(dimension), "Cannot find dimensionHandler for dimension[%s]", dimension);
                uniqueDims.put(dimension, index++);
                dimensionSchemaMap.put(dimension, createDimensionSchema(dimension, columnHolder.getCapabilities(), dimensionHandler.getMultivalueHandling()));
            }
        }
    }
    final BiMap<Integer, String> orderedDims = uniqueDims.inverse();
    final List<DimensionSchema> dimensionSchemas = IntStream.range(0, orderedDims.size()).mapToObj(i -> {
        final String dimName = orderedDims.get(i);
        return Preconditions.checkNotNull(dimensionSchemaMap.get(dimName), "Cannot find dimension[%s] from dimensionSchemaMap", dimName);
    }).collect(Collectors.toList());
    return new DimensionsSpec(dimensionSchemas);
}
Also used : Verify(org.apache.curator.shaded.com.google.common.base.Verify) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Comparators(org.apache.druid.java.util.common.guava.Comparators) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) IAE(org.apache.druid.java.util.common.IAE) MultiValueHandling(org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling) BiMap(com.google.common.collect.BiMap) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) Property(org.apache.druid.indexer.Property) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) Segments(org.apache.druid.indexing.overlord.Segments) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) LockGranularity(org.apache.druid.indexing.common.LockGranularity) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) Intervals(org.apache.druid.java.util.common.Intervals) Duration(org.joda.time.Duration) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) HashMap(java.util.HashMap) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) CompactSegments(org.apache.druid.server.coordinator.duty.CompactSegments) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) Nonnull(javax.annotation.Nonnull) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) RE(org.apache.druid.java.util.common.RE) NonnullPair(org.apache.druid.java.util.common.NonnullPair) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IOException(java.io.IOException) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) File(java.io.File) HashBiMap(com.google.common.collect.HashBiMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) DimensionHandler(org.apache.druid.segment.DimensionHandler) TreeMap(java.util.TreeMap) Checks(org.apache.druid.indexer.Checks) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) DimensionHandler(org.apache.druid.segment.DimensionHandler) HashMap(java.util.HashMap) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DataSegment(org.apache.druid.timeline.DataSegment) QueryableIndex(org.apache.druid.segment.QueryableIndex) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec)

Example 3 with Segments

use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.

the class MaterializedViewSupervisor method getVersionAndBaseSegments.

private Pair<Map<Interval, String>, Map<Interval, List<DataSegment>>> getVersionAndBaseSegments(Collection<DataSegment> snapshot) {
    Map<Interval, String> versions = new HashMap<>();
    Map<Interval, List<DataSegment>> segments = new HashMap<>();
    for (DataSegment segment : snapshot) {
        Interval interval = segment.getInterval();
        versions.put(interval, segment.getVersion());
        segments.computeIfAbsent(interval, i -> new ArrayList<>()).add(segment);
    }
    return new Pair<>(versions, segments);
}
Also used : MoreExecutors(com.google.common.util.concurrent.MoreExecutors) Comparators(org.apache.druid.java.util.common.guava.Comparators) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SupervisorStateManager(org.apache.druid.indexing.overlord.supervisor.SupervisorStateManager) Duration(org.joda.time.Duration) HashMap(java.util.HashMap) TaskStatus(org.apache.druid.indexer.TaskStatus) Pair(org.apache.druid.java.util.common.Pair) ArrayList(java.util.ArrayList) EntryExistsException(org.apache.druid.metadata.EntryExistsException) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) JodaUtils(org.apache.druid.java.util.common.JodaUtils) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Optional(com.google.common.base.Optional) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) ListeningScheduledExecutorService(com.google.common.util.concurrent.ListeningScheduledExecutorService) DateTimes(org.apache.druid.java.util.common.DateTimes) Execs(org.apache.druid.java.util.common.concurrent.Execs) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) LagStats(org.apache.druid.indexing.overlord.supervisor.autoscaler.LagStats) TimeUnit(java.util.concurrent.TimeUnit) MapDifference(com.google.common.collect.MapDifference) List(java.util.List) MetadataSupervisorManager(org.apache.druid.metadata.MetadataSupervisorManager) TreeMap(java.util.TreeMap) HadoopIndexTask(org.apache.druid.indexing.common.task.HadoopIndexTask) IndexerMetadataStorageCoordinator(org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator) DataSegment(org.apache.druid.timeline.DataSegment) SupervisorReport(org.apache.druid.indexing.overlord.supervisor.SupervisorReport) Preconditions(com.google.common.base.Preconditions) TaskMaster(org.apache.druid.indexing.overlord.TaskMaster) SqlSegmentsMetadataManager(org.apache.druid.metadata.SqlSegmentsMetadataManager) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) Supervisor(org.apache.druid.indexing.overlord.supervisor.Supervisor) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Pair(org.apache.druid.java.util.common.Pair)

Example 4 with Segments

use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.

the class MaterializedViewSupervisor method getMaxCreateDateAndBaseSegments.

private Pair<Map<Interval, String>, Map<Interval, List<DataSegment>>> getMaxCreateDateAndBaseSegments(Collection<Pair<DataSegment, String>> snapshot) {
    Interval maxAllowedToBuildInterval = snapshot.parallelStream().map(pair -> pair.lhs).map(DataSegment::getInterval).max(Comparators.intervalsByStartThenEnd()).get();
    Map<Interval, String> maxCreatedDate = new HashMap<>();
    Map<Interval, List<DataSegment>> segments = new HashMap<>();
    for (Pair<DataSegment, String> entry : snapshot) {
        DataSegment segment = entry.lhs;
        String createDate = entry.rhs;
        Interval interval = segment.getInterval();
        if (!hasEnoughLag(interval, maxAllowedToBuildInterval)) {
            continue;
        }
        maxCreatedDate.merge(interval, createDate, (date1, date2) -> {
            return DateTimes.max(DateTimes.of(date1), DateTimes.of(date2)).toString();
        });
        segments.computeIfAbsent(interval, i -> new ArrayList<>()).add(segment);
    }
    return new Pair<>(maxCreatedDate, segments);
}
Also used : MoreExecutors(com.google.common.util.concurrent.MoreExecutors) Comparators(org.apache.druid.java.util.common.guava.Comparators) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SupervisorStateManager(org.apache.druid.indexing.overlord.supervisor.SupervisorStateManager) Duration(org.joda.time.Duration) HashMap(java.util.HashMap) TaskStatus(org.apache.druid.indexer.TaskStatus) Pair(org.apache.druid.java.util.common.Pair) ArrayList(java.util.ArrayList) EntryExistsException(org.apache.druid.metadata.EntryExistsException) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) JodaUtils(org.apache.druid.java.util.common.JodaUtils) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Optional(com.google.common.base.Optional) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) ListeningScheduledExecutorService(com.google.common.util.concurrent.ListeningScheduledExecutorService) DateTimes(org.apache.druid.java.util.common.DateTimes) Execs(org.apache.druid.java.util.common.concurrent.Execs) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) LagStats(org.apache.druid.indexing.overlord.supervisor.autoscaler.LagStats) TimeUnit(java.util.concurrent.TimeUnit) MapDifference(com.google.common.collect.MapDifference) List(java.util.List) MetadataSupervisorManager(org.apache.druid.metadata.MetadataSupervisorManager) TreeMap(java.util.TreeMap) HadoopIndexTask(org.apache.druid.indexing.common.task.HadoopIndexTask) IndexerMetadataStorageCoordinator(org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator) DataSegment(org.apache.druid.timeline.DataSegment) SupervisorReport(org.apache.druid.indexing.overlord.supervisor.SupervisorReport) Preconditions(com.google.common.base.Preconditions) TaskMaster(org.apache.druid.indexing.overlord.TaskMaster) SqlSegmentsMetadataManager(org.apache.druid.metadata.SqlSegmentsMetadataManager) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) Supervisor(org.apache.druid.indexing.overlord.supervisor.Supervisor) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Pair(org.apache.druid.java.util.common.Pair)

Example 5 with Segments

use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.

the class CompactionTask method runTask.

@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = createIngestionSchema(toolbox, getTaskLockHelper().getLockGranularityToUse(), segmentProvider, partitionConfigurationManager, dimensionsSpec, transformSpec, metricsSpec, granularitySpec, toolbox.getCoordinatorClient(), segmentCacheManagerFactory, retryPolicyFactory, ioConfig.isDropExisting());
    final List<ParallelIndexSupervisorTask> indexTaskSpecs = IntStream.range(0, ingestionSpecs.size()).mapToObj(i -> {
        // The ID of SubtaskSpecs is used as the base sequenceName in segment allocation protocol.
        // The indexing tasks generated by the compaction task should use different sequenceNames
        // so that they can allocate valid segment IDs with no duplication.
        ParallelIndexIngestionSpec ingestionSpec = ingestionSpecs.get(i);
        final String baseSequenceName = createIndexTaskSpecId(i);
        return newTask(baseSequenceName, ingestionSpec);
    }).collect(Collectors.toList());
    if (indexTaskSpecs.isEmpty()) {
        String msg = StringUtils.format("Can't find segments from inputSpec[%s], nothing to do.", ioConfig.getInputSpec());
        log.warn(msg);
        return TaskStatus.failure(getId(), msg);
    } else {
        registerResourceCloserOnAbnormalExit(currentSubTaskHolder);
        final int totalNumSpecs = indexTaskSpecs.size();
        log.info("Generated [%d] compaction task specs", totalNumSpecs);
        int failCnt = 0;
        for (ParallelIndexSupervisorTask eachSpec : indexTaskSpecs) {
            final String json = toolbox.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(eachSpec);
            if (!currentSubTaskHolder.setTask(eachSpec)) {
                String errMsg = "Task was asked to stop. Finish as failed.";
                log.info(errMsg);
                return TaskStatus.failure(getId(), errMsg);
            }
            try {
                if (eachSpec.isReady(toolbox.getTaskActionClient())) {
                    log.info("Running indexSpec: " + json);
                    final TaskStatus eachResult = eachSpec.run(toolbox);
                    if (!eachResult.isSuccess()) {
                        failCnt++;
                        log.warn("Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
                    }
                } else {
                    failCnt++;
                    log.warn("indexSpec is not ready: [%s].\nTrying the next indexSpec.", json);
                }
            } catch (Exception e) {
                failCnt++;
                log.warn(e, "Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
            }
        }
        String msg = StringUtils.format("Ran [%d] specs, [%d] succeeded, [%d] failed", totalNumSpecs, totalNumSpecs - failCnt, failCnt);
        log.info(msg);
        return failCnt == 0 ? TaskStatus.success(getId()) : TaskStatus.failure(getId(), msg);
    }
}
Also used : Verify(org.apache.curator.shaded.com.google.common.base.Verify) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Comparators(org.apache.druid.java.util.common.guava.Comparators) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) IAE(org.apache.druid.java.util.common.IAE) MultiValueHandling(org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling) BiMap(com.google.common.collect.BiMap) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) Property(org.apache.druid.indexer.Property) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) Segments(org.apache.druid.indexing.overlord.Segments) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) LockGranularity(org.apache.druid.indexing.common.LockGranularity) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) Intervals(org.apache.druid.java.util.common.Intervals) Duration(org.joda.time.Duration) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) HashMap(java.util.HashMap) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) CompactSegments(org.apache.druid.server.coordinator.duty.CompactSegments) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) Nonnull(javax.annotation.Nonnull) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) RE(org.apache.druid.java.util.common.RE) NonnullPair(org.apache.druid.java.util.common.NonnullPair) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IOException(java.io.IOException) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) File(java.io.File) HashBiMap(com.google.common.collect.HashBiMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) DimensionHandler(org.apache.druid.segment.DimensionHandler) TreeMap(java.util.TreeMap) Checks(org.apache.druid.indexer.Checks) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) TaskStatus(org.apache.druid.indexer.TaskStatus) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException)

Aggregations

IOException (java.io.IOException)8 List (java.util.List)8 Segments (org.apache.druid.indexing.overlord.Segments)8 StringUtils (org.apache.druid.java.util.common.StringUtils)8 DataSegment (org.apache.druid.timeline.DataSegment)8 Interval (org.joda.time.Interval)8 Preconditions (com.google.common.base.Preconditions)7 ArrayList (java.util.ArrayList)7 Map (java.util.Map)7 ImmutableList (com.google.common.collect.ImmutableList)6 Collection (java.util.Collection)6 Collections (java.util.Collections)6 HashSet (java.util.HashSet)6 Set (java.util.Set)6 Collectors (java.util.stream.Collectors)6 TaskStatus (org.apache.druid.indexer.TaskStatus)6 ISE (org.apache.druid.java.util.common.ISE)6 JodaUtils (org.apache.druid.java.util.common.JodaUtils)6 VisibleForTesting (com.google.common.annotations.VisibleForTesting)5 IAE (org.apache.druid.java.util.common.IAE)5