Example 1 with LockGranularity

Use of org.apache.druid.indexing.common.LockGranularity in the druid project by druid-io.

From the class CompactionTask, method createIngestionSchema:

/**
 * Generate {@link ParallelIndexIngestionSpec} from input segments.
 *
 * @return an empty list if the input segments don't exist; otherwise, a list of generated ingestion specs.
 */
@VisibleForTesting
static List<ParallelIndexIngestionSpec> createIngestionSchema(
        final TaskToolbox toolbox,
        final LockGranularity lockGranularityInUse,
        final SegmentProvider segmentProvider,
        final PartitionConfigurationManager partitionConfigurationManager,
        @Nullable final DimensionsSpec dimensionsSpec,
        @Nullable final ClientCompactionTaskTransformSpec transformSpec,
        @Nullable final AggregatorFactory[] metricsSpec,
        @Nullable final ClientCompactionTaskGranularitySpec granularitySpec,
        final CoordinatorClient coordinatorClient,
        final SegmentCacheManagerFactory segmentCacheManagerFactory,
        final RetryPolicyFactory retryPolicyFactory,
        final boolean dropExisting
) throws IOException, SegmentLoadingException {
    NonnullPair<Map<DataSegment, File>, List<TimelineObjectHolder<String, DataSegment>>> pair = prepareSegments(toolbox, segmentProvider, lockGranularityInUse);
    final Map<DataSegment, File> segmentFileMap = pair.lhs;
    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = pair.rhs;
    if (timelineSegments.isEmpty()) {
        return Collections.emptyList();
    }
    // find metadata for interval
    // queryableIndexAndSegments is sorted by the interval of the dataSegment
    final List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments = loadSegments(timelineSegments, segmentFileMap, toolbox.getIndexIO());
    final CompactionTuningConfig compactionTuningConfig = partitionConfigurationManager.computeTuningConfig();
    if (granularitySpec == null || granularitySpec.getSegmentGranularity() == null) {
        // original granularity
        final Map<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> intervalToSegments = new TreeMap<>(Comparators.intervalsByStartThenEnd());
        queryableIndexAndSegments.forEach(p -> intervalToSegments.computeIfAbsent(p.rhs.getInterval(), k -> new ArrayList<>()).add(p));
        // Unify overlapping intervals to ensure that overlapping segments are compacted in the same ingestion spec.
        List<NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>>> intervalToSegmentsUnified = new ArrayList<>();
        Interval union = null;
        List<NonnullPair<QueryableIndex, DataSegment>> segments = new ArrayList<>();
        for (Entry<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegments.entrySet()) {
            Interval cur = entry.getKey();
            if (union == null) {
                union = cur;
                segments.addAll(entry.getValue());
            } else if (union.overlaps(cur)) {
                union = Intervals.utc(union.getStartMillis(), Math.max(union.getEndMillis(), cur.getEndMillis()));
                segments.addAll(entry.getValue());
            } else {
                intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
                union = cur;
                segments = new ArrayList<>(entry.getValue());
            }
        }
        intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
        final List<ParallelIndexIngestionSpec> specs = new ArrayList<>(intervalToSegmentsUnified.size());
        for (NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegmentsUnified) {
            final Interval interval = entry.lhs;
            final List<NonnullPair<QueryableIndex, DataSegment>> segmentsToCompact = entry.rhs;
            // If granularitySpec is not null, copy it with the computed segmentGranularity set.
            // Otherwise, create a new granularitySpec around the computed segmentGranularity.
            Granularity segmentGranularityToUse = GranularityType.fromPeriod(interval.toPeriod()).getDefaultGranularity();
            final DataSchema dataSchema = createDataSchema(segmentProvider.dataSource, segmentsToCompact, dimensionsSpec, transformSpec, metricsSpec, granularitySpec == null ? new ClientCompactionTaskGranularitySpec(segmentGranularityToUse, null, null) : granularitySpec.withSegmentGranularity(segmentGranularityToUse));
            specs.add(new ParallelIndexIngestionSpec(dataSchema, createIoConfig(toolbox, dataSchema, interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting), compactionTuningConfig));
        }
        return specs;
    } else {
        // given segment granularity
        final DataSchema dataSchema = createDataSchema(segmentProvider.dataSource, queryableIndexAndSegments, dimensionsSpec, transformSpec, metricsSpec, granularitySpec);
        return Collections.singletonList(new ParallelIndexIngestionSpec(dataSchema, createIoConfig(toolbox, dataSchema, segmentProvider.interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting), compactionTuningConfig));
    }
}
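
The merge loop above is a standard union pass over intervals pre-sorted by start then end (the TreeMap with Comparators.intervalsByStartThenEnd() guarantees the ordering). A minimal, framework-free sketch of the same idea, with a hypothetical Span class standing in for org.joda.time.Interval:

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for org.joda.time.Interval: [start, end) in millis.
final class Span {
    final long start;
    final long end;

    Span(long start, long end) {
        this.start = start;
        this.end = end;
    }

    boolean overlaps(Span other) {
        // Abutting spans do not overlap, matching Joda-Time semantics.
        return start < other.end && other.start < end;
    }
}

final class IntervalUnion {
    // Merges overlapping spans; the input must be sorted by start then end.
    static List<Span> unify(List<Span> sorted) {
        final List<Span> unified = new ArrayList<>();
        Span union = null;
        for (Span cur : sorted) {
            if (union == null) {
                union = cur;
            } else if (union.overlaps(cur)) {
                // Starts are sorted, so only the end of the running union can grow.
                union = new Span(union.start, Math.max(union.end, cur.end));
            } else {
                unified.add(union);
                union = cur;
            }
        }
        if (union != null) {
            unified.add(union);
        }
        return unified;
    }
}

The only difference from the sketch is that createIngestionSchema carries the per-interval segment lists along while merging, so each unified interval keeps every segment it absorbed.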

Example 2 with LockGranularity

Use of org.apache.druid.indexing.common.LockGranularity in the druid project by druid-io.

From the class SegmentAllocateActionTest, method testResumeSequence:

@Test
public void testResumeSequence() {
    final Task task = NoopTask.create();
    taskActionTestKit.getTaskLockbox().add(task);
    final Map<Integer, SegmentIdWithShardSpec> allocatedPartyTimeIds = new HashMap<>();
    final Map<Integer, SegmentIdWithShardSpec> allocatedFutureIds = new HashMap<>();
    final SegmentIdWithShardSpec id1 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", null);
    Assert.assertNotNull(id1);
    allocatedPartyTimeIds.put(id1.getShardSpec().getPartitionNum(), id1);
    final SegmentIdWithShardSpec id2 = allocate(task, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
    Assert.assertNotNull(id2);
    allocatedFutureIds.put(id2.getShardSpec().getPartitionNum(), id2);
    final SegmentIdWithShardSpec id3 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id2.toString());
    Assert.assertNotNull(id3);
    allocatedPartyTimeIds.put(id3.getShardSpec().getPartitionNum(), id3);
    final SegmentIdWithShardSpec id4 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
    Assert.assertNull(id4);
    final SegmentIdWithShardSpec id5 = allocate(task, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
    Assert.assertNotNull(id5);
    allocatedFutureIds.put(id5.getShardSpec().getPartitionNum(), id5);
    final SegmentIdWithShardSpec id6 = allocate(task, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.MINUTE, "s1", id1.toString());
    Assert.assertNull(id6);
    final SegmentIdWithShardSpec id7 = allocate(task, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.DAY, "s1", id1.toString());
    Assert.assertNotNull(id7);
    allocatedFutureIds.put(id7.getShardSpec().getPartitionNum(), id7);
    if (lockGranularity == LockGranularity.TIME_CHUNK) {
        final TaskLock partyLock = Iterables.getOnlyElement(
            FluentIterable.from(taskActionTestKit.getTaskLockbox().findLocksForTask(task))
                          .filter(input -> input.getInterval().contains(PARTY_TIME)));
        final TaskLock futureLock = Iterables.getOnlyElement(
            FluentIterable.from(taskActionTestKit.getTaskLockbox().findLocksForTask(task))
                          .filter(input -> input.getInterval().contains(THE_DISTANT_FUTURE)));
        assertSameIdentifier(id1, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(0, 0)));
        assertSameIdentifier(id2, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(THE_DISTANT_FUTURE), futureLock.getVersion(), new NumberedShardSpec(0, 0)));
        assertSameIdentifier(id3, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(1, 0)));
    } else {
        final List<TaskLock> partyLocks = taskActionTestKit.getTaskLockbox().findLocksForTask(task).stream().filter(input -> input.getInterval().contains(PARTY_TIME)).collect(Collectors.toList());
        Assert.assertEquals(2, partyLocks.size());
        final Map<Integer, SegmentLock> partitionIdToLock = new HashMap<>();
        partyLocks.forEach(lock -> {
            Assert.assertEquals(LockGranularity.SEGMENT, lock.getGranularity());
            final SegmentLock segmentLock = (SegmentLock) lock;
            partitionIdToLock.put(segmentLock.getPartitionId(), segmentLock);
        });
        for (Entry<Integer, SegmentLock> entry : partitionIdToLock.entrySet()) {
            assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), allocatedPartyTimeIds.get(entry.getKey()).getVersion(), new NumberedShardSpec(entry.getValue().getPartitionId(), 0)), allocatedPartyTimeIds.get(entry.getKey()));
        }
        final List<TaskLock> futureLocks = taskActionTestKit.getTaskLockbox().findLocksForTask(task).stream().filter(input -> input.getInterval().contains(THE_DISTANT_FUTURE)).collect(Collectors.toList());
        Assert.assertEquals(1, futureLocks.size());
        partitionIdToLock.clear();
        futureLocks.forEach(lock -> {
            Assert.assertEquals(LockGranularity.SEGMENT, lock.getGranularity());
            final SegmentLock segmentLock = (SegmentLock) lock;
            partitionIdToLock.put(segmentLock.getPartitionId(), segmentLock);
        });
        for (Entry<Integer, SegmentLock> entry : partitionIdToLock.entrySet()) {
            assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(THE_DISTANT_FUTURE), allocatedFutureIds.get(entry.getKey()).getVersion(), new NumberedShardSpec(entry.getValue().getPartitionId(), 0)), allocatedFutureIds.get(entry.getKey()));
        }
    }
    Assert.assertNull(id4);
    assertSameIdentifier(id2, id5);
    Assert.assertNull(id6);
    assertSameIdentifier(id2, id7);
}
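
What the test pins down is that allocation is resumable: repeating a request with the same sequence name and previous segment id (id5, id7) hands back the originally allocated identifier, while an incompatible request (id4's interval, id6's too-fine granularity) yields null. A toy memoization sketch of just the resume behavior, with hypothetical names and none of Druid's actual metadata-store logic:

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

// Toy model only: real allocation goes through the metadata store and also
// validates intervals and granularities before resuming a sequence.
final class SequenceMemo {
    private final Map<String, String> memo = new HashMap<>();

    // First call for a (sequenceName, previousId) pair records proposedId;
    // later calls with the same pair return the recorded id unchanged.
    String allocate(String sequenceName, String previousId, String proposedId) {
        final String key = sequenceName + "|" + Objects.toString(previousId);
        return memo.computeIfAbsent(key, k -> proposedId);
    }
}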

Example 3 with LockGranularity

Use of org.apache.druid.indexing.common.LockGranularity in the druid project by druid-io.

From the class SegmentAllocateActionTest, method testManySegmentsSameInterval:

@Test
public void testManySegmentsSameInterval() {
    final Task task = NoopTask.create();
    taskActionTestKit.getTaskLockbox().add(task);
    final SegmentIdWithShardSpec id1 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", null);
    final SegmentIdWithShardSpec id2 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
    final SegmentIdWithShardSpec id3 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id2.toString());
    if (lockGranularity == LockGranularity.TIME_CHUNK) {
        final TaskLock partyLock = Iterables.getOnlyElement(FluentIterable.from(taskActionTestKit.getTaskLockbox().findLocksForTask(task)).filter(input -> input.getInterval().contains(PARTY_TIME)));
        assertSameIdentifier(id1, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(0, 0)));
        assertSameIdentifier(id2, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(1, 0)));
        assertSameIdentifier(id3, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(2, 0)));
    } else {
        final List<TaskLock> partyTimeLocks = taskActionTestKit.getTaskLockbox().findLocksForTask(task).stream().filter(input -> input.getInterval().contains(PARTY_TIME)).collect(Collectors.toList());
        Assert.assertEquals(3, partyTimeLocks.size());
        assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), id1.getVersion(), new NumberedShardSpec(0, 0)), id1);
        assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), id1.getVersion(), new NumberedShardSpec(1, 0)), id2);
        assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), id1.getVersion(), new NumberedShardSpec(2, 0)), id3);
    }
}
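
The assertions depend on one invariant: all partitions allocated into the same time chunk share a version and receive consecutive partition numbers (0, 1, 2). A toy counter sketch of that numbering, with hypothetical names:

import java.util.HashMap;
import java.util.Map;

// Toy model: per time chunk, hand out partition numbers 0, 1, 2, ...
final class ChunkPartitionCounter {
    private final Map<String, Integer> nextPartition = new HashMap<>();

    // intervalKey identifies the time chunk, e.g. "2024-01-01T00/PT1H".
    // First call for a key returns 0, the next 1, and so on.
    int allocatePartition(String intervalKey) {
        return nextPartition.merge(intervalKey, 1, Integer::sum) - 1;
    }
}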

Example 4 with LockGranularity

Use of org.apache.druid.indexing.common.LockGranularity in the druid project by druid-io.

From the class SinglePhaseParallelIndexingTest, method testRunAndOverwrite:

private void testRunAndOverwrite(@Nullable Interval inputInterval, Granularity secondSegmentGranularity) {
    // Ingest all data.
    runTestTask(inputInterval, Granularities.DAY, false, Collections.emptyList());
    final Collection<DataSegment> allSegments = new HashSet<>(inputInterval == null ? getStorageCoordinator().retrieveAllUsedSegments("dataSource", Segments.ONLY_VISIBLE) : getStorageCoordinator().retrieveUsedSegmentsForInterval("dataSource", inputInterval, Segments.ONLY_VISIBLE));
    // Reingest the same data. Each segment should get replaced by a segment with a newer version.
    final LockGranularity actualLockGranularity;
    if (inputInterval == null) {
        actualLockGranularity = LockGranularity.TIME_CHUNK;
    } else {
        actualLockGranularity = secondSegmentGranularity.equals(Granularities.DAY) ? lockGranularity : LockGranularity.TIME_CHUNK;
    }
    runOverwriteTask(inputInterval, secondSegmentGranularity, actualLockGranularity);
    // Verify that the segment has been replaced.
    final Collection<DataSegment> newSegments = inputInterval == null ? getStorageCoordinator().retrieveAllUsedSegments("dataSource", Segments.ONLY_VISIBLE) : getStorageCoordinator().retrieveUsedSegmentsForInterval("dataSource", inputInterval, Segments.ONLY_VISIBLE);
    Assert.assertFalse(newSegments.isEmpty());
    allSegments.addAll(newSegments);
    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(allSegments);
    final Interval timelineInterval = inputInterval == null ? Intervals.ETERNITY : inputInterval;
    final Set<DataSegment> visibles = timeline.findNonOvershadowedObjectsInInterval(timelineInterval, Partitions.ONLY_COMPLETE);
    Assert.assertEquals(new HashSet<>(newSegments), visibles);
}
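
The final assertion hinges on overshadowing: after reingestion, every older segment is hidden by a newer-versioned segment covering the same interval, so only newSegments remain visible. A toy version-comparison sketch (plain strings in place of VersionedIntervalTimeline and DataSegment; Druid's versions are timestamp strings, so lexicographic comparison tracks recency):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Toy model: within one interval, only the highest version stays visible.
final class Overshadowing {
    // segments: (intervalKey, version) pairs in any order.
    static Map<String, String> visibleVersions(List<Map.Entry<String, String>> segments) {
        final Map<String, String> visible = new HashMap<>();
        for (Map.Entry<String, String> segment : segments) {
            visible.merge(segment.getKey(), segment.getValue(),
                    (oldV, newV) -> newV.compareTo(oldV) > 0 ? newV : oldV);
        }
        return visible;
    }
}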

Example 5 with LockGranularity

Use of org.apache.druid.indexing.common.LockGranularity in the druid project by druid-io.

From the class AppenderatorDriverRealtimeIndexTask, method run:

@Override
public TaskStatus run(final TaskToolbox toolbox) {
    runThread = Thread.currentThread();
    authorizerMapper = toolbox.getAuthorizerMapper();
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
    setupTimeoutAlert();
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
    this.metrics = fireDepartmentForMetrics.getMetrics();
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
    appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
    TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
    StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
    try {
        log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
        toolbox.getChatHandlerProvider().register(getId(), this, false);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        }
        driver.startJob(segmentId -> {
            try {
                if (lockGranularity == LockGranularity.SEGMENT) {
                    return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
                } else {
                    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
                    if (lock == null) {
                        return false;
                    }
                    if (lock.isRevoked()) {
                        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
                    }
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        int sequenceNumber = 0;
        String sequenceName = makeSequenceName(getId(), sequenceNumber);
        final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
            if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
            }
            if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
            }
            final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
            return toolbox.getTaskActionClient().submit(action);
        };
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        ingestionState = IngestionState.BUILD_SEGMENTS;
        // Time to read data!
        while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
            try {
                InputRow inputRow = firehose.nextRow();
                if (inputRow == null) {
                    log.debug("Discarded null row, considering thrownAway.");
                    rowIngestionMeters.incrementThrownAway();
                } else {
                    AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
                    if (addResult.isOk()) {
                        final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                        if (isPushRequired) {
                            publishSegments(driver, publisher, committerSupplier, sequenceName);
                            sequenceNumber++;
                            sequenceName = makeSequenceName(getId(), sequenceNumber);
                        }
                    } else {
                        // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                }
            } catch (ParseException e) {
                handleParseException(e);
            }
        }
        ingestionState = IngestionState.COMPLETED;
        if (!gracefullyStopped) {
            synchronized (this) {
                if (gracefullyStopped) {
                    // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                    log.info("Gracefully stopping.");
                } else {
                    finishingJob = true;
                }
            }
            if (finishingJob) {
                log.info("Finishing job...");
                // Publish any remaining segments
                publishSegments(driver, publisher, committerSupplier, sequenceName);
                waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
            }
        } else if (firehose != null) {
            log.info("Task was gracefully stopped, will persist data before exiting");
            persistAndWait(driver, committerSupplier.get());
        }
    } catch (Throwable e) {
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        errorMsg = Throwables.getStackTraceAsString(e);
        toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
        return TaskStatus.failure(getId(), errorMsg);
    } finally {
        toolbox.getChatHandlerProvider().unregister(getId());
        CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
        appenderator.close();
        CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
        toolbox.removeMonitor(metricsMonitor);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().unannounce();
            toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
        }
    }
    log.info("Job done!");
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.success(getId());
}
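
The startJob callback above makes a three-way decision for time-chunk locks: not granted (null) means failure, revoked means abort, and anything else lets the job proceed. A condensed sketch of just that decision, with a hypothetical Lock interface in place of the task-action types:

// Hypothetical stand-in for the task-action lock types above.
interface Lock {
    boolean isRevoked();
}

final class LockCheck {
    // Mirrors the time-chunk branch of startJob: null -> not granted,
    // revoked -> hard failure, otherwise the job may proceed.
    static boolean tryAcquireTimeChunk(Lock lock, String interval) {
        if (lock == null) {
            return false;
        }
        if (lock.isRevoked()) {
            throw new IllegalStateException("Lock for interval [" + interval + "] was revoked.");
        }
        return true;
    }
}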
